Known issues: The string.Formatter class, as discussed in the PEP, is incomplete. Error handling needs to conform to the PEP. Need to fix this warning that I introduced in Python/formatter_unicode.c: "Objects/stringlib/unicodedefs.h:26: warning: `STRINGLIB_CMP' defined but not used". Need to make sure sign formatting is correct; more tests are needed. Need to remove '()' sign formatting, left over from an earlier version of the PEP.
231 lines
7.7 KiB
Python
231 lines
7.7 KiB
Python
"""A collection of string constants.
|
|
|
|
Public module variables:

whitespace -- a string containing all characters considered whitespace
ascii_lowercase -- a string containing all ASCII lowercase letters
ascii_uppercase -- a string containing all ASCII uppercase letters
ascii_letters -- a string containing all ASCII letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits
punctuation -- a string containing all characters considered punctuation
printable -- a string containing all characters considered printable
|
|
|
|
"""
|
|
|
|
# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is one of the punctuation characters, not the
# start of an escape sequence.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace

# Identity translation table: the 256 characters chr(0)..chr(255), in order.
_idmap = ''.join(map(chr, range(256)))
|
|
|
|
# Functions which aren't available as string methods.
|
|
|
|
# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
|
|
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  If sep is omitted or None, runs of whitespace characters
    are replaced by a single space and leading and trailing
    whitespace is removed; otherwise sep is used both to split and
    to join the words.

    """
    # ``sep or ' '`` picks the joiner: a single space whenever no (or a
    # falsy) separator was supplied.
    return (sep or ' ').join(word.capitalize() for word in s.split(sep))
|
|
|
|
|
|
# Construct a translation string
|
|
# Lazily built list form of the 256-character identity table; created on
# the first maketrans() call and copied on every call after that.
_idmapL = None


def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 characters) suitable
    for use in string.translate.  The strings frm and to must be of
    the same length: the character at position i of frm is mapped to
    the character at position i of to, and every other character maps
    to itself.

    Raises ValueError if the two arguments differ in length, and
    IndexError if frm contains a character whose ordinal is 256 or
    greater (the table only has 256 slots).

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # chr(0)..chr(255): the identity mapping every table starts from.
        _idmapL = [chr(code) for code in range(256)]
    table = _idmapL[:]  # copy so the cached identity list is never mutated
    for source, target in zip(fromstr, tostr):
        table[ord(source)] = target
    return ''.join(table)
|
|
|
|
|
|
|
|
####################################################################
|
|
import re as _re
|
|
|
|
class _multimap:
    """Helper class for combining multiple mappings.

    Used by .{safe_,}substitute() to combine the mapping and keyword
    arguments.  Only item lookup is supported.
    """

    def __init__(self, primary, secondary):
        # Lookups consult ``primary`` first and fall back to ``secondary``.
        self._primary = primary
        self._secondary = secondary

    def __getitem__(self, key):
        """Return primary[key]; if that raises KeyError, secondary[key].

        A KeyError raised by the secondary mapping propagates to the
        caller.
        """
        try:
            return self._primary[key]
        except KeyError:
            return self._secondary[key]
|
|
|
|
|
|
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex of each class it creates.

    If the class body defines its own ``pattern`` it is compiled as is.
    Otherwise the default pattern below is filled in with the class's
    ``delimiter`` (regex-escaped) and ``idpattern`` attributes.  Either
    way ``cls.pattern`` ends up as a compiled regular expression using
    the IGNORECASE and VERBOSE flags.
    """

    # Default pattern template; %(delim)s and %(id)s are substituted in
    # __init__.  Written for re.VERBOSE, so whitespace and the trailing
    # comments are ignored by the regex engine.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' in dct:
            # The class supplied a complete pattern of its own.
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
|
|
|
|
|
|
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The metaclass replaces the class attribute ``pattern`` with a
    compiled regular expression built from ``delimiter`` and
    ``idpattern``.  That expression searches for $$, $identifier,
    ${identifier}, and any bare $'s.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    def _invalid(self, mo):
        """Raise ValueError reporting the line and column of a bad placeholder.

        ``mo`` is a match object whose 'invalid' group matched.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column = offset into the last line; line = lines seen so far.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Values come from an optional single positional mapping and/or
        keyword arguments; when both are given, keyword arguments take
        precedence.  Raises TypeError for more than one positional
        argument, ValueError for an ill-formed placeholder, and lets the
        mapping's lookup error (KeyError for a dict) propagate for a
        placeholder that has no value.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolvable placeholders in place.

        A placeholder whose lookup raises KeyError is re-emitted
        unchanged ($name or ${name}), and an ill-formed delimiter is
        emitted as the delimiter itself instead of raising ValueError.
        Raises TypeError for more than one positional argument.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if not args:
            mapping = kws
        elif kws:
            mapping = _multimap(kws, args[0])
        else:
            mapping = args[0]

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    return self.delimiter + named
            braced = mo.group('braced')
            if braced is not None:
                try:
                    return str(mapping[braced])
                except KeyError:
                    return self.delimiter + '{' + braced + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
|
|
|
|
|
|
|
|
########################################################################
|
|
# the Formatter class
|
|
# see PEP 3101 for details and purpose of this class
|
|
|
|
# The hard parts are reused from the C implementation. They're
|
|
# exposed here via the sys module. sys was chosen because it's always
|
|
# available and doesn't have to be dynamically loaded.
|
|
|
|
# The parser is implemented in sys._formatter_parser.
|
|
# The "object lookup" is implemented in sys._formatter_lookup
|
|
|
|
from sys import _formatter_parser, _formatter_lookup
|
|
|
|
class Formatter:
    """Python-level driver for PEP 3101 string formatting.

    Parsing of the format string and lookup of the referenced object are
    delegated to the C helpers _formatter_parser and _formatter_lookup;
    this class stitches the pieces together and offers the hooks
    (get_value, check_unused_args, format_field) described in the PEP.
    """

    def format(self, format_string, *args, **kwargs):
        """Format ``format_string`` using positional and keyword arguments."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format ``format_string`` with ``args`` (sequence) and ``kwargs`` (mapping).

        Literal text is copied through; each replacement field is looked
        up, optionally converted, formatted with format_field() and
        appended to the result.
        """
        result = []
        for (is_markup, literal, field_name, format_spec, conversion) in \
                _formatter_parser(format_string):
            if is_markup:
                # find the object
                _index, _name, obj = _formatter_lookup(field_name, args,
                                                       kwargs)
                # NOTE(review): assumes the parser reports the PEP 3101
                # conversions as 'r' / 's' -- confirm against the C code.
                if conversion == 'r':
                    obj = repr(obj)
                elif conversion == 's':
                    obj = str(obj)
                # Previously the looked-up object was discarded, so every
                # replacement field vanished from the output.
                result.append(self.format_field(obj, format_spec))
            else:
                result.append(literal)
        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return the argument selected by ``key``.

        An int key indexes ``args``; any other key is looked up in
        ``kwargs``.  IndexError / KeyError propagate for a missing
        argument.  vformat() currently resolves fields through
        _formatter_lookup and does not call this hook.
        """
        if isinstance(key, int):
            return args[key]
        return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook for subclasses to reject unused arguments; a no-op here."""
        pass

    def format_field(self, value, format_spec):
        """Return ``value`` formatted according to ``format_spec``."""
        return format(value, format_spec)
|