__init__(): Coerce the input_charset to unicode (with ascii encoding) before
calling .lower() on it. This fixes the problem described in SF patch #866982, where in the tr_TR.ISO-8859-9 locale 'I'.lower() isn't 'i'. The .lower() method of unicode strings is locale-insensitive.
This commit is contained in:
parent
19717fa33a
commit
ea7c7af10b
@ -185,8 +185,9 @@ class Charset:
|
||||
this attribute will have the same value as the input_codec.
|
||||
"""
|
||||
def __init__(self, input_charset=DEFAULT_CHARSET):
|
||||
# RFC 2046, $4.1.2 says charsets are not case sensitive
|
||||
input_charset = input_charset.lower()
|
||||
# RFC 2046, $4.1.2 says charsets are not case sensitive. We coerce to
|
||||
# unicode because its .lower() is locale insensitive.
|
||||
input_charset = unicode(input_charset, 'ascii').lower()
|
||||
# Set the input charset after filtering through the aliases
|
||||
self.input_charset = ALIASES.get(input_charset, input_charset)
|
||||
# We can try to guess which encoding and conversion to use by the
|
||||
|
Loading…
x
Reference in New Issue
Block a user