Return complete lines from codec stream readers
even if there is an exception in later lines, resulting in correct line numbers for decoding errors in source code. Fixes #1178484. Will backport to 2.4.
This commit is contained in:
parent
6d2b346140
commit
56066d2e55
@ -394,7 +394,7 @@ order to be compatible to the Python codec registry.
|
|||||||
be extended with \function{register_error()}.
|
be extended with \function{register_error()}.
|
||||||
\end{classdesc}
|
\end{classdesc}
|
||||||
|
|
||||||
\begin{methoddesc}{read}{\optional{size\optional{, chars}}}
|
\begin{methoddesc}{read}{\optional{size\optional{, chars\optional{, firstline}}}}
|
||||||
Decodes data from the stream and returns the resulting object.
|
Decodes data from the stream and returns the resulting object.
|
||||||
|
|
||||||
\var{chars} indicates the number of characters to read from the
|
\var{chars} indicates the number of characters to read from the
|
||||||
@ -408,12 +408,16 @@ order to be compatible to the Python codec registry.
|
|||||||
decode as much as possible. \var{size} is intended to prevent having
|
decode as much as possible. \var{size} is intended to prevent having
|
||||||
to decode huge files in one step.
|
to decode huge files in one step.
|
||||||
|
|
||||||
|
\var{firstline} indicates that it would be sufficient to only return
|
||||||
|
the first line, if there are decoding errors on later lines.
|
||||||
|
|
||||||
The method should use a greedy read strategy meaning that it should
|
The method should use a greedy read strategy meaning that it should
|
||||||
read as much data as is allowed within the definition of the encoding
|
read as much data as is allowed within the definition of the encoding
|
||||||
and the given size, e.g. if optional encoding endings or state
|
and the given size, e.g. if optional encoding endings or state
|
||||||
markers are available on the stream, these should be read too.
|
markers are available on the stream, these should be read too.
|
||||||
|
|
||||||
\versionchanged[\var{chars} argument added]{2.4}
|
\versionchanged[\var{chars} argument added]{2.4}
|
||||||
|
\versionchanged[\var{firstline} argument added]{2.4.2}
|
||||||
\end{methoddesc}
|
\end{methoddesc}
|
||||||
|
|
||||||
\begin{methoddesc}{readline}{\optional{size\optional{, keepends}}}
|
\begin{methoddesc}{readline}{\optional{size\optional{, keepends}}}
|
||||||
|
@ -236,7 +236,7 @@ class StreamReader(Codec):
|
|||||||
def decode(self, input, errors='strict'):
|
def decode(self, input, errors='strict'):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def read(self, size=-1, chars=-1):
|
def read(self, size=-1, chars=-1, firstline=False):
|
||||||
|
|
||||||
""" Decodes data from the stream self.stream and returns the
|
""" Decodes data from the stream self.stream and returns the
|
||||||
resulting object.
|
resulting object.
|
||||||
@ -253,6 +253,11 @@ class StreamReader(Codec):
|
|||||||
is intended to prevent having to decode huge files in one
|
is intended to prevent having to decode huge files in one
|
||||||
step.
|
step.
|
||||||
|
|
||||||
|
If firstline is true, and a UnicodeDecodeError happens
|
||||||
|
after the first line terminator in the input, only the first line
|
||||||
|
will be returned, the rest of the input will be kept until the
|
||||||
|
next call to read().
|
||||||
|
|
||||||
The method should use a greedy read strategy meaning that
|
The method should use a greedy read strategy meaning that
|
||||||
it should read as much data as is allowed within the
|
it should read as much data as is allowed within the
|
||||||
definition of the encoding and the given size, e.g. if
|
definition of the encoding and the given size, e.g. if
|
||||||
@ -275,7 +280,16 @@ class StreamReader(Codec):
|
|||||||
newdata = self.stream.read(size)
|
newdata = self.stream.read(size)
|
||||||
# decode bytes (those remaining from the last call included)
|
# decode bytes (those remaining from the last call included)
|
||||||
data = self.bytebuffer + newdata
|
data = self.bytebuffer + newdata
|
||||||
newchars, decodedbytes = self.decode(data, self.errors)
|
try:
|
||||||
|
newchars, decodedbytes = self.decode(data, self.errors)
|
||||||
|
except UnicodeDecodeError, exc:
|
||||||
|
if firstline:
|
||||||
|
newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
|
||||||
|
lines = newchars.splitlines(True)
|
||||||
|
if len(lines)<=1:
|
||||||
|
raise
|
||||||
|
else:
|
||||||
|
raise
|
||||||
# keep undecoded bytes until the next call
|
# keep undecoded bytes until the next call
|
||||||
self.bytebuffer = data[decodedbytes:]
|
self.bytebuffer = data[decodedbytes:]
|
||||||
# put new characters in the character buffer
|
# put new characters in the character buffer
|
||||||
@ -306,7 +320,7 @@ class StreamReader(Codec):
|
|||||||
line = ""
|
line = ""
|
||||||
# If size is given, we call read() only once
|
# If size is given, we call read() only once
|
||||||
while True:
|
while True:
|
||||||
data = self.read(readsize)
|
data = self.read(readsize, firstline=True)
|
||||||
if data:
|
if data:
|
||||||
# If we're at a "\r" read one extra character (which might
|
# If we're at a "\r" read one extra character (which might
|
||||||
# be a "\n") to get a proper line ending. If the stream is
|
# be a "\n") to get a proper line ending. If the stream is
|
||||||
|
@ -191,6 +191,10 @@ Extension Modules
|
|||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Bug #1178484: Return complete lines from codec stream readers
|
||||||
|
even if there is an exception in later lines, resulting in
|
||||||
|
correct line numbers for decoding errors in source code.
|
||||||
|
|
||||||
- Bug #1192315: Disallow negative arguments to clear() in pdb.
|
- Bug #1192315: Disallow negative arguments to clear() in pdb.
|
||||||
|
|
||||||
- Patch #827386: Support absolute source paths in msvccompiler.py.
|
- Patch #827386: Support absolute source paths in msvccompiler.py.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user