gh-86155: Fix data loss after unclosed script or style tag in HTMLParser (GH-22658)
When calling .close() the HTMLParser should flush all remaining content, even when that content is in an unclosed script or style tag.
This commit is contained in:
parent
7dddb4e667
commit
53383e90e4
@ -260,7 +260,7 @@ class HTMLParser(_markupbase.ParserBase):
|
|||||||
else:
|
else:
|
||||||
assert 0, "interesting.search() lied"
|
assert 0, "interesting.search() lied"
|
||||||
# end while
|
# end while
|
||||||
if end and i < n and not self.cdata_elem:
|
if end and i < n:
|
||||||
if self.convert_charrefs and not self.cdata_elem:
|
if self.convert_charrefs and not self.cdata_elem:
|
||||||
self.handle_data(unescape(rawdata[i:n]))
|
self.handle_data(unescape(rawdata[i:n]))
|
||||||
else:
|
else:
|
||||||
|
@ -317,6 +317,16 @@ text
|
|||||||
("endtag", element_lower)],
|
("endtag", element_lower)],
|
||||||
collector=Collector(convert_charrefs=False))
|
collector=Collector(convert_charrefs=False))
|
||||||
|
|
||||||
|
def test_EOF_in_cdata(self):
|
||||||
|
content = """<!-- not a comment --> &not-an-entity-ref;
|
||||||
|
<a href="" /> </p><p> <span></span></style>
|
||||||
|
'</script' + '>'"""
|
||||||
|
s = f'<script>{content}'
|
||||||
|
self._run_check(s, [
|
||||||
|
("starttag", 'script', []),
|
||||||
|
("data", content)
|
||||||
|
])
|
||||||
|
|
||||||
def test_comments(self):
|
def test_comments(self):
|
||||||
html = ("<!-- I'm a valid comment -->"
|
html = ("<!-- I'm a valid comment -->"
|
||||||
'<!--me too!-->'
|
'<!--me too!-->'
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
:meth:`html.parser.HTMLParser.close` no longer loses data when the
|
||||||
|
``<script>`` tag is not closed. Patch by Waylan Limberg.
|
Loading…
x
Reference in New Issue
Block a user