Replace the iter/itertext methods of Element in _elementtree with true C implementations instead of the bootstrapped Python code. In addition to being cleaner (it removes the last remnants of the bootstrapping code in _elementtree), this gives a 10x performance boost for iter() on large documents.
Also reorganized the tests a bit to be more robust.
This commit is contained in:
parent
fedb04a37a
commit
64d11e60f2
@ -23,7 +23,8 @@ import weakref
|
|||||||
from test import support
|
from test import support
|
||||||
from test.support import findfile, import_fresh_module, gc_collect
|
from test.support import findfile, import_fresh_module, gc_collect
|
||||||
|
|
||||||
pyET = import_fresh_module('xml.etree.ElementTree', blocked=['_elementtree'])
|
pyET = None
|
||||||
|
ET = None
|
||||||
|
|
||||||
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
|
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
|
||||||
try:
|
try:
|
||||||
@ -209,10 +210,8 @@ def interface():
|
|||||||
|
|
||||||
These methods return an iterable. See bug 6472.
|
These methods return an iterable. See bug 6472.
|
||||||
|
|
||||||
>>> check_method(element.iter("tag").__next__)
|
|
||||||
>>> check_method(element.iterfind("tag").__next__)
|
>>> check_method(element.iterfind("tag").__next__)
|
||||||
>>> check_method(element.iterfind("*").__next__)
|
>>> check_method(element.iterfind("*").__next__)
|
||||||
>>> check_method(tree.iter("tag").__next__)
|
|
||||||
>>> check_method(tree.iterfind("tag").__next__)
|
>>> check_method(tree.iterfind("tag").__next__)
|
||||||
>>> check_method(tree.iterfind("*").__next__)
|
>>> check_method(tree.iterfind("*").__next__)
|
||||||
|
|
||||||
@ -291,42 +290,6 @@ def cdata():
|
|||||||
'<tag>hello</tag>'
|
'<tag>hello</tag>'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Only with Python implementation
|
|
||||||
def simplefind():
|
|
||||||
"""
|
|
||||||
Test find methods using the elementpath fallback.
|
|
||||||
|
|
||||||
>>> ElementTree = pyET
|
|
||||||
|
|
||||||
>>> CurrentElementPath = ElementTree.ElementPath
|
|
||||||
>>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
|
|
||||||
>>> elem = ElementTree.XML(SAMPLE_XML)
|
|
||||||
>>> elem.find("tag").tag
|
|
||||||
'tag'
|
|
||||||
>>> ElementTree.ElementTree(elem).find("tag").tag
|
|
||||||
'tag'
|
|
||||||
>>> elem.findtext("tag")
|
|
||||||
'text'
|
|
||||||
>>> elem.findtext("tog")
|
|
||||||
>>> elem.findtext("tog", "default")
|
|
||||||
'default'
|
|
||||||
>>> ElementTree.ElementTree(elem).findtext("tag")
|
|
||||||
'text'
|
|
||||||
>>> summarize_list(elem.findall("tag"))
|
|
||||||
['tag', 'tag']
|
|
||||||
>>> summarize_list(elem.findall(".//tag"))
|
|
||||||
['tag', 'tag', 'tag']
|
|
||||||
|
|
||||||
Path syntax doesn't work in this case.
|
|
||||||
|
|
||||||
>>> elem.find("section/tag")
|
|
||||||
>>> elem.findtext("section/tag")
|
|
||||||
>>> summarize_list(elem.findall("section/tag"))
|
|
||||||
[]
|
|
||||||
|
|
||||||
>>> ElementTree.ElementPath = CurrentElementPath
|
|
||||||
"""
|
|
||||||
|
|
||||||
def find():
|
def find():
|
||||||
"""
|
"""
|
||||||
Test find methods (including xpath syntax).
|
Test find methods (including xpath syntax).
|
||||||
@ -1002,36 +965,6 @@ def methods():
|
|||||||
'1 < 2\n'
|
'1 < 2\n'
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def iterators():
|
|
||||||
"""
|
|
||||||
Test iterators.
|
|
||||||
|
|
||||||
>>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
|
|
||||||
>>> summarize_list(e.iter())
|
|
||||||
['html', 'body', 'i']
|
|
||||||
>>> summarize_list(e.find("body").iter())
|
|
||||||
['body', 'i']
|
|
||||||
>>> summarize(next(e.iter()))
|
|
||||||
'html'
|
|
||||||
>>> "".join(e.itertext())
|
|
||||||
'this is a paragraph...'
|
|
||||||
>>> "".join(e.find("body").itertext())
|
|
||||||
'this is a paragraph.'
|
|
||||||
>>> next(e.itertext())
|
|
||||||
'this is a '
|
|
||||||
|
|
||||||
Method iterparse should return an iterator. See bug 6472.
|
|
||||||
|
|
||||||
>>> sourcefile = serialize(e, to_string=False)
|
|
||||||
>>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
|
|
||||||
('end', <Element 'i' at 0x...>)
|
|
||||||
|
|
||||||
>>> tree = ET.ElementTree(None)
|
|
||||||
>>> tree.iter()
|
|
||||||
Traceback (most recent call last):
|
|
||||||
AttributeError: 'NoneType' object has no attribute 'iter'
|
|
||||||
"""
|
|
||||||
|
|
||||||
ENTITY_XML = """\
|
ENTITY_XML = """\
|
||||||
<!DOCTYPE points [
|
<!DOCTYPE points [
|
||||||
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
|
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
|
||||||
@ -1339,6 +1272,7 @@ XINCLUDE["default.xml"] = """\
|
|||||||
</document>
|
</document>
|
||||||
""".format(html.escape(SIMPLE_XMLFILE, True))
|
""".format(html.escape(SIMPLE_XMLFILE, True))
|
||||||
|
|
||||||
|
|
||||||
def xinclude_loader(href, parse="xml", encoding=None):
|
def xinclude_loader(href, parse="xml", encoding=None):
|
||||||
try:
|
try:
|
||||||
data = XINCLUDE[href]
|
data = XINCLUDE[href]
|
||||||
@ -1411,22 +1345,6 @@ def xinclude():
|
|||||||
>>> # print(serialize(document)) # C5
|
>>> # print(serialize(document)) # C5
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def xinclude_default():
|
|
||||||
"""
|
|
||||||
>>> from xml.etree import ElementInclude
|
|
||||||
|
|
||||||
>>> document = xinclude_loader("default.xml")
|
|
||||||
>>> ElementInclude.include(document)
|
|
||||||
>>> print(serialize(document)) # default
|
|
||||||
<document>
|
|
||||||
<p>Example.</p>
|
|
||||||
<root>
|
|
||||||
<element key="value">text</element>
|
|
||||||
<element>text</element>tail
|
|
||||||
<empty-element />
|
|
||||||
</root>
|
|
||||||
</document>
|
|
||||||
"""
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# badly formatted xi:include tags
|
# badly formatted xi:include tags
|
||||||
@ -1917,9 +1835,8 @@ class ElementTreeTest(unittest.TestCase):
|
|||||||
self.assertIsInstance(ET.QName, type)
|
self.assertIsInstance(ET.QName, type)
|
||||||
self.assertIsInstance(ET.ElementTree, type)
|
self.assertIsInstance(ET.ElementTree, type)
|
||||||
self.assertIsInstance(ET.Element, type)
|
self.assertIsInstance(ET.Element, type)
|
||||||
# XXX issue 14128 with C ElementTree
|
self.assertIsInstance(ET.TreeBuilder, type)
|
||||||
# self.assertIsInstance(ET.TreeBuilder, type)
|
self.assertIsInstance(ET.XMLParser, type)
|
||||||
# self.assertIsInstance(ET.XMLParser, type)
|
|
||||||
|
|
||||||
def test_Element_subclass_trivial(self):
|
def test_Element_subclass_trivial(self):
|
||||||
class MyElement(ET.Element):
|
class MyElement(ET.Element):
|
||||||
@ -1953,6 +1870,73 @@ class ElementTreeTest(unittest.TestCase):
|
|||||||
self.assertEqual(mye.newmethod(), 'joe')
|
self.assertEqual(mye.newmethod(), 'joe')
|
||||||
|
|
||||||
|
|
||||||
|
class ElementIterTest(unittest.TestCase):
    """Exercise Element.iter(), Element.itertext() and related iterators."""

    def _ilist(self, elem, tag=None):
        """Return the summarized list of elements produced by elem.iter(tag)."""
        found = elem.iter(tag)
        return summarize_list(found)

    def test_basic(self):
        """iter()/itertext() walk the tree in document order."""
        root = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = root.find('body')

        # Element iteration, from the root and from a sub-element.
        self.assertEqual(self._ilist(root), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        first = next(root.iter())
        self.assertEqual(first.tag, 'html')

        # Text iteration: inner text only, never the tail of the start node.
        self.assertEqual(''.join(root.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(body.itertext()), 'this is a paragraph.')
        self.assertEqual(next(root.itertext()), 'this is a ')

        # iterparse should return an iterator
        source = serialize(root, to_string=False)
        event, _ = next(ET.iterparse(source))
        self.assertEqual(event, 'end')

        # A tree without a root has nothing to delegate iter() to.
        empty_tree = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            empty_tree.iter()

    def test_corners(self):
        """Check small and mutated trees built up step by step."""
        # single root, no subelements
        a = ET.Element('a')
        self.assertEqual(self._ilist(a), ['a'])

        # one child
        b = ET.SubElement(a, 'b')
        self.assertEqual(self._ilist(a), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(b, 'c')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(a, 'd')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])

        # replace first child by second
        a[0] = a[1]
        del a[1]
        self.assertEqual(self._ilist(a), ['a', 'd'])

    def test_iter_by_tag(self):
        """Filtering by tag, including the None and '*' wildcards."""
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        # make sure both tag=None and tag='*' return all tags
        everything = [
            'document', 'house', 'room', 'room',
            'shed', 'house', 'room',
        ]
        self.assertEqual(self._ilist(doc), everything)
        self.assertEqual(self._ilist(doc, '*'), everything)
|
||||||
|
|
||||||
|
|
||||||
class TreeBuilderTest(unittest.TestCase):
|
class TreeBuilderTest(unittest.TestCase):
|
||||||
sample1 = ('<!DOCTYPE html PUBLIC'
|
sample1 = ('<!DOCTYPE html PUBLIC'
|
||||||
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
|
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
|
||||||
@ -2026,7 +2010,20 @@ class TreeBuilderTest(unittest.TestCase):
|
|||||||
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
|
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
|
||||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
||||||
|
|
||||||
|
class XincludeTest(unittest.TestCase):
|
||||||
|
def test_xinclude_default(self):
|
||||||
|
from xml.etree import ElementInclude
|
||||||
|
doc = xinclude_loader('default.xml')
|
||||||
|
ElementInclude.include(doc)
|
||||||
|
s = serialize(doc)
|
||||||
|
self.assertEqual(s.strip(), '''<document>
|
||||||
|
<p>Example.</p>
|
||||||
|
<root>
|
||||||
|
<element key="value">text</element>
|
||||||
|
<element>text</element>tail
|
||||||
|
<empty-element />
|
||||||
|
</root>
|
||||||
|
</document>''')
|
||||||
class XMLParserTest(unittest.TestCase):
|
class XMLParserTest(unittest.TestCase):
|
||||||
sample1 = '<file><line>22</line></file>'
|
sample1 = '<file><line>22</line></file>'
|
||||||
sample2 = ('<!DOCTYPE html PUBLIC'
|
sample2 = ('<!DOCTYPE html PUBLIC'
|
||||||
@ -2073,13 +2070,6 @@ class XMLParserTest(unittest.TestCase):
|
|||||||
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
|
||||||
|
|
||||||
|
|
||||||
class NoAcceleratorTest(unittest.TestCase):
|
|
||||||
# Test that the C accelerator was not imported for pyET
|
|
||||||
def test_correct_import_pyET(self):
|
|
||||||
self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
|
|
||||||
self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
|
|
||||||
|
|
||||||
|
|
||||||
class NamespaceParseTest(unittest.TestCase):
|
class NamespaceParseTest(unittest.TestCase):
|
||||||
def test_find_with_namespace(self):
|
def test_find_with_namespace(self):
|
||||||
nsmap = {'h': 'hello', 'f': 'foo'}
|
nsmap = {'h': 'hello', 'f': 'foo'}
|
||||||
@ -2090,7 +2080,6 @@ class NamespaceParseTest(unittest.TestCase):
|
|||||||
self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
|
self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ElementSlicingTest(unittest.TestCase):
|
class ElementSlicingTest(unittest.TestCase):
|
||||||
def _elem_tags(self, elemlist):
|
def _elem_tags(self, elemlist):
|
||||||
return [e.tag for e in elemlist]
|
return [e.tag for e in elemlist]
|
||||||
@ -2232,6 +2221,36 @@ class KeywordArgsTest(unittest.TestCase):
|
|||||||
with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
|
with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
|
||||||
ET.Element('a', attrib="I'm not a dict")
|
ET.Element('a', attrib="I'm not a dict")
|
||||||
|
|
||||||
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
|
@unittest.skipUnless(pyET, 'only for the Python version')
|
||||||
|
class NoAcceleratorTest(unittest.TestCase):
|
||||||
|
# Test that the C accelerator was not imported for pyET
|
||||||
|
def test_correct_import_pyET(self):
|
||||||
|
self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
|
||||||
|
self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
|
||||||
|
|
||||||
|
|
||||||
|
class ElementPathFallbackTest(unittest.TestCase):
    """Test the find methods using the _SimpleElementPath fallback."""

    def test_fallback(self):
        """Simple tag lookups work with the fallback; path syntax does not."""
        current_ElementPath = ET.ElementPath
        ET.ElementPath = ET._SimpleElementPath()
        try:
            elem = ET.XML(SAMPLE_XML)
            self.assertEqual(elem.find('tag').tag, 'tag')
            self.assertEqual(ET.ElementTree(elem).find('tag').tag, 'tag')
            self.assertEqual(elem.findtext('tag'), 'text')
            self.assertIsNone(elem.findtext('tog'))
            self.assertEqual(elem.findtext('tog', 'default'), 'default')
            self.assertEqual(ET.ElementTree(elem).findtext('tag'), 'text')
            self.assertEqual(summarize_list(elem.findall('tag')), ['tag', 'tag'])
            self.assertEqual(summarize_list(elem.findall('.//tag')),
                             ['tag', 'tag', 'tag'])

            # Path syntax doesn't work in this case.
            self.assertIsNone(elem.find('section/tag'))
            self.assertIsNone(elem.findtext('section/tag'))
            self.assertEqual(summarize_list(elem.findall('section/tag')), [])
        finally:
            # Always undo the monkey-patch, even when an assertion fails, so
            # the fallback does not leak into tests that run afterwards.
            ET.ElementPath = current_ElementPath
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
|
|
||||||
@ -2276,31 +2295,43 @@ class CleanContext(object):
|
|||||||
self.checkwarnings.__exit__(*args)
|
self.checkwarnings.__exit__(*args)
|
||||||
|
|
||||||
|
|
||||||
def test_main(module=pyET):
|
def test_main(module=None):
|
||||||
from test import test_xml_etree
|
# When invoked without a module, runs the Python ET tests by loading pyET.
|
||||||
|
# Otherwise, uses the given module as the ET.
|
||||||
|
if module is None:
|
||||||
|
global pyET
|
||||||
|
pyET = import_fresh_module('xml.etree.ElementTree',
|
||||||
|
blocked=['_elementtree'])
|
||||||
|
module = pyET
|
||||||
|
|
||||||
# The same doctests are used for both the Python and the C implementations
|
global ET
|
||||||
test_xml_etree.ET = module
|
ET = module
|
||||||
|
|
||||||
test_classes = [
|
test_classes = [
|
||||||
ElementSlicingTest,
|
ElementSlicingTest,
|
||||||
BasicElementTest,
|
BasicElementTest,
|
||||||
StringIOTest,
|
StringIOTest,
|
||||||
ParseErrorTest,
|
ParseErrorTest,
|
||||||
|
XincludeTest,
|
||||||
ElementTreeTest,
|
ElementTreeTest,
|
||||||
NamespaceParseTest,
|
ElementIterTest,
|
||||||
TreeBuilderTest,
|
TreeBuilderTest,
|
||||||
XMLParserTest,
|
]
|
||||||
KeywordArgsTest]
|
|
||||||
if module is pyET:
|
# These tests will only run for the pure-Python version that doesn't import
|
||||||
# Run the tests specific to the Python implementation
|
# _elementtree. We can't use skipUnless here, because pyET is filled in only
|
||||||
test_classes += [NoAcceleratorTest]
|
# after the module is loaded.
|
||||||
|
if pyET:
|
||||||
|
test_classes.extend([
|
||||||
|
NoAcceleratorTest,
|
||||||
|
ElementPathFallbackTest,
|
||||||
|
])
|
||||||
|
|
||||||
support.run_unittest(*test_classes)
|
support.run_unittest(*test_classes)
|
||||||
|
|
||||||
# XXX the C module should give the same warnings as the Python module
|
# XXX the C module should give the same warnings as the Python module
|
||||||
with CleanContext(quiet=(module is not pyET)):
|
with CleanContext(quiet=(module is not pyET)):
|
||||||
support.run_doctest(test_xml_etree, verbosity=True)
|
support.run_doctest(sys.modules[__name__], verbosity=True)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test_main()
|
test_main()
|
||||||
|
@ -8,31 +8,6 @@ cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree'])
|
|||||||
cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree'])
|
cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree'])
|
||||||
|
|
||||||
|
|
||||||
# cElementTree specific tests
|
|
||||||
|
|
||||||
def sanity():
|
|
||||||
r"""
|
|
||||||
Import sanity.
|
|
||||||
|
|
||||||
Issue #6697.
|
|
||||||
|
|
||||||
>>> cElementTree = cET
|
|
||||||
>>> e = cElementTree.Element('a')
|
|
||||||
>>> getattr(e, '\uD800') # doctest: +ELLIPSIS
|
|
||||||
Traceback (most recent call last):
|
|
||||||
...
|
|
||||||
UnicodeEncodeError: ...
|
|
||||||
|
|
||||||
>>> p = cElementTree.XMLParser()
|
|
||||||
>>> p.version.split()[0]
|
|
||||||
'Expat'
|
|
||||||
>>> getattr(p, '\uD800')
|
|
||||||
Traceback (most recent call last):
|
|
||||||
...
|
|
||||||
AttributeError: 'XMLParser' object has no attribute '\ud800'
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class MiscTests(unittest.TestCase):
|
class MiscTests(unittest.TestCase):
|
||||||
# Issue #8651.
|
# Issue #8651.
|
||||||
@support.bigmemtest(size=support._2G + 100, memuse=1)
|
@support.bigmemtest(size=support._2G + 100, memuse=1)
|
||||||
@ -46,6 +21,7 @@ class MiscTests(unittest.TestCase):
|
|||||||
finally:
|
finally:
|
||||||
data = None
|
data = None
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipUnless(cET, 'requires _elementtree')
|
@unittest.skipUnless(cET, 'requires _elementtree')
|
||||||
class TestAliasWorking(unittest.TestCase):
|
class TestAliasWorking(unittest.TestCase):
|
||||||
# Test that the cET alias module is alive
|
# Test that the cET alias module is alive
|
||||||
@ -53,6 +29,7 @@ class TestAliasWorking(unittest.TestCase):
|
|||||||
e = cET_alias.Element('foo')
|
e = cET_alias.Element('foo')
|
||||||
self.assertEqual(e.tag, 'foo')
|
self.assertEqual(e.tag, 'foo')
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipUnless(cET, 'requires _elementtree')
|
@unittest.skipUnless(cET, 'requires _elementtree')
|
||||||
class TestAcceleratorImported(unittest.TestCase):
|
class TestAcceleratorImported(unittest.TestCase):
|
||||||
# Test that the C accelerator was imported, as expected
|
# Test that the C accelerator was imported, as expected
|
||||||
@ -67,7 +44,6 @@ def test_main():
|
|||||||
from test import test_xml_etree, test_xml_etree_c
|
from test import test_xml_etree, test_xml_etree_c
|
||||||
|
|
||||||
# Run the tests specific to the C implementation
|
# Run the tests specific to the C implementation
|
||||||
support.run_doctest(test_xml_etree_c, verbosity=True)
|
|
||||||
support.run_unittest(
|
support.run_unittest(
|
||||||
MiscTests,
|
MiscTests,
|
||||||
TestAliasWorking,
|
TestAliasWorking,
|
||||||
|
@ -916,11 +916,7 @@ def _namespaces(elem, default_namespace=None):
|
|||||||
_raise_serialization_error(qname)
|
_raise_serialization_error(qname)
|
||||||
|
|
||||||
# populate qname and namespaces table
|
# populate qname and namespaces table
|
||||||
try:
|
for elem in elem.iter():
|
||||||
iterate = elem.iter
|
|
||||||
except AttributeError:
|
|
||||||
iterate = elem.getiterator # cET compatibility
|
|
||||||
for elem in iterate():
|
|
||||||
tag = elem.tag
|
tag = elem.tag
|
||||||
if isinstance(tag, QName):
|
if isinstance(tag, QName):
|
||||||
if tag.text not in qnames:
|
if tag.text not in qnames:
|
||||||
|
@ -103,8 +103,6 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
|
|||||||
/* glue functions (see the init function for details) */
|
/* glue functions (see the init function for details) */
|
||||||
static PyObject* elementtree_parseerror_obj;
|
static PyObject* elementtree_parseerror_obj;
|
||||||
static PyObject* elementtree_deepcopy_obj;
|
static PyObject* elementtree_deepcopy_obj;
|
||||||
static PyObject* elementtree_iter_obj;
|
|
||||||
static PyObject* elementtree_itertext_obj;
|
|
||||||
static PyObject* elementpath_obj;
|
static PyObject* elementpath_obj;
|
||||||
|
|
||||||
/* helpers */
|
/* helpers */
|
||||||
@ -1109,67 +1107,32 @@ element_getchildren(ElementObject* self, PyObject* args)
|
|||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
|
||||||
element_iter(ElementObject* self, PyObject* args)
|
|
||||||
{
|
|
||||||
PyObject* result;
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
element_iter(ElementObject *self, PyObject *args)
|
||||||
|
{
|
||||||
PyObject* tag = Py_None;
|
PyObject* tag = Py_None;
|
||||||
if (!PyArg_ParseTuple(args, "|O:iter", &tag))
|
if (!PyArg_ParseTuple(args, "|O:iter", &tag))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (!elementtree_iter_obj) {
|
return create_elementiter(self, tag, 0);
|
||||||
PyErr_SetString(
|
|
||||||
PyExc_RuntimeError,
|
|
||||||
"iter helper not found"
|
|
||||||
);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
args = PyTuple_New(2);
|
|
||||||
if (!args)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
|
|
||||||
Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
|
|
||||||
|
|
||||||
result = PyObject_CallObject(elementtree_iter_obj, args);
|
|
||||||
|
|
||||||
Py_DECREF(args);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
element_itertext(ElementObject* self, PyObject* args)
|
element_itertext(ElementObject* self, PyObject* args)
|
||||||
{
|
{
|
||||||
PyObject* result;
|
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, ":itertext"))
|
if (!PyArg_ParseTuple(args, ":itertext"))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (!elementtree_itertext_obj) {
|
return create_elementiter(self, Py_None, 1);
|
||||||
PyErr_SetString(
|
|
||||||
PyExc_RuntimeError,
|
|
||||||
"itertext helper not found"
|
|
||||||
);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
args = PyTuple_New(1);
|
|
||||||
if (!args)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
|
|
||||||
|
|
||||||
result = PyObject_CallObject(elementtree_itertext_obj, args);
|
|
||||||
|
|
||||||
Py_DECREF(args);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
element_getitem(PyObject* self_, Py_ssize_t index)
|
element_getitem(PyObject* self_, Py_ssize_t index)
|
||||||
{
|
{
|
||||||
@ -1790,6 +1753,267 @@ static PyTypeObject Element_Type = {
|
|||||||
0, /* tp_free */
|
0, /* tp_free */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/******************************* Element iterator ****************************/
|
||||||
|
|
||||||
|
/* ElementIterObject represents the iteration state over an XML element in
|
||||||
|
* pre-order traversal. To keep track of which sub-element should be returned
|
||||||
|
* next, a stack of parents is maintained. This is a standard stack-based
|
||||||
|
* iterative pre-order traversal of a tree.
|
||||||
|
* The stack is managed using a single-linked list starting at parent_stack.
|
||||||
|
* Each stack node contains the saved parent to which we should return after
|
||||||
|
* the current one is exhausted, and the next child to examine in that parent.
|
||||||
|
*/
|
||||||
|
typedef struct ParentLocator_t {
|
||||||
|
ElementObject *parent;
|
||||||
|
Py_ssize_t child_index;
|
||||||
|
struct ParentLocator_t *next;
|
||||||
|
} ParentLocator;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
ParentLocator *parent_stack;
|
||||||
|
ElementObject *root_element;
|
||||||
|
PyObject *sought_tag;
|
||||||
|
int root_done;
|
||||||
|
int gettext;
|
||||||
|
} ElementIterObject;
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
elementiter_dealloc(ElementIterObject *it)
|
||||||
|
{
|
||||||
|
ParentLocator *p = it->parent_stack;
|
||||||
|
while (p) {
|
||||||
|
ParentLocator *temp = p;
|
||||||
|
Py_XDECREF(p->parent);
|
||||||
|
p = p->next;
|
||||||
|
PyObject_Free(temp);
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_XDECREF(it->sought_tag);
|
||||||
|
Py_XDECREF(it->root_element);
|
||||||
|
|
||||||
|
PyObject_GC_UnTrack(it);
|
||||||
|
PyObject_GC_Del(it);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
|
||||||
|
{
|
||||||
|
ParentLocator *p = it->parent_stack;
|
||||||
|
while (p) {
|
||||||
|
Py_VISIT(p->parent);
|
||||||
|
p = p->next;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_VISIT(it->root_element);
|
||||||
|
Py_VISIT(it->sought_tag);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Helper function for elementiter_next. Add a new parent to the parent stack.
|
||||||
|
*/
|
||||||
|
static ParentLocator *
|
||||||
|
parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
|
||||||
|
{
|
||||||
|
ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
|
||||||
|
if (new_node) {
|
||||||
|
new_node->parent = parent;
|
||||||
|
Py_INCREF(parent);
|
||||||
|
new_node->child_index = 0;
|
||||||
|
new_node->next = stack;
|
||||||
|
}
|
||||||
|
return new_node;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
elementiter_next(ElementIterObject *it)
|
||||||
|
{
|
||||||
|
/* Sub-element iterator.
|
||||||
|
*
|
||||||
|
* A short note on gettext: this function serves both the iter() and
|
||||||
|
* itertext() methods to avoid code duplication. However, there are a few
|
||||||
|
* small differences in the way these iterations work. Namely:
|
||||||
|
* - itertext() only yields text from nodes that have it, and continues
|
||||||
|
* iterating when a node doesn't have text (so it doesn't return any
|
||||||
|
* node like iter())
|
||||||
|
* - itertext() also has to handle tail, after finishing with all the
|
||||||
|
* children of a node.
|
||||||
|
*/
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
/* Handle the case reached in the beginning and end of iteration, where
|
||||||
|
* the parent stack is empty. The root_done flag gives us indication
|
||||||
|
* whether we've just started iterating (so root_done is 0), in which
|
||||||
|
* case the root is returned. If root_done is 1 and we're here, the
|
||||||
|
* iterator is exhausted.
|
||||||
|
*/
|
||||||
|
if (!it->parent_stack->parent) {
|
||||||
|
if (it->root_done) {
|
||||||
|
PyErr_SetNone(PyExc_StopIteration);
|
||||||
|
return NULL;
|
||||||
|
} else {
|
||||||
|
it->parent_stack = parent_stack_push_new(it->parent_stack,
|
||||||
|
it->root_element);
|
||||||
|
if (!it->parent_stack) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
it->root_done = 1;
|
||||||
|
if (it->sought_tag == Py_None ||
|
||||||
|
PyObject_RichCompareBool(it->root_element->tag,
|
||||||
|
it->sought_tag, Py_EQ) == 1) {
|
||||||
|
if (it->gettext) {
|
||||||
|
PyObject *text = JOIN_OBJ(it->root_element->text);
|
||||||
|
if (PyObject_IsTrue(text)) {
|
||||||
|
Py_INCREF(text);
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Py_INCREF(it->root_element);
|
||||||
|
return (PyObject *)it->root_element;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* See if there are children left to traverse in the current parent. If
|
||||||
|
* yes, visit the next child. If not, pop the stack and try again.
|
||||||
|
*/
|
||||||
|
ElementObject *cur_parent = it->parent_stack->parent;
|
||||||
|
Py_ssize_t child_index = it->parent_stack->child_index;
|
||||||
|
if (cur_parent->extra && child_index < cur_parent->extra->length) {
|
||||||
|
ElementObject *child = (ElementObject *)
|
||||||
|
cur_parent->extra->children[child_index];
|
||||||
|
it->parent_stack->child_index++;
|
||||||
|
it->parent_stack = parent_stack_push_new(it->parent_stack,
|
||||||
|
child);
|
||||||
|
if (!it->parent_stack) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (it->gettext) {
|
||||||
|
PyObject *text = JOIN_OBJ(child->text);
|
||||||
|
if (PyObject_IsTrue(text)) {
|
||||||
|
Py_INCREF(text);
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
} else if (it->sought_tag == Py_None ||
|
||||||
|
PyObject_RichCompareBool(child->tag,
|
||||||
|
it->sought_tag, Py_EQ) == 1) {
|
||||||
|
Py_INCREF(child);
|
||||||
|
return (PyObject *)child;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;
|
||||||
|
ParentLocator *next = it->parent_stack->next;
|
||||||
|
Py_XDECREF(it->parent_stack->parent);
|
||||||
|
PyObject_Free(it->parent_stack);
|
||||||
|
it->parent_stack = next;
|
||||||
|
|
||||||
|
/* Note that extra condition on it->parent_stack->parent here;
|
||||||
|
* this is because itertext() is supposed to only return *inner*
|
||||||
|
* text, not text following the element it began iteration with.
|
||||||
|
*/
|
||||||
|
if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
|
||||||
|
Py_INCREF(tail);
|
||||||
|
return tail;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyTypeObject ElementIter_Type = {
|
||||||
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
|
"_elementtree._element_iterator", /* tp_name */
|
||||||
|
sizeof(ElementIterObject), /* tp_basicsize */
|
||||||
|
0, /* tp_itemsize */
|
||||||
|
/* methods */
|
||||||
|
(destructor)elementiter_dealloc, /* tp_dealloc */
|
||||||
|
0, /* tp_print */
|
||||||
|
0, /* tp_getattr */
|
||||||
|
0, /* tp_setattr */
|
||||||
|
0, /* tp_reserved */
|
||||||
|
0, /* tp_repr */
|
||||||
|
0, /* tp_as_number */
|
||||||
|
0, /* tp_as_sequence */
|
||||||
|
0, /* tp_as_mapping */
|
||||||
|
0, /* tp_hash */
|
||||||
|
0, /* tp_call */
|
||||||
|
0, /* tp_str */
|
||||||
|
0, /* tp_getattro */
|
||||||
|
0, /* tp_setattro */
|
||||||
|
0, /* tp_as_buffer */
|
||||||
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
|
||||||
|
0, /* tp_doc */
|
||||||
|
(traverseproc)elementiter_traverse, /* tp_traverse */
|
||||||
|
0, /* tp_clear */
|
||||||
|
0, /* tp_richcompare */
|
||||||
|
0, /* tp_weaklistoffset */
|
||||||
|
PyObject_SelfIter, /* tp_iter */
|
||||||
|
(iternextfunc)elementiter_next, /* tp_iternext */
|
||||||
|
0, /* tp_methods */
|
||||||
|
0, /* tp_members */
|
||||||
|
0, /* tp_getset */
|
||||||
|
0, /* tp_base */
|
||||||
|
0, /* tp_dict */
|
||||||
|
0, /* tp_descr_get */
|
||||||
|
0, /* tp_descr_set */
|
||||||
|
0, /* tp_dictoffset */
|
||||||
|
0, /* tp_init */
|
||||||
|
0, /* tp_alloc */
|
||||||
|
0, /* tp_new */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
create_elementiter(ElementObject *self, PyObject *tag, int gettext)
|
||||||
|
{
|
||||||
|
ElementIterObject *it;
|
||||||
|
PyObject *star = NULL;
|
||||||
|
|
||||||
|
it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
|
||||||
|
if (!it)
|
||||||
|
return NULL;
|
||||||
|
if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
|
||||||
|
PyObject_GC_Del(it);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
it->parent_stack->parent = NULL;
|
||||||
|
it->parent_stack->child_index = 0;
|
||||||
|
it->parent_stack->next = NULL;
|
||||||
|
|
||||||
|
if (PyUnicode_Check(tag))
|
||||||
|
star = PyUnicode_FromString("*");
|
||||||
|
else if (PyBytes_Check(tag))
|
||||||
|
star = PyBytes_FromString("*");
|
||||||
|
|
||||||
|
if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
|
||||||
|
tag = Py_None;
|
||||||
|
|
||||||
|
Py_XDECREF(star);
|
||||||
|
it->sought_tag = tag;
|
||||||
|
it->root_done = 0;
|
||||||
|
it->gettext = gettext;
|
||||||
|
it->root_element = self;
|
||||||
|
|
||||||
|
Py_INCREF(self);
|
||||||
|
Py_INCREF(tag);
|
||||||
|
|
||||||
|
PyObject_GC_Track(it);
|
||||||
|
return (PyObject *)it;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ==================================================================== */
|
/* ==================================================================== */
|
||||||
/* the tree builder type */
|
/* the tree builder type */
|
||||||
|
|
||||||
@ -3238,8 +3462,7 @@ static struct PyModuleDef _elementtreemodule = {
|
|||||||
PyMODINIT_FUNC
|
PyMODINIT_FUNC
|
||||||
PyInit__elementtree(void)
|
PyInit__elementtree(void)
|
||||||
{
|
{
|
||||||
PyObject *m, *g, *temp;
|
PyObject *m, *temp;
|
||||||
char* bootstrap;
|
|
||||||
|
|
||||||
/* Initialize object types */
|
/* Initialize object types */
|
||||||
if (PyType_Ready(&TreeBuilder_Type) < 0)
|
if (PyType_Ready(&TreeBuilder_Type) < 0)
|
||||||
@ -3255,44 +3478,6 @@ PyInit__elementtree(void)
|
|||||||
if (!m)
|
if (!m)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
/* The code below requires that the module gets already added
|
|
||||||
to sys.modules. */
|
|
||||||
PyDict_SetItemString(PyImport_GetModuleDict(),
|
|
||||||
_elementtreemodule.m_name,
|
|
||||||
m);
|
|
||||||
|
|
||||||
/* python glue code */
|
|
||||||
|
|
||||||
g = PyDict_New();
|
|
||||||
if (!g)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
|
|
||||||
|
|
||||||
bootstrap = (
|
|
||||||
"def iter(node, tag=None):\n" /* helper */
|
|
||||||
" if tag == '*':\n"
|
|
||||||
" tag = None\n"
|
|
||||||
" if tag is None or node.tag == tag:\n"
|
|
||||||
" yield node\n"
|
|
||||||
" for node in node:\n"
|
|
||||||
" for node in iter(node, tag):\n"
|
|
||||||
" yield node\n"
|
|
||||||
|
|
||||||
"def itertext(node):\n" /* helper */
|
|
||||||
" if node.text:\n"
|
|
||||||
" yield node.text\n"
|
|
||||||
" for e in node:\n"
|
|
||||||
" for s in e.itertext():\n"
|
|
||||||
" yield s\n"
|
|
||||||
" if e.tail:\n"
|
|
||||||
" yield e.tail\n"
|
|
||||||
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
if (!(temp = PyImport_ImportModule("copy")))
|
if (!(temp = PyImport_ImportModule("copy")))
|
||||||
return NULL;
|
return NULL;
|
||||||
elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
|
elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
|
||||||
@ -3301,9 +3486,6 @@ PyInit__elementtree(void)
|
|||||||
if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
|
if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
elementtree_iter_obj = PyDict_GetItemString(g, "iter");
|
|
||||||
elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
|
|
||||||
|
|
||||||
/* link against pyexpat */
|
/* link against pyexpat */
|
||||||
expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
|
expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
|
||||||
if (expat_capi) {
|
if (expat_capi) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user