def test_parsing_with_duplicates():
xml_with_duplicates = io.BytesIO(
b"""<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
<title>Another Compiler</title>
<codemeta:runtimePlatform>GNU/Linux</codemeta:runtimePlatform>
<codemeta:license>
<codemeta:name>GPL3.0</codemeta:name>
<codemeta:url>https://opensource.org/licenses/GPL-3.0</codemeta:url>
</codemeta:license>
<codemeta:runtimePlatform>Un*x</codemeta:runtimePlatform>
<codemeta:author>
<codemeta:name>author1</codemeta:name>
<codemeta:affiliation>Inria</codemeta:affiliation>
</codemeta:author>
<codemeta:author>
<codemeta:name>author2</codemeta:name>
<codemeta:affiliation>Inria</codemeta:affiliation>
</codemeta:author>
<codemeta:programmingLanguage>ocaml</codemeta:programmingLanguage>
<codemeta:programmingLanguage>haskell</codemeta:programmingLanguage>
<codemeta:license>
<codemeta:name>spdx</codemeta:name>
<codemeta:url>http://spdx.org</codemeta:url>
</codemeta:license>
<codemeta:programmingLanguage>python3</codemeta:programmingLanguage>
</entry>"""
)
> actual_result = SWHXMLParser().parse(xml_with_duplicates)
.tox/py3/lib/python3.7/site-packages/swh/deposit/tests/api/test_parsers.py:89:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:52: in parse
return _parse_xml(stream, encoding=encoding)
.tox/py3/lib/python3.7/site-packages/swh/deposit/utils.py:37: in parse_xml
dict_constructor=dict,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
xml_input = <_io.BytesIO object at 0x7f0219bffd58>, encoding = 'utf-8'
expat = <module 'xml.parsers.expat' from '/usr/lib/python3.7/xml/parsers/expat.py'>
process_namespaces = True, namespace_separator = ':', disable_entities = True
kwargs = {'dict_constructor': <class 'dict'>, 'namespaces': {'http://purl.org/dc/terms/': 'dc', 'http://purl.org/net/sword/terms/': 'sword', 'http://schema.org/': 'schema', 'http://www.w3.org/2005/Atom': 'atom', ...}}
handler = <xmltodict._DictSAXHandler object at 0x7f02196f8128>
parser = <pyexpat.xmlparser object at 0x7f0219a2cc48>
feature = 'http://apache.org/xml/features/disallow-doctype-decl'
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
namespace_separator=':', disable_entities=True, **kwargs):
"""Parse the given XML input and convert it into a dictionary.
`xml_input` can either be a `string` or a file-like object.
If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
set to `False`, they are just ignored.
Simple example::
>>> import xmltodict
>>> doc = xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>
... \"\"\")
>>> doc['a']['@prop']
u'x'
>>> doc['a']['b']
[u'1', u'2']
If `item_depth` is `0`, the function returns a dictionary for the root
element (default behavior). Otherwise, it calls `item_callback` every time
an item at the specified depth is found and returns `None` in the end
(streaming mode).
The callback function receives two parameters: the `path` from the document
root to the item (name-attribs pairs), and the `item` (dict). If the
callback's return value is false-ish, parsing will be stopped with the
:class:`ParsingInterrupted` exception.
Streaming example::
>>> def handle(path, item):
... print('path:%s item:%s' % (path, item))
... return True
...
>>> xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>\"\"\", item_depth=2, item_callback=handle)
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
The optional argument `postprocessor` is a function that takes `path`,
`key` and `value` as positional arguments and returns a new `(key, value)`
pair where both `key` and `value` may have changed. Usage example::
>>> def postprocessor(path, key, value):
... try:
... return key + ':int', int(value)
... except (ValueError, TypeError):
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:
>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
OrderedDict([(u'a', u'hello')])
You can use the force_list argument to force lists to be created even
when there is only a single child of a given level of hierarchy. The
force_list argument is a tuple of keys. If the key for a given level
of hierarchy is in the force_list argument, that level of hierarchy
will have a list as a child (even if there is only one sub-element).
The index_keys operation takes precedence over this. This is applied
after any user-supplied postprocessor has already run.
For example, given this input:
<servers>
<server>
<name>host1</name>
<os>Linux</os>
<interfaces>
<interface>
<name>em0</name>
<ip_address>10.0.0.1</ip_address>
</interface>
</interfaces>
</server>
</servers>
If called with force_list=('interface',), it will produce
this dictionary:
{'servers':
{'server':
{'name': 'host1',
'os': 'Linux',
'interfaces':
{'interface':
[ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } } }
`force_list` can also be a callable that receives `path`, `key` and
`value`. This is helpful in cases where the logic that decides whether
a list should be forced is more complex.
"""
handler = _DictSAXHandler(namespace_separator=namespace_separator,
**kwargs)
if isinstance(xml_input, _unicode):
if not encoding:
encoding = 'utf-8'
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
parser = expat.ParserCreate(
encoding,
namespace_separator
)
try:
parser.ordered_attributes = True
except AttributeError:
# Jython's expat does not support ordered_attributes
pass
parser.StartNamespaceDeclHandler = handler.startNamespaceDecl
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
parser.buffer_text = True
if disable_entities:
try:
# Attempt to disable DTD in Jython's expat parser (Xerces-J).
feature = "http://apache.org/xml/features/disallow-doctype-decl"
parser._reader.setFeature(feature, True)
except AttributeError:
# For CPython / expat parser.
# Anything not handled ends up here and entities aren't expanded.
parser.DefaultHandler = lambda x: None
# Expects an integer return; zero means failure -> expat.ExpatError.
parser.ExternalEntityRefHandler = lambda *x: 1
if hasattr(xml_input, 'read'):
> parser.ParseFile(xml_input)
E xml.parsers.expat.ExpatError: out of memory: line 1, column 0
.tox/py3/lib/python3.7/site-packages/xmltodict.py:325: ExpatError
TEST RESULT
TEST RESULT
- Run At
- Feb 24 2022, 10:00 AM