raw_content = '<?xml version="1.0" encoding="utf-8"?>\n<atom:entry xmlns:atom="http://www.w3.org/2005/Atom" xmlns:codemeta="https://...codemeta:author>\n\t<codemeta:author>\n\t\t<codemeta:name>authors</codemeta:name>\n\t</codemeta:author>\n</atom:entry>'
def parse_xml(raw_content):
"""Parse xml body.
Args:
raw_content (bytes): The content to parse
Raises:
ParserError in case of a malformed xml
Returns:
content parsed as dict.
"""
try:
> return SWHXMLParser().parse(raw_content)
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:92:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <swh.deposit.parsers.SWHXMLParser object at 0x7f30497f4128>
stream = '<?xml version="1.0" encoding="utf-8"?>\n<atom:entry xmlns:atom="http://www.w3.org/2005/Atom" xmlns:codemeta="https://...codemeta:author>\n\t<codemeta:author>\n\t\t<codemeta:name>authors</codemeta:name>\n\t</codemeta:author>\n</atom:entry>'
media_type = None, parser_context = {}
def parse(self, stream, media_type=None, parser_context=None):
"""
Parses the incoming bytestream as XML and returns the resulting data.
"""
parser_context = parser_context or {}
encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET)
> return _parse_xml(stream, encoding=encoding)
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
stream = '<?xml version="1.0" encoding="utf-8"?>\n<atom:entry xmlns:atom="http://www.w3.org/2005/Atom" xmlns:codemeta="https://...codemeta:author>\n\t<codemeta:author>\n\t\t<codemeta:name>authors</codemeta:name>\n\t</codemeta:author>\n</atom:entry>'
encoding = 'utf-8'
def parse_xml(stream, encoding="utf-8"):
data = xmltodict.parse(
stream,
encoding=encoding,
namespaces={uri: prefix for (prefix, uri) in NAMESPACES.items()},
process_namespaces=True,
> dict_constructor=dict,
)
.tox/py3/lib/python3.7/site-packages/swh/deposit/utils.py:37:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
xml_input = b'<?xml version="1.0" encoding="utf-8"?>\n<atom:entry xmlns:atom="http://www.w3.org/2005/Atom" xmlns:codemeta="https:/...codemeta:author>\n\t<codemeta:author>\n\t\t<codemeta:name>authors</codemeta:name>\n\t</codemeta:author>\n</atom:entry>'
encoding = 'utf-8'
expat = <module 'xml.parsers.expat' from '/usr/lib/python3.7/xml/parsers/expat.py'>
process_namespaces = True, namespace_separator = ':', disable_entities = True
kwargs = {'dict_constructor': <class 'dict'>, 'namespaces': {'http://purl.org/dc/terms/': 'dc', 'http://purl.org/net/sword/terms/': 'sword', 'http://schema.org/': 'schema', 'http://www.w3.org/2005/Atom': 'atom', ...}}
handler = <xmltodict._DictSAXHandler object at 0x7f30488b5a20>
parser = <pyexpat.xmlparser object at 0x7f3048676dc8>
feature = 'http://apache.org/xml/features/disallow-doctype-decl'
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
namespace_separator=':', disable_entities=True, **kwargs):
"""Parse the given XML input and convert it into a dictionary.
`xml_input` can either be a `string` or a file-like object.
If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
set to `False`, they are just ignored.
Simple example::
>>> import xmltodict
>>> doc = xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>
... \"\"\")
>>> doc['a']['@prop']
u'x'
>>> doc['a']['b']
[u'1', u'2']
If `item_depth` is `0`, the function returns a dictionary for the root
element (default behavior). Otherwise, it calls `item_callback` every time
an item at the specified depth is found and returns `None` in the end
(streaming mode).
The callback function receives two parameters: the `path` from the document
root to the item (name-attribs pairs), and the `item` (dict). If the
callback's return value is false-ish, parsing will be stopped with the
:class:`ParsingInterrupted` exception.
Streaming example::
>>> def handle(path, item):
... print('path:%s item:%s' % (path, item))
... return True
...
>>> xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>\"\"\", item_depth=2, item_callback=handle)
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
The optional argument `postprocessor` is a function that takes `path`,
`key` and `value` as positional arguments and returns a new `(key, value)`
pair where both `key` and `value` may have changed. Usage example::
>>> def postprocessor(path, key, value):
... try:
... return key + ':int', int(value)
... except (ValueError, TypeError):
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:
>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
OrderedDict([(u'a', u'hello')])
You can use the force_list argument to force lists to be created even
when there is only a single child of a given level of hierarchy. The
force_list argument is a tuple of keys. If the key for a given level
of hierarchy is in the force_list argument, that level of hierarchy
will have a list as a child (even if there is only one sub-element).
The index_keys operation takes precendence over this. This is applied
after any user-supplied postprocessor has already run.
For example, given this input:
<servers>
<server>
<name>host1</name>
<os>Linux</os>
<interfaces>
<interface>
<name>em0</name>
<ip_address>10.0.0.1</ip_address>
</interface>
</interfaces>
</server>
</servers>
If called with force_list=('interface',), it will produce
this dictionary:
{'servers':
{'server':
{'name': 'host1',
'os': 'Linux'},
'interfaces':
{'interface':
[ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } }
`force_list` can also be a callable that receives `path`, `key` and
`value`. This is helpful in cases where the logic that decides whether
a list should be forced is more complex.
"""
handler = _DictSAXHandler(namespace_separator=namespace_separator,
**kwargs)
if isinstance(xml_input, _unicode):
if not encoding:
encoding = 'utf-8'
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
parser = expat.ParserCreate(
encoding,
namespace_separator
)
try:
parser.ordered_attributes = True
except AttributeError:
# Jython's expat does not support ordered_attributes
pass
parser.StartNamespaceDeclHandler = handler.startNamespaceDecl
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
parser.buffer_text = True
if disable_entities:
try:
# Attempt to disable DTD in Jython's expat parser (Xerces-J).
feature = "http://apache.org/xml/features/disallow-doctype-decl"
parser._reader.setFeature(feature, True)
except AttributeError:
# For CPython / expat parser.
# Anything not handled ends up here and entities aren't expanded.
parser.DefaultHandler = lambda x: None
# Expects an integer return; zero means failure -> expat.ExpatError.
parser.ExternalEntityRefHandler = lambda *x: 1
if hasattr(xml_input, 'read'):
parser.ParseFile(xml_input)
else:
> parser.Parse(xml_input, True)
E xml.parsers.expat.ExpatError: out of memory: line 1, column 0
.tox/py3/lib/python3.7/site-packages/xmltodict.py:327: ExpatError
During handling of the above exception, another exception occurred:
slug = '5d399a59-0742-448d-85b7-4704169e7671'
def test_cli_client_generate_metadata_ok2(slug):
"""Generated metadata is well formed and pass service side metadata checks
"""
actual_metadata_xml = generate_metadata(
"deposit-client", "project-name", authors=["some", "authors"],
)
> actual_metadata = dict(parse_xml(actual_metadata_xml))
.tox/py3/lib/python3.7/site-packages/swh/deposit/tests/cli/test_client.py:219:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
raw_content = '<?xml version="1.0" encoding="utf-8"?>\n<atom:entry xmlns:atom="http://www.w3.org/2005/Atom" xmlns:codemeta="https://...codemeta:author>\n\t<codemeta:author>\n\t\t<codemeta:name>authors</codemeta:name>\n\t</codemeta:author>\n</atom:entry>'
def parse_xml(raw_content):
"""Parse xml body.
Args:
raw_content (bytes): The content to parse
Raises:
ParserError in case of a malformed xml
Returns:
content parsed as dict.
"""
try:
return SWHXMLParser().parse(raw_content)
except ExpatError as e:
> raise ParserError(str(e))
E swh.deposit.errors.ParserError: out of memory: line 1, column 0
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:94: ParserError
TEST RESULT
TEST RESULT
- Run At
- Feb 23 2022, 1:52 PM