raw_content = b'<entry xmlns="http://www.w3.org/2005/Atom"\n xmlns:sword="http://purl.org/net/sword/terms/"\n xmlns:dcte.../test/34/status/" />\n\n <sword:packaging>http://purl.org/net/sword/package/SimpleZip</sword:packaging>\n</entry>\n'
def parse_xml(raw_content):
"""Parse xml body.
raw_content (bytes): The content to parse
ParserError in case of a malformed xml
content parsed as dict.
> return SWHXMLParser().parse(raw_content)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <swh.deposit.parsers.SWHXMLParser object at 0x7f8502768080>
stream = b'<entry xmlns="http://www.w3.org/2005/Atom"\n xmlns:sword="http://purl.org/net/sword/terms/"\n xmlns:dcte.../test/34/status/" />\n\n <sword:packaging>http://purl.org/net/sword/package/SimpleZip</sword:packaging>\n</entry>\n'
media_type = None, parser_context = {}
def parse(self, stream, media_type=None, parser_context=None):
Parses the incoming bytestream as XML and returns the resulting data.
parser_context = parser_context or {}
encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET)
> return _parse_xml(stream, encoding=encoding)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
stream = b'<entry xmlns="http://www.w3.org/2005/Atom"\n xmlns:sword="http://purl.org/net/sword/terms/"\n xmlns:dcte.../test/34/status/" />\n\n <sword:packaging>http://purl.org/net/sword/package/SimpleZip</sword:packaging>\n</entry>\n'
encoding = 'utf-8'
def parse_xml(stream, encoding="utf-8"):
data = xmltodict.parse(
namespaces={uri: prefix for (prefix, uri) in NAMESPACES.items()},
> dict_constructor=dict,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
xml_input = b'<entry xmlns="http://www.w3.org/2005/Atom"\n xmlns:sword="http://purl.org/net/sword/terms/"\n xmlns:dcte.../test/34/status/" />\n\n <sword:packaging>http://purl.org/net/sword/package/SimpleZip</sword:packaging>\n</entry>\n'
encoding = 'utf-8'
expat = <module 'xml.parsers.expat' from '/usr/lib/python3.7/xml/parsers/expat.py'>
process_namespaces = True, namespace_separator = ':', disable_entities = True
kwargs = {'dict_constructor': <class 'dict'>, 'namespaces': {'http://purl.org/dc/terms/': 'dc', 'http://purl.org/net/sword/terms/': 'sword', 'http://schema.org/': 'schema', 'http://www.w3.org/2005/Atom': 'atom', ...}}
handler = <xmltodict._DictSAXHandler object at 0x7f8500d56e48>
parser = <pyexpat.xmlparser object at 0x7f8501021f48>
feature = 'http://apache.org/xml/features/disallow-doctype-decl'
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
namespace_separator=':', disable_entities=True, **kwargs):
"""Parse the given XML input and convert it into a dictionary.
`xml_input` can either be a `string` or a file-like object.
If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
set to `False`, they are just ignored.
Simple example::
>>> import xmltodict
>>> doc = xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>
... \"\"\")
>>> doc['a']['@prop']
>>> doc['a']['b']
[u'1', u'2']
If `item_depth` is `0`, the function returns a dictionary for the root
element (default behavior). Otherwise, it calls `item_callback` every time
an item at the specified depth is found and returns `None` in the end
(streaming mode).
The callback function receives two parameters: the `path` from the document
root to the item (name-attribs pairs), and the `item` (dict). If the
callback's return value is false-ish, parsing will be stopped with the
:class:`ParsingInterrupted` exception.
Streaming example::
>>> def handle(path, item):
... print('path:%s item:%s' % (path, item))
... return True
>>> xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>\"\"\", item_depth=2, item_callback=handle)
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
The optional argument `postprocessor` is a function that takes `path`,
`key` and `value` as positional arguments and returns a new `(key, value)`
pair where both `key` and `value` may have changed. Usage example::
>>> def postprocessor(path, key, value):
... try:
... return key + ':int', int(value)
... except (ValueError, TypeError):
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:
>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
OrderedDict([(u'a', u'hello')])
You can use the force_list argument to force lists to be created even
when there is only a single child of a given level of hierarchy. The
force_list argument is a tuple of keys. If the key for a given level
of hierarchy is in the force_list argument, that level of hierarchy
will have a list as a child (even if there is only one sub-element).
The index_keys operation takes precedence over this. This is applied
after any user-supplied postprocessor has already run.
For example, given this input:
If called with force_list=('interface',), it will produce
this dictionary:
{'name': 'host1',
'os': 'Linux'},
[ {'name': 'em0', 'ip_address': '' } ] } } }
`force_list` can also be a callable that receives `path`, `key` and
`value`. This is helpful in cases where the logic that decides whether
a list should be forced is more complex.
handler = _DictSAXHandler(namespace_separator=namespace_separator,
if isinstance(xml_input, _unicode):
if not encoding:
encoding = 'utf-8'
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
parser = expat.ParserCreate(
parser.ordered_attributes = True
except AttributeError:
# Jython's expat does not support ordered_attributes
parser.StartNamespaceDeclHandler = handler.startNamespaceDecl
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
parser.buffer_text = True
if disable_entities:
# Attempt to disable DTD in Jython's expat parser (Xerces-J).
feature = "http://apache.org/xml/features/disallow-doctype-decl"
parser._reader.setFeature(feature, True)
except AttributeError:
# For CPython / expat parser.
# Anything not handled ends up here and entities aren't expanded.
parser.DefaultHandler = lambda x: None
# Expects an integer return; zero means failure -> expat.ExpatError.
parser.ExternalEntityRefHandler = lambda *x: 1
if hasattr(xml_input, 'read'):
> parser.Parse(xml_input, True)
E xml.parsers.expat.ExpatError: out of memory: line 1, column 0
.tox/py3/lib/python3.7/site-packages/xmltodict.py:327: ExpatError
During handling of the above exception, another exception occurred:
sample_archive = {'data': b'PK\x03\x04\x14\x00\x00\x00\x00\x00\x11GXT\xcba\xb4c\x14\x00\x00\x00\x14\x00\x00\x00\x05\x00\x00\x00file1som...ytest-0/test_access_to_existing_deposi0/tmp62rwph8e', 'length': 128, 'md5sum': '75d1edea87c60dd08478f9ebddf5f455', ...}
deposit_collection = <DepositCollection: {'id': 110, 'name': 'test'}>
authenticated_client = <rest_framework.test.APIClient object at 0x7f8500fb5978>
def complete_deposit(sample_archive, deposit_collection, authenticated_client):
"""Returns a completed deposit (load success)
deposit = create_deposit(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py3/lib/python3.7/site-packages/swh/deposit/tests/conftest.py:448: in create_deposit
response_content = parse_xml(response.content)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
raw_content = b'<entry xmlns="http://www.w3.org/2005/Atom"\n xmlns:sword="http://purl.org/net/sword/terms/"\n xmlns:dcte.../test/34/status/" />\n\n <sword:packaging>http://purl.org/net/sword/package/SimpleZip</sword:packaging>\n</entry>\n'
def parse_xml(raw_content):
"""Parse xml body.
raw_content (bytes): The content to parse
ParserError in case of a malformed xml
content parsed as dict.
return SWHXMLParser().parse(raw_content)
except ExpatError as e:
> raise ParserError(str(e))
E swh.deposit.errors.ParserError: out of memory: line 1, column 0
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:94: ParserError
- Run At
- Feb 24 2022, 10:00 AM