raw_content = <_io.BytesIO object at 0x7f3049ee8d58>
def parse_xml(raw_content):
"""Parse xml body.
Args:
raw_content (bytes): The content to parse
Raises:
ParserError in case of a malformed xml
Returns:
content parsed as dict.
"""
try:
> return SWHXMLParser().parse(raw_content)
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:92:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <swh.deposit.parsers.SWHXMLParser object at 0x7f3048354080>
stream = <_io.BytesIO object at 0x7f3049ee8d58>, media_type = None
parser_context = {}
def parse(self, stream, media_type=None, parser_context=None):
"""
Parses the incoming bytestream as XML and returns the resulting data.
"""
parser_context = parser_context or {}
encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET)
> return _parse_xml(stream, encoding=encoding)
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:52:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
stream = <_io.BytesIO object at 0x7f3049ee8d58>, encoding = 'utf-8'
def parse_xml(stream, encoding="utf-8"):
data = xmltodict.parse(
stream,
encoding=encoding,
namespaces={uri: prefix for (prefix, uri) in NAMESPACES.items()},
process_namespaces=True,
> dict_constructor=dict,
)
.tox/py3/lib/python3.7/site-packages/swh/deposit/utils.py:37:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
xml_input = <_io.BytesIO object at 0x7f3049ee8d58>, encoding = 'utf-8'
expat = <module 'xml.parsers.expat' from '/usr/lib/python3.7/xml/parsers/expat.py'>
process_namespaces = True, namespace_separator = ':', disable_entities = True
kwargs = {'dict_constructor': <class 'dict'>, 'namespaces': {'http://purl.org/dc/terms/': 'dc', 'http://purl.org/net/sword/terms/': 'sword', 'http://schema.org/': 'schema', 'http://www.w3.org/2005/Atom': 'atom', ...}}
handler = <xmltodict._DictSAXHandler object at 0x7f3048354978>
parser = <pyexpat.xmlparser object at 0x7f304984bb28>
feature = 'http://apache.org/xml/features/disallow-doctype-decl'
def parse(xml_input, encoding=None, expat=expat, process_namespaces=False,
namespace_separator=':', disable_entities=True, **kwargs):
"""Parse the given XML input and convert it into a dictionary.
`xml_input` can either be a `string` or a file-like object.
If `xml_attribs` is `True`, element attributes are put in the dictionary
among regular child elements, using `@` as a prefix to avoid collisions. If
set to `False`, they are just ignored.
Simple example::
>>> import xmltodict
>>> doc = xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>
... \"\"\")
>>> doc['a']['@prop']
u'x'
>>> doc['a']['b']
[u'1', u'2']
If `item_depth` is `0`, the function returns a dictionary for the root
element (default behavior). Otherwise, it calls `item_callback` every time
an item at the specified depth is found and returns `None` in the end
(streaming mode).
The callback function receives two parameters: the `path` from the document
root to the item (name-attribs pairs), and the `item` (dict). If the
callback's return value is false-ish, parsing will be stopped with the
:class:`ParsingInterrupted` exception.
Streaming example::
>>> def handle(path, item):
... print('path:%s item:%s' % (path, item))
... return True
...
>>> xmltodict.parse(\"\"\"
... <a prop="x">
... <b>1</b>
... <b>2</b>
... </a>\"\"\", item_depth=2, item_callback=handle)
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:1
path:[(u'a', {u'prop': u'x'}), (u'b', None)] item:2
The optional argument `postprocessor` is a function that takes `path`,
`key` and `value` as positional arguments and returns a new `(key, value)`
pair where both `key` and `value` may have changed. Usage example::
>>> def postprocessor(path, key, value):
... try:
... return key + ':int', int(value)
... except (ValueError, TypeError):
... return key, value
>>> xmltodict.parse('<a><b>1</b><b>2</b><b>x</b></a>',
... postprocessor=postprocessor)
OrderedDict([(u'a', OrderedDict([(u'b:int', [1, 2]), (u'b', u'x')]))])
You can pass an alternate version of `expat` (such as `defusedexpat`) by
using the `expat` parameter. E.g:
>>> import defusedexpat
>>> xmltodict.parse('<a>hello</a>', expat=defusedexpat.pyexpat)
OrderedDict([(u'a', u'hello')])
You can use the force_list argument to force lists to be created even
when there is only a single child of a given level of hierarchy. The
force_list argument is a tuple of keys. If the key for a given level
of hierarchy is in the force_list argument, that level of hierarchy
will have a list as a child (even if there is only one sub-element).
The index_keys operation takes precedence over this. This is applied
after any user-supplied postprocessor has already run.
For example, given this input:
<servers>
<server>
<name>host1</name>
<os>Linux</os>
<interfaces>
<interface>
<name>em0</name>
<ip_address>10.0.0.1</ip_address>
</interface>
</interfaces>
</server>
</servers>
If called with force_list=('interface',), it will produce
this dictionary:
{'servers':
{'server':
{'name': 'host1',
'os': 'Linux'},
'interfaces':
{'interface':
[ {'name': 'em0', 'ip_address': '10.0.0.1' } ] } } }
`force_list` can also be a callable that receives `path`, `key` and
`value`. This is helpful in cases where the logic that decides whether
a list should be forced is more complex.
"""
handler = _DictSAXHandler(namespace_separator=namespace_separator,
**kwargs)
if isinstance(xml_input, _unicode):
if not encoding:
encoding = 'utf-8'
xml_input = xml_input.encode(encoding)
if not process_namespaces:
namespace_separator = None
parser = expat.ParserCreate(
encoding,
namespace_separator
)
try:
parser.ordered_attributes = True
except AttributeError:
# Jython's expat does not support ordered_attributes
pass
parser.StartNamespaceDeclHandler = handler.startNamespaceDecl
parser.StartElementHandler = handler.startElement
parser.EndElementHandler = handler.endElement
parser.CharacterDataHandler = handler.characters
parser.buffer_text = True
if disable_entities:
try:
# Attempt to disable DTD in Jython's expat parser (Xerces-J).
feature = "http://apache.org/xml/features/disallow-doctype-decl"
parser._reader.setFeature(feature, True)
except AttributeError:
# For CPython / expat parser.
# Anything not handled ends up here and entities aren't expanded.
parser.DefaultHandler = lambda x: None
# Expects an integer return; zero means failure -> expat.ExpatError.
parser.ExternalEntityRefHandler = lambda *x: 1
if hasattr(xml_input, 'read'):
> parser.ParseFile(xml_input)
E xml.parsers.expat.ExpatError: out of memory: line 1, column 0
.tox/py3/lib/python3.7/site-packages/xmltodict.py:325: ExpatError
During handling of the above exception, another exception occurred:
swhid = 'swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49'
authenticated_client = <rest_framework.test.APIClient object at 0x7f3049e58eb8>
deposit_collection = <DepositCollection: {'id': 53, 'name': 'test'}>
atom_dataset = {'codemeta-sample': '<?xml version="1.0"?>\n <entry xmlns="http://www.w3.org/2005/Atom"\n xmlns:d...ntry>\n', 'entry-data-empty-body': '<?xml version="1.0"?>\n<entry xmlns="http://www.w3.org/2005/Atom"></entry>\n', ...}
swh_storage = <swh.storage.postgresql.storage.Storage object at 0x7f3049e58a58>
@pytest.mark.parametrize(
"swhid",
[
"swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
"swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
"swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
"swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
"swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
"swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
"swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
"swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
"swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
"swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
],
)
def test_deposit_metadata_unknown_swhid(
swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
"""Posting a swhid reference is rejected if the referenced object is unknown
"""
xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
response = post_atom(
authenticated_client,
reverse(COL_IRI, args=[deposit_collection.name]),
data=xml_data,
)
assert (
response.status_code == status.HTTP_400_BAD_REQUEST
), response.content.decode()
> response_content = parse_xml(BytesIO(response.content))
.tox/py3/lib/python3.7/site-packages/swh/deposit/tests/api/test_collection_post_atom.py:741:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
raw_content = <_io.BytesIO object at 0x7f3049ee8d58>
def parse_xml(raw_content):
"""Parse xml body.
Args:
raw_content (bytes): The content to parse
Raises:
ParserError in case of a malformed xml
Returns:
content parsed as dict.
"""
try:
return SWHXMLParser().parse(raw_content)
except ExpatError as e:
> raise ParserError(str(e))
E swh.deposit.errors.ParserError: out of memory: line 1, column 0
.tox/py3/lib/python3.7/site-packages/swh/deposit/parsers.py:94: ParserError
TEST RESULT
TEST RESULT
- Run At
- Feb 23 2022, 1:52 PM