diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py
--- a/swh/deposit/parsers.py
+++ b/swh/deposit/parsers.py
@@ -14,9 +14,9 @@
 
 from django.conf import settings
 from rest_framework.parsers import BaseParser, FileUploadParser, MultiPartParser
-import xmltodict
 
 from swh.deposit.errors import ParserError
+from swh.deposit.utils import parse_xml as _parse_xml
 from swh.model.exceptions import ValidationError
 from swh.model.identifiers import (
     DIRECTORY,
@@ -59,20 +59,7 @@
         """
         parser_context = parser_context or {}
         encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET)
-        namespaces = {
-            "http://www.w3.org/2005/Atom": None,
-            "http://purl.org/dc/terms/": None,
-            "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta",
-            "http://purl.org/net/sword/": "sword",
-            "https://www.softwareheritage.org/schema/2018/deposit": "swh",
-        }
-
-        data = xmltodict.parse(
-            stream, encoding=encoding, namespaces=namespaces, process_namespaces=True
-        )
-        if "entry" in data:
-            data = data["entry"]
-        return data
+        return _parse_xml(stream, encoding=encoding)
 
 
 class SWHAtomEntryParser(SWHXMLParser):
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -7,11 +7,44 @@
 from typing import Any, Dict, Tuple, Union
 
 import iso8601
+import xmltodict
 
 from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid
 from swh.model.model import MetadataTargetType
 
 
+def parse_xml(stream, encoding="utf-8"):
+    """Parse an XML document into a dict with deposit namespace prefixes.
+
+    Elements in the Atom and Dublin Core terms namespaces are mapped to no
+    prefix; CodeMeta, SWORD and SWH deposit elements get the ``codemeta``,
+    ``sword`` and ``swh`` prefixes respectively.
+
+    Args:
+        stream: XML input, passed as-is to ``xmltodict.parse``
+        encoding: character encoding handed to ``xmltodict.parse``
+
+    Returns:
+        The parsed document. When the top-level mapping has an ``entry`` key,
+        the value under that key is returned instead.
+
+    """
+    namespaces = {
+        "http://www.w3.org/2005/Atom": None,
+        "http://purl.org/dc/terms/": None,
+        "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta",
+        "http://purl.org/net/sword/": "sword",
+        "https://www.softwareheritage.org/schema/2018/deposit": "swh",
+    }
+
+    data = xmltodict.parse(
+        stream, encoding=encoding, namespaces=namespaces, process_namespaces=True
+    )
+    if "entry" in data:
+        data = data["entry"]
+    return data
+
+
 def merge(*dicts):
     """Given an iterator of dicts, merge them losing no information.
 