Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/utils.py
# Copyright (C) 2018-2020 The Software Heritage developers | # Copyright (C) 2018-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from types import GeneratorType | from types import GeneratorType | ||||
from typing import Any, Dict, Tuple, Union | from typing import Any, Dict, Tuple, Union | ||||
import iso8601 | import iso8601 | ||||
import xmltodict | |||||
from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid | from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid | ||||
from swh.model.model import MetadataTargetType | from swh.model.model import MetadataTargetType | ||||
def parse_xml(stream, encoding="utf-8"):
    """Parse an XML document into a dict using namespace-aware xmltodict.

    Namespace URIs listed below are rewritten by xmltodict according to the
    mapping: a ``None`` value asks xmltodict to drop the namespace from the
    resulting keys, a string value is used as the short prefix instead of
    the full URI.

    Args:
        stream: XML input handed as-is to ``xmltodict.parse``
        encoding: encoding forwarded to ``xmltodict.parse``
            (defaults to "utf-8")

    Returns:
        The content found under the top-level "entry" key when the parsed
        document has one, otherwise the whole parsed document.

    """
    namespace_prefixes = {
        "http://www.w3.org/2005/Atom": None,
        "http://purl.org/dc/terms/": None,
        "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta",
        "http://purl.org/net/sword/": "sword",
        "https://www.softwareheritage.org/schema/2018/deposit": "swh",
    }
    parsed = xmltodict.parse(
        stream,
        encoding=encoding,
        namespaces=namespace_prefixes,
        process_namespaces=True,
    )
    # Unwrap the root "entry" element when present so callers get its content
    # directly.
    return parsed["entry"] if "entry" in parsed else parsed
def merge(*dicts): | def merge(*dicts): | ||||
"""Given an iterator of dicts, merge them losing no information. | """Given an iterator of dicts, merge them losing no information. | ||||
Args: | Args: | ||||
*dicts: arguments are all supposed to be dict to merge into one | *dicts: arguments are all supposed to be dict to merge into one | ||||
Returns: | Returns: | ||||
dict merged without losing information | dict merged without losing information | ||||
▲ Show 20 Lines • Show All 97 Lines • Show Last 20 Lines |