diff --git a/swh/deposit/templates/deposit/content.xml b/swh/deposit/templates/deposit/content.xml index a5e5d3c9..9140e255 100644 --- a/swh/deposit/templates/deposit/content.xml +++ b/swh/deposit/templates/deposit/content.xml @@ -1,17 +1,17 @@ {{ deposit_id }} {{ request.date }} {{ status }} {{ status_detail }} {{ deposit_id }} {{ request.date }} {{ status }} {{ status_detail }} diff --git a/swh/deposit/templates/deposit/deposit_receipt.xml b/swh/deposit/templates/deposit/deposit_receipt.xml index 294ec016..651ffb25 100644 --- a/swh/deposit/templates/deposit/deposit_receipt.xml +++ b/swh/deposit/templates/deposit/deposit_receipt.xml @@ -1,28 +1,28 @@ {{ deposit_id }} {{ deposit_date }} {{ archive }} {{ status }} {{ deposit_id }} {{ deposit_date }} {{ archive }} {{ status }} {% for packaging in packagings %}{{ packaging }}{% endfor %} diff --git a/swh/deposit/templates/deposit/error.xml b/swh/deposit/templates/deposit/error.xml index 4e273995..3f5b5e85 100644 --- a/swh/deposit/templates/deposit/error.xml +++ b/swh/deposit/templates/deposit/error.xml @@ -1,11 +1,11 @@ + xmlns:sword="http://purl.org/net/sword/terms/"> {{ summary }} processing failed {% if verboseDescription is not None %} {{ verboseDescription }} {% endif %} diff --git a/swh/deposit/templates/deposit/status.xml b/swh/deposit/templates/deposit/status.xml index d9e23d4b..08178702 100644 --- a/swh/deposit/templates/deposit/status.xml +++ b/swh/deposit/templates/deposit/status.xml @@ -1,24 +1,24 @@ {{ deposit_id }} {{ status }} {{ status_detail }} {% if swhid is not None %}{{ swhid }}{% endif %} {% if swhid_context is not None %}{{ swhid_context }}{% endif %} {% if external_id is not None %}{{ external_id }}{% endif %} {{ deposit_id }} {{ status }} {{ status_detail }} {% if swhid is not None %}{{ swhid }}{% endif %} {% if swhid_context is not None %}{{ swhid_context }}{% endif %} {% if external_id is not None %}{{ external_id }}{% endif %} diff --git 
a/swh/deposit/tests/data/https_deposit.test.metadata/1_test b/swh/deposit/tests/data/https_deposit.test.metadata/1_test index 38dc6484..042ab318 100644 --- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test +++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test @@ -1,27 +1,27 @@ 666 Oct. 8, 2020, 4:57 p.m. hardcoded_sample_archive_path partial 666 Oct. 8, 2020, 4:57 p.m. hardcoded_sample_archive_path partial http://purl.org/net/sword/package/SimpleZip diff --git a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata index 6e7eeb63..e15d53c9 100644 --- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata +++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata @@ -1,26 +1,26 @@ 666 Oct. 9, 2020, 8:44 p.m. something deposited 666 Oct. 9, 2020, 8:44 p.m. something deposited http://purl.org/net/sword/package/SimpleZip diff --git a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status index 0af5ba9e..63469ccf 100644 --- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status +++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status @@ -1,8 +1,8 @@ 666 partial Deposit is partially received. 
To finalize it, In-Progress header should be false external-id diff --git a/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status b/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status index afbc0f4d..ecd52fb5 100644 --- a/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status +++ b/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status @@ -1,10 +1,10 @@ 1033 done The deposit has been successfully loaded into the Software Heritage archive swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ check-deposit-2020-10-08T13:52:34.509655 diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata index 270d91f4..bf27f52c 100644 --- a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata @@ -1,10 +1,10 @@ 123 done The deposit has been successfully loaded into the Software Heritage archive swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ check-deposit-2020-10-08T13:52:34.509655 diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status index 270d91f4..bf27f52c 100644 --- a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status @@ 
-1,10 +1,10 @@ 123 done The deposit has been successfully loaded into the Software Heritage archive swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ check-deposit-2020-10-08T13:52:34.509655 diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status index 557e2167..a694c01e 100644 --- a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status @@ -1,8 +1,8 @@ 321 partial The deposit is in partial state check-deposit-2020-10-08T13:52:34.509655 diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py index 04229583..07ba07b8 100644 --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -1,137 +1,137 @@ # Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from types import GeneratorType from typing import Any, Dict, Tuple, Union import iso8601 import xmltodict from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid from swh.model.model import MetadataTargetType def parse_xml(stream, encoding="utf-8"): namespaces = { "http://www.w3.org/2005/Atom": None, "http://purl.org/dc/terms/": None, "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta", - "http://purl.org/net/sword/": "sword", + "http://purl.org/net/sword/terms/": "sword", "https://www.softwareheritage.org/schema/2018/deposit": "swh", } data = xmltodict.parse( stream, encoding=encoding, namespaces=namespaces, process_namespaces=True ) if "entry" in data: data 
= data["entry"] return data def merge(*dicts): """Given an iterator of dicts, merge them losing no information. Args: *dicts: arguments are all supposed to be dict to merge into one Returns: dict merged without losing information """ def _extend(existing_val, value): """Given an existing value and a value (as potential lists), merge them together without repetition. """ if isinstance(value, (list, map, GeneratorType)): vals = value else: vals = [value] for v in vals: if v in existing_val: continue existing_val.append(v) return existing_val d = {} for data in dicts: if not isinstance(data, dict): raise ValueError("dicts is supposed to be a variable arguments of dict") for key, value in data.items(): existing_val = d.get(key) if not existing_val: d[key] = value continue if isinstance(existing_val, (list, map, GeneratorType)): new_val = _extend(existing_val, value) elif isinstance(existing_val, dict): if isinstance(value, dict): new_val = merge(existing_val, value) else: new_val = _extend([existing_val], value) else: new_val = _extend([existing_val], value) d[key] = new_val return d def normalize_date(date): """Normalize date fields as expected by swh workers. If date is a list, elect arbitrarily the first element of that list If date is (then) a string, parse it through iso8601.parse_date to extract a datetime. Then normalize it through swh.model.identifiers.normalize_timestamp. Returns The swh date object """ if isinstance(date, list): date = date[0] if isinstance(date, str): date = iso8601.parse_date(date) return normalize_timestamp(date) def compute_metadata_context( swhid_reference: Union[SWHID, str] ) -> Tuple[MetadataTargetType, Dict[str, Any]]: """Given a SWHID object, determine the context as a dict. The parse_swhid calls within are not expected to raise (because they should have been caught early on). 
""" metadata_context: Dict[str, Any] = {"origin": None} if isinstance(swhid_reference, SWHID): object_type = MetadataTargetType(swhid_reference.object_type) assert object_type != MetadataTargetType.ORIGIN if swhid_reference.metadata: path = swhid_reference.metadata.get("path") metadata_context = { "origin": swhid_reference.metadata.get("origin"), "path": path.encode() if path else None, } snapshot = swhid_reference.metadata.get("visit") if snapshot: metadata_context["snapshot"] = parse_swhid(snapshot) anchor = swhid_reference.metadata.get("anchor") if anchor: anchor_swhid = parse_swhid(anchor) metadata_context[anchor_swhid.object_type] = anchor_swhid else: object_type = MetadataTargetType.ORIGIN return object_type, metadata_context