diff --git a/swh/deposit/templates/deposit/content.xml b/swh/deposit/templates/deposit/content.xml
index a5e5d3c9..9140e255 100644
--- a/swh/deposit/templates/deposit/content.xml
+++ b/swh/deposit/templates/deposit/content.xml
@@ -1,17 +1,17 @@
{{ deposit_id }}{{ request.date }}{{ status }}{{ status_detail }}{{ deposit_id }}{{ request.date }}{{ status }}{{ status_detail }}
diff --git a/swh/deposit/templates/deposit/deposit_receipt.xml b/swh/deposit/templates/deposit/deposit_receipt.xml
index 294ec016..651ffb25 100644
--- a/swh/deposit/templates/deposit/deposit_receipt.xml
+++ b/swh/deposit/templates/deposit/deposit_receipt.xml
@@ -1,28 +1,28 @@
{{ deposit_id }}{{ deposit_date }}{{ archive }}{{ status }}{{ deposit_id }}{{ deposit_date }}{{ archive }}{{ status }}
{% for packaging in packagings %}{{ packaging }}{% endfor %}
diff --git a/swh/deposit/templates/deposit/error.xml b/swh/deposit/templates/deposit/error.xml
index 4e273995..3f5b5e85 100644
--- a/swh/deposit/templates/deposit/error.xml
+++ b/swh/deposit/templates/deposit/error.xml
@@ -1,11 +1,11 @@
+ xmlns:sword="http://purl.org/net/sword/terms/">
{{ summary }}processing failed
{% if verboseDescription is not None %}
{{ verboseDescription }}
{% endif %}
diff --git a/swh/deposit/templates/deposit/status.xml b/swh/deposit/templates/deposit/status.xml
index d9e23d4b..08178702 100644
--- a/swh/deposit/templates/deposit/status.xml
+++ b/swh/deposit/templates/deposit/status.xml
@@ -1,24 +1,24 @@
{{ deposit_id }}{{ status }}{{ status_detail }}
{% if swhid is not None %}{{ swhid }}{% endif %}
{% if swhid_context is not None %}{{ swhid_context }}{% endif %}
{% if external_id is not None %}{{ external_id }}{% endif %}
{{ deposit_id }}{{ status }}{{ status_detail }}
{% if swhid is not None %}{{ swhid }}{% endif %}
{% if swhid_context is not None %}{{ swhid_context }}{% endif %}
{% if external_id is not None %}{{ external_id }}{% endif %}
diff --git a/swh/deposit/tests/data/https_deposit.test.metadata/1_test b/swh/deposit/tests/data/https_deposit.test.metadata/1_test
index 38dc6484..042ab318 100644
--- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test
+++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test
@@ -1,27 +1,27 @@
666Oct. 8, 2020, 4:57 p.m.hardcoded_sample_archive_pathpartial666Oct. 8, 2020, 4:57 p.m.hardcoded_sample_archive_pathpartialhttp://purl.org/net/sword/package/SimpleZip
diff --git a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata
index 6e7eeb63..e15d53c9 100644
--- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata
+++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata
@@ -1,26 +1,26 @@
666Oct. 9, 2020, 8:44 p.m.somethingdeposited666Oct. 9, 2020, 8:44 p.m.somethingdepositedhttp://purl.org/net/sword/package/SimpleZip
diff --git a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status
index 0af5ba9e..63469ccf 100644
--- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status
+++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_status
@@ -1,8 +1,8 @@
666partialDeposit is partially received. To finalize it, In-Progress header should be falseexternal-id
diff --git a/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status b/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status
index afbc0f4d..ecd52fb5 100644
--- a/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status
+++ b/swh/deposit/tests/data/https_deposit.test.status/1_test_1033_status
@@ -1,10 +1,10 @@
1033doneThe deposit has been successfully loaded into the Software Heritage archiveswh:1:dir:ef04a768181417fbc5eef4243e2507915f24deeaswh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/check-deposit-2020-10-08T13:52:34.509655
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata
index 270d91f4..bf27f52c 100644
--- a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata
@@ -1,10 +1,10 @@
123doneThe deposit has been successfully loaded into the Software Heritage archiveswh:1:dir:ef04a768181417fbc5eef4243e2507915f24deeaswh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/check-deposit-2020-10-08T13:52:34.509655
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status
index 270d91f4..bf27f52c 100644
--- a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status
@@ -1,10 +1,10 @@
123doneThe deposit has been successfully loaded into the Software Heritage archiveswh:1:dir:ef04a768181417fbc5eef4243e2507915f24deeaswh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/check-deposit-2020-10-08T13:52:34.509655
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status
index 557e2167..a694c01e 100644
--- a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status
@@ -1,8 +1,8 @@
321partialThe deposit is in partial statecheck-deposit-2020-10-08T13:52:34.509655
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
index 04229583..07ba07b8 100644
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -1,137 +1,137 @@
# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from types import GeneratorType
from typing import Any, Dict, Tuple, Union
import iso8601
import xmltodict
from swh.model.identifiers import SWHID, normalize_timestamp, parse_swhid
from swh.model.model import MetadataTargetType
def parse_xml(stream, encoding="utf-8"):
"""Parse an XML document into nested dicts using xmltodict.

Namespace processing is enabled: element names are resolved against the
``namespaces`` mapping below rather than against the prefixes used in the
document itself. When the parsed result has a top-level ``entry`` key, the
value stored under that key is returned instead of the whole result.

Args:
stream: the XML content, handed as is to xmltodict.parse
encoding: encoding handed to xmltodict.parse ("utf-8" by default)

Returns:
the parsed data, unwrapped from its ``entry`` key when there is one

"""
# Namespace URI -> prefix used in the parsed keys. Per the xmltodict
# documentation a None value drops the namespace from the key altogether
# (NOTE(review): confirm against the xmltodict version in use).
namespaces = {
"http://www.w3.org/2005/Atom": None,
"http://purl.org/dc/terms/": None,
"https://doi.org/10.5063/SCHEMA/CODEMETA-2.0": "codemeta",
- "http://purl.org/net/sword/": "sword",
+ "http://purl.org/net/sword/terms/": "sword",
"https://www.softwareheritage.org/schema/2018/deposit": "swh",
}
data = xmltodict.parse(
stream, encoding=encoding, namespaces=namespaces, process_namespaces=True
)
# Unwrap the root "entry" element so callers get its content directly.
if "entry" in data:
data = data["entry"]
return data
def merge(*dicts):
    """Merge several dicts into a new one, accumulating conflicting values.

    For each key, the first truthy value encountered is stored as is. When a
    later dict provides a value for the same key:

    - if both the stored value and the new one are dicts, they are merged
      recursively;
    - otherwise both are accumulated into a list without repetition (lists,
      ``map`` objects and generators are flattened one level, anything else
      counts as a single item).

    A falsy value stored for a key (``None``, ``""``, ``0``, ``[]``, ``{}``)
    is replaced by the next value provided for that key.

    The input dicts and the lists they hold are never modified; accumulation
    always happens in a fresh list. Values that were never in conflict are
    stored by reference, so the result may share objects with the inputs.

    Args:
        *dicts: arguments are all supposed to be dict to merge into one

    Returns:
        dict merged without losing information

    Raises:
        ValueError: if one of the arguments is not a dict

    """
    iterable_types = (list, map, GeneratorType)

    def _extend(base, value):
        """Append to the list ``base`` the items of ``value`` it does not
        hold yet, and return ``base``.

        ``value`` is flattened one level when it is a list, a ``map`` or a
        generator; any other value is treated as a single item.

        """
        items = value if isinstance(value, iterable_types) else [value]
        for item in items:
            # Linear membership test on purpose: items may be unhashable
            # (dicts, lists), so a set cannot be used here.
            if item not in base:
                base.append(item)
        return base

    merged = {}
    for data in dicts:
        if not isinstance(data, dict):
            raise ValueError("dicts is supposed to be a variable arguments of dict")

        for key, value in data.items():
            existing_val = merged.get(key)
            if not existing_val:
                # Key not seen yet, or only seen with a falsy value.
                merged[key] = value
                continue

            if isinstance(existing_val, dict) and isinstance(value, dict):
                merged[key] = merge(existing_val, value)
                continue

            if isinstance(existing_val, iterable_types):
                # Work on a copy: the stored list may belong to one of the
                # input dicts, and map/generator objects cannot be appended
                # to (nor iterated twice), so they are materialized first.
                accumulator = list(existing_val)
            else:
                accumulator = [existing_val]
            merged[key] = _extend(accumulator, value)
    return merged
def normalize_date(date):
    """Normalize date fields as expected by swh workers.

    The input goes through the following steps:

    - a list is reduced to its first element (arbitrary choice);
    - a string is parsed through iso8601.parse_date to extract a datetime;
    - the outcome is normalized through
      swh.model.identifiers.normalize_timestamp.

    Args:
        date: a list whose first element is the date to use, a date string,
            or any other value, which is handed unchanged to
            normalize_timestamp

    Returns
        The swh date object

    """
    candidate = date[0] if isinstance(date, list) else date
    if isinstance(candidate, str):
        candidate = iso8601.parse_date(candidate)
    return normalize_timestamp(candidate)
def compute_metadata_context(
    swhid_reference: Union[SWHID, str]
) -> Tuple[MetadataTargetType, Dict[str, Any]]:
    """Determine the metadata target type and context of a reference.

    Anything that is not a SWHID instance is considered an origin: the target
    type is then MetadataTargetType.ORIGIN and the context only holds an empty
    "origin" entry.

    For a SWHID, the target type derives from its object type and the context
    is built out of its metadata (when it has some):

    - "origin": value of the "origin" entry, None if missing
    - "path": value of the "path" entry encoded as bytes, None if missing or
      empty
    - "snapshot": parsed "visit" entry, only set when there is one
    - <object type of the anchor>: parsed "anchor" entry, only set when there
      is one

    The parse_swhid calls within are not expected to raise (because they should
    have been caught early on).

    Returns:
        a tuple (target type, context dict)

    """
    if not isinstance(swhid_reference, SWHID):
        return MetadataTargetType.ORIGIN, {"origin": None}

    object_type = MetadataTargetType(swhid_reference.object_type)
    assert object_type != MetadataTargetType.ORIGIN

    if not swhid_reference.metadata:
        # No metadata attached to the SWHID: same bare context as for origins
        return object_type, {"origin": None}

    raw_path = swhid_reference.metadata.get("path")
    context: Dict[str, Any] = {
        "origin": swhid_reference.metadata.get("origin"),
        "path": raw_path.encode() if raw_path else None,
    }

    visit = swhid_reference.metadata.get("visit")
    if visit:
        context["snapshot"] = parse_swhid(visit)

    raw_anchor = swhid_reference.metadata.get("anchor")
    if raw_anchor:
        parsed_anchor = parse_swhid(raw_anchor)
        # The anchor is stored under a key named after its own object type
        context[parsed_anchor.object_type] = parsed_anchor

    return object_type, context