diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py
--- a/swh/deposit/api/private/__init__.py
+++ b/swh/deposit/api/private/__init__.py
@@ -1,15 +1,13 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, Optional, Tuple
from rest_framework.permissions import AllowAny
from rest_framework.views import APIView
-from swh.deposit import utils
-
from ...config import METADATA_TYPE, APIConfig
from ...models import Deposit, DepositRequest
@@ -27,17 +25,18 @@
request_type: 'archive' or 'metadata'
Yields:
- deposit requests of type request_type associated to the deposit
+ deposit requests of type request_type associated to the deposit,
+ most recent first
"""
deposit_requests = DepositRequest.objects.filter(
type=request_type, deposit=deposit
- ).order_by("id")
+ ).order_by("-id")
for deposit_request in deposit_requests:
yield deposit_request
- def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]:
+ def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], Optional[bytes]]:
"""Given a deposit, retrieve all metadata requests into one Dict and returns both that
aggregated metadata dict and the list of raw_metdadata.
@@ -45,19 +44,16 @@
deposit: The deposit instance to extract metadata from
Returns:
- Tuple of aggregated metadata dict, list of raw_metadata
+ Tuple of last metadata dict and last raw_metadata
"""
- metadata: List[Dict[str, Any]] = []
- raw_metadata: List[str] = []
for deposit_request in self._deposit_requests(
deposit, request_type=METADATA_TYPE
):
- metadata.append(deposit_request.metadata)
- raw_metadata.append(deposit_request.raw_metadata)
+ if deposit_request.raw_metadata is not None:
+ return (deposit_request.metadata, deposit_request.raw_metadata)
- aggregated_metadata = utils.merge(*metadata)
- return (aggregated_metadata, raw_metadata)
+ return ({}, None)
class APIPrivateView(APIConfig, APIView):
diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py
--- a/swh/deposit/api/private/deposit_check.py
+++ b/swh/deposit/api/private/deposit_check.py
@@ -72,6 +72,7 @@
"""
requests = list(self._deposit_requests(deposit, request_type=ARCHIVE_TYPE))
+ requests.reverse()
if len(requests) == 0: # no associated archive is refused
return False, {"archive": [{"summary": MANDATORY_ARCHIVE_MISSING,}]}
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -144,7 +144,7 @@
**origin** (Dict): Information about the origin
- **metadata_raw** (List[str]): List of raw metadata received for the
+ **metadata_raw** (str): List of raw metadata received for the
deposit
**metadata_dict** (Dict): Deposit aggregated metadata into one dict
diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,7 +6,7 @@
from django.urls import reverse_lazy as reverse
from rest_framework import status
-from swh.deposit import __version__, utils
+from swh.deposit import __version__
from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA, SE_IRI, SWH_PERSON
from swh.deposit.models import Deposit
from swh.deposit.parsers import parse_xml
@@ -47,14 +47,10 @@
deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}"
deposit.save()
- metadata_xml_atoms = [
- atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
- ]
- metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
- for atom_xml in metadata_xml_atoms:
- deposit = update_deposit_with_metadata(
- authenticated_client, deposit_collection, deposit, atom_xml,
- )
+ metadata_xml_raw = atom_dataset["entry-data2"]
+ deposit = update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, metadata_xml_raw,
+ )
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
@@ -66,8 +62,8 @@
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
- "metadata_raw": metadata_xml_atoms,
- "metadata_dict": utils.merge(*metadata_xml_raws),
+ "metadata_raw": metadata_xml_raw,
+ "metadata_dict": parse_xml(metadata_xml_raw),
"provider": {
"metadata": {},
"provider_name": "",
@@ -109,14 +105,10 @@
deposit.external_id = "some-external-id"
deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}"
deposit.save()
- metadata_xml_atoms = [
- atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
- ]
- metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
- for atom_xml in metadata_xml_atoms:
- deposit = update_deposit_with_metadata(
- authenticated_client, deposit_collection, deposit, atom_xml,
- )
+ metadata_xml_raw = atom_dataset["entry-data2"]
+ deposit = update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, metadata_xml_raw,
+ )
rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa"
swhid = "swh:1:rev:%s" % rev_id
@@ -138,8 +130,8 @@
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
- "metadata_raw": metadata_xml_atoms,
- "metadata_dict": utils.merge(*metadata_xml_raws),
+ "metadata_raw": metadata_xml_raw,
+ "metadata_dict": parse_xml(metadata_xml_raw),
"provider": {
"metadata": {},
"provider_name": "",
@@ -182,24 +174,10 @@
deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}"
deposit.save()
- # add metadata to the deposit with datePublished and dateCreated
- codemeta_entry_data = (
- atom_dataset["metadata"]
- % """
- 2015-04-06T17:08:47+02:00
- 2017-05-03T16:08:47+02:00
-"""
+ metadata_xml_raw = atom_dataset["entry-data3"]
+ update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, metadata_xml_raw,
)
- metadata_xml_atoms = [
- atom_dataset["entry-data2"],
- atom_dataset["entry-data3"],
- codemeta_entry_data,
- ]
- metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
- for atom_xml in metadata_xml_atoms:
- update_deposit_with_metadata(
- authenticated_client, deposit_collection, deposit, atom_xml,
- )
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
@@ -212,8 +190,8 @@
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
},
- "metadata_raw": metadata_xml_atoms,
- "metadata_dict": utils.merge(*metadata_xml_raws),
+ "metadata_raw": metadata_xml_raw,
+ "metadata_dict": parse_xml(metadata_xml_raw),
"provider": {
"metadata": {},
"provider_name": "",
@@ -270,7 +248,7 @@
assert actual_data == {
"origin": {"type": "deposit", "url": None,},
- "metadata_raw": [codemeta_entry_data],
+ "metadata_raw": codemeta_entry_data,
"metadata_dict": parse_xml(codemeta_entry_data),
"provider": {
"metadata": {},
@@ -341,7 +319,7 @@
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
},
- "metadata_raw": [codemeta_entry_data],
+ "metadata_raw": codemeta_entry_data,
"metadata_dict": parse_xml(codemeta_entry_data),
"provider": {
"metadata": {},
@@ -404,14 +382,10 @@
deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}"
deposit.save()
- metadata_xml_atoms = [
- atom_dataset[atom_key] for atom_key in ["entry-data-multiple-release-notes"]
- ]
- metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
- for atom_xml in metadata_xml_atoms:
- deposit = update_deposit_with_metadata(
- authenticated_client, deposit_collection, deposit, atom_xml,
- )
+ metadata_xml_raw = atom_dataset["entry-data-multiple-release-notes"]
+ deposit = update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, metadata_xml_raw,
+ )
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
@@ -423,8 +397,8 @@
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
- "metadata_raw": metadata_xml_atoms,
- "metadata_dict": utils.merge(*metadata_xml_raws),
+ "metadata_raw": metadata_xml_raw,
+ "metadata_dict": parse_xml(metadata_xml_raw),
"provider": {
"metadata": {},
"provider_name": "",
diff --git a/swh/deposit/tests/data/atom/entry-data2.xml b/swh/deposit/tests/data/atom/entry-data2.xml
--- a/swh/deposit/tests/data/atom/entry-data2.xml
+++ b/swh/deposit/tests/data/atom/entry-data2.xml
@@ -1,5 +1,6 @@
https://hal-test.archives-ouvertes.fr/some-external-id
some awesome author
@@ -9,4 +10,9 @@
+
+ another one
+ no one
+ 2017-10-07T15:17:08Z
+ This is the release of October 7th, 2017.
diff --git a/swh/deposit/tests/data/atom/entry-data3.xml b/swh/deposit/tests/data/atom/entry-data3.xml
--- a/swh/deposit/tests/data/atom/entry-data3.xml
+++ b/swh/deposit/tests/data/atom/entry-data3.xml
@@ -1,7 +1,13 @@
-
+
+ https://hal-test.archives-ouvertes.fr/some-external-id
+ some awesome author
+
another one
no one
2017-10-07T15:17:08Z
This is the release of October 7th, 2017.
+ 2015-04-06T17:08:47+02:00
+ 2017-05-03T16:08:47+02:00
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -27,92 +27,6 @@
return xml_data.strip()
-def test_merge():
- """Calling utils.merge on dicts should merge without losing information
-
- """
- d0 = {"author": "someone", "license": [["gpl2"]], "a": 1}
-
- d1 = {
- "author": ["author0", {"name": "author1"}],
- "license": [["gpl3"]],
- "b": {"1": "2"},
- }
-
- d2 = {"author": map(lambda x: x, ["else"]), "license": "mit", "b": {"2": "3",}}
-
- d3 = {
- "author": (v for v in ["no one"]),
- }
-
- actual_merge = utils.merge(d0, d1, d2, d3)
-
- expected_merge = {
- "a": 1,
- "license": [["gpl2"], ["gpl3"], "mit"],
- "author": ["someone", "author0", {"name": "author1"}, "else", "no one"],
- "b": {"1": "2", "2": "3",},
- }
- assert actual_merge == expected_merge
-
-
-def test_merge_2():
- d0 = {"license": "gpl2", "runtime": {"os": "unix derivative"}}
-
- d1 = {"license": "gpl3", "runtime": "GNU/Linux"}
-
- expected = {
- "license": ["gpl2", "gpl3"],
- "runtime": [{"os": "unix derivative"}, "GNU/Linux"],
- }
-
- actual = utils.merge(d0, d1)
- assert actual == expected
-
-
-def test_merge_edge_cases():
- input_dict = {
- "license": ["gpl2", "gpl3"],
- "runtime": [{"os": "unix derivative"}, "GNU/Linux"],
- }
- # against empty dict
- actual = utils.merge(input_dict, {})
- assert actual == input_dict
-
- # against oneself
- actual = utils.merge(input_dict, input_dict, input_dict)
- assert actual == input_dict
-
-
-def test_merge_one_dict():
- """Merge one dict should result in the same dict value
-
- """
- input_and_expected = {"anything": "really"}
- actual = utils.merge(input_and_expected)
- assert actual == input_and_expected
-
-
-def test_merge_raise():
- """Calling utils.merge with any no dict argument should raise
-
- """
- d0 = {"author": "someone", "a": 1}
-
- d1 = ["not a dict"]
-
- with pytest.raises(ValueError):
- utils.merge(d0, d1)
-
- with pytest.raises(ValueError):
- utils.merge(d1, d0)
-
- with pytest.raises(ValueError):
- utils.merge(d1)
-
- assert utils.merge(d0) == d0
-
-
def test_normalize_date_0():
"""When date is a list, choose the first date and normalize it
"""
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -4,7 +4,6 @@
# See top-level LICENSE file for more information
import logging
-from types import GeneratorType
from typing import Any, Dict, Optional, Union
import iso8601
@@ -40,55 +39,6 @@
return data
-def merge(*dicts):
- """Given an iterator of dicts, merge them losing no information.
-
- Args:
- *dicts: arguments are all supposed to be dict to merge into one
-
- Returns:
- dict merged without losing information
-
- """
-
- def _extend(existing_val, value):
- """Given an existing value and a value (as potential lists), merge
- them together without repetition.
-
- """
- if isinstance(value, (list, map, GeneratorType)):
- vals = value
- else:
- vals = [value]
- for v in vals:
- if v in existing_val:
- continue
- existing_val.append(v)
- return existing_val
-
- d = {}
- for data in dicts:
- if not isinstance(data, dict):
- raise ValueError("dicts is supposed to be a variable arguments of dict")
-
- for key, value in data.items():
- existing_val = d.get(key)
- if not existing_val:
- d[key] = value
- continue
- if isinstance(existing_val, (list, map, GeneratorType)):
- new_val = _extend(existing_val, value)
- elif isinstance(existing_val, dict):
- if isinstance(value, dict):
- new_val = merge(existing_val, value)
- else:
- new_val = _extend([existing_val], value)
- else:
- new_val = _extend([existing_val], value)
- d[key] = new_val
- return d
-
-
def normalize_date(date):
"""Normalize date fields as expected by swh workers.