diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -1,15 +1,13 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, Optional, Tuple from rest_framework.permissions import AllowAny from rest_framework.views import APIView -from swh.deposit import utils - from ...config import METADATA_TYPE, APIConfig from ...models import Deposit, DepositRequest @@ -27,17 +25,18 @@ request_type: 'archive' or 'metadata' Yields: - deposit requests of type request_type associated to the deposit + deposit requests of type request_type associated to the deposit, + most recent first """ deposit_requests = DepositRequest.objects.filter( type=request_type, deposit=deposit - ).order_by("id") + ).order_by("-id") for deposit_request in deposit_requests: yield deposit_request - def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]: + def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], Optional[bytes]]: """Given a deposit, retrieve all metadata requests into one Dict and returns both that aggregated metadata dict and the list of raw_metdadata. @@ -45,19 +44,16 @@ deposit: The deposit instance to extract metadata from Returns: - Tuple of aggregated metadata dict, list of raw_metadata + Tuple of last metadata dict and last raw_metadata """ - metadata: List[Dict[str, Any]] = [] - raw_metadata: List[str] = [] for deposit_request in self._deposit_requests( deposit, request_type=METADATA_TYPE ): - metadata.append(deposit_request.metadata) - raw_metadata.append(deposit_request.raw_metadata) + if deposit_request.raw_metadata is not None: + return (deposit_request.metadata, deposit_request.raw_metadata) - aggregated_metadata = utils.merge(*metadata) - return (aggregated_metadata, raw_metadata) + return ({}, None) class APIPrivateView(APIConfig, APIView): diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -72,6 +72,7 @@ """ requests = list(self._deposit_requests(deposit, request_type=ARCHIVE_TYPE)) + requests.reverse() if len(requests) == 0: # no associated archive is refused return False, {"archive": [{"summary": MANDATORY_ARCHIVE_MISSING,}]} diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -144,7 +144,7 @@ **origin** (Dict): Information about the origin - **metadata_raw** (List[str]): List of raw metadata received for the + **metadata_raw** (str): List of raw metadata received for the deposit **metadata_dict** (Dict): Deposit aggregated metadata into one dict diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,7 +6,7 @@ from django.urls import reverse_lazy as reverse from rest_framework import status -from swh.deposit import __version__, utils +from swh.deposit import __version__ from swh.deposit.config import PRIVATE_GET_DEPOSIT_METADATA, SE_IRI, SWH_PERSON from swh.deposit.models import Deposit from swh.deposit.parsers import parse_xml @@ -47,14 +47,10 @@ deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}" deposit.save() - metadata_xml_atoms = [ - atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] - ] - metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] - for atom_xml in metadata_xml_atoms: - deposit = update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, atom_xml, - ) + metadata_xml_raw = atom_dataset["entry-data2"] + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, metadata_xml_raw, + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) @@ -66,8 +62,8 @@ "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, - "metadata_raw": metadata_xml_atoms, - "metadata_dict": utils.merge(*metadata_xml_raws), + "metadata_raw": metadata_xml_raw, + "metadata_dict": parse_xml(metadata_xml_raw), "provider": { "metadata": {}, "provider_name": "", @@ -109,14 +105,10 @@ deposit.external_id = "some-external-id" deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}" deposit.save() - metadata_xml_atoms = [ - atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] - ] - metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] - for atom_xml in metadata_xml_atoms: - deposit = update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, atom_xml, - ) + metadata_xml_raw = atom_dataset["entry-data2"] + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, metadata_xml_raw, + ) rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa" swhid = "swh:1:rev:%s" % rev_id @@ -138,8 +130,8 @@ "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, - "metadata_raw": metadata_xml_atoms, - "metadata_dict": utils.merge(*metadata_xml_raws), + "metadata_raw": metadata_xml_raw, + "metadata_dict": parse_xml(metadata_xml_raw), "provider": { "metadata": {}, "provider_name": "", @@ -182,24 +174,10 @@ deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}" deposit.save() - # add metadata to the deposit with datePublished and dateCreated - codemeta_entry_data = ( - atom_dataset["metadata"] - % """ - 2015-04-06T17:08:47+02:00 - 2017-05-03T16:08:47+02:00 -""" + metadata_xml_raw = atom_dataset["entry-data3"] + update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, metadata_xml_raw, ) - metadata_xml_atoms = [ - atom_dataset["entry-data2"], - atom_dataset["entry-data3"], - codemeta_entry_data, - ] - metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] - for atom_xml in metadata_xml_atoms: - update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, atom_xml, - ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) @@ -212,8 +190,8 @@ "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, - "metadata_raw": metadata_xml_atoms, - "metadata_dict": utils.merge(*metadata_xml_raws), + "metadata_raw": metadata_xml_raw, + "metadata_dict": parse_xml(metadata_xml_raw), "provider": { "metadata": {}, "provider_name": "", @@ -270,7 +248,7 @@ assert actual_data == { "origin": {"type": "deposit", "url": None,}, - "metadata_raw": [codemeta_entry_data], + "metadata_raw": codemeta_entry_data, "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, @@ -341,7 +319,7 @@ "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, - "metadata_raw": [codemeta_entry_data], + "metadata_raw": codemeta_entry_data, "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, @@ -404,14 +382,10 @@ deposit.origin_url = f"https://hal-test.archives-ouvertes.fr/{deposit.external_id}" deposit.save() - metadata_xml_atoms = [ - atom_dataset[atom_key] for atom_key in ["entry-data-multiple-release-notes"] - ] - metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] - for atom_xml in metadata_xml_atoms: - deposit = update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, atom_xml, - ) + metadata_xml_raw = atom_dataset["entry-data-multiple-release-notes"] + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, metadata_xml_raw, + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) @@ -423,8 +397,8 @@ "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, - "metadata_raw": metadata_xml_atoms, - "metadata_dict": utils.merge(*metadata_xml_raws), + "metadata_raw": metadata_xml_raw, + "metadata_dict": parse_xml(metadata_xml_raw), "provider": { "metadata": {}, "provider_name": "", diff --git a/swh/deposit/tests/data/atom/entry-data2.xml b/swh/deposit/tests/data/atom/entry-data2.xml --- a/swh/deposit/tests/data/atom/entry-data2.xml +++ b/swh/deposit/tests/data/atom/entry-data2.xml @@ -1,5 +1,6 @@ https://hal-test.archives-ouvertes.fr/some-external-id some awesome author @@ -9,4 +10,9 @@ + + another one + no one + 2017-10-07T15:17:08Z + This is the release of October 7th, 2017. diff --git a/swh/deposit/tests/data/atom/entry-data3.xml b/swh/deposit/tests/data/atom/entry-data3.xml --- a/swh/deposit/tests/data/atom/entry-data3.xml +++ b/swh/deposit/tests/data/atom/entry-data3.xml @@ -1,7 +1,13 @@ - + + https://hal-test.archives-ouvertes.fr/some-external-id + some awesome author + another one no one 2017-10-07T15:17:08Z This is the release of October 7th, 2017. + 2015-04-06T17:08:47+02:00 + 2017-05-03T16:08:47+02:00 diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -27,92 +27,6 @@ return xml_data.strip() -def test_merge(): - """Calling utils.merge on dicts should merge without losing information - - """ - d0 = {"author": "someone", "license": [["gpl2"]], "a": 1} - - d1 = { - "author": ["author0", {"name": "author1"}], - "license": [["gpl3"]], - "b": {"1": "2"}, - } - - d2 = {"author": map(lambda x: x, ["else"]), "license": "mit", "b": {"2": "3",}} - - d3 = { - "author": (v for v in ["no one"]), - } - - actual_merge = utils.merge(d0, d1, d2, d3) - - expected_merge = { - "a": 1, - "license": [["gpl2"], ["gpl3"], "mit"], - "author": ["someone", "author0", {"name": "author1"}, "else", "no one"], - "b": {"1": "2", "2": "3",}, - } - assert actual_merge == expected_merge - - -def test_merge_2(): - d0 = {"license": "gpl2", "runtime": {"os": "unix derivative"}} - - d1 = {"license": "gpl3", "runtime": "GNU/Linux"} - - expected = { - "license": ["gpl2", "gpl3"], - "runtime": [{"os": "unix derivative"}, "GNU/Linux"], - } - - actual = utils.merge(d0, d1) - assert actual == expected - - -def test_merge_edge_cases(): - input_dict = { - "license": ["gpl2", "gpl3"], - "runtime": [{"os": "unix derivative"}, "GNU/Linux"], - } - # against empty dict - actual = utils.merge(input_dict, {}) - assert actual == input_dict - - # against oneself - actual = utils.merge(input_dict, input_dict, input_dict) - assert actual == input_dict - - -def test_merge_one_dict(): - """Merge one dict should result in the same dict value - - """ - input_and_expected = {"anything": "really"} - actual = utils.merge(input_and_expected) - assert actual == input_and_expected - - -def test_merge_raise(): - """Calling utils.merge with any no dict argument should raise - - """ - d0 = {"author": "someone", "a": 1} - - d1 = ["not a dict"] - - with pytest.raises(ValueError): - utils.merge(d0, d1) - - with pytest.raises(ValueError): - utils.merge(d1, d0) - - with pytest.raises(ValueError): - utils.merge(d1) - - assert utils.merge(d0) == d0 - - def test_normalize_date_0(): """When date is a list, choose the first date and normalize it """ diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -4,7 +4,6 @@ # See top-level LICENSE file for more information import logging -from types import GeneratorType from typing import Any, Dict, Optional, Union import iso8601 @@ -40,55 +39,6 @@ return data -def merge(*dicts): - """Given an iterator of dicts, merge them losing no information. - - Args: - *dicts: arguments are all supposed to be dict to merge into one - - Returns: - dict merged without losing information - - """ - - def _extend(existing_val, value): - """Given an existing value and a value (as potential lists), merge - them together without repetition. - - """ - if isinstance(value, (list, map, GeneratorType)): - vals = value - else: - vals = [value] - for v in vals: - if v in existing_val: - continue - existing_val.append(v) - return existing_val - - d = {} - for data in dicts: - if not isinstance(data, dict): - raise ValueError("dicts is supposed to be a variable arguments of dict") - - for key, value in data.items(): - existing_val = d.get(key) - if not existing_val: - d[key] = value - continue - if isinstance(existing_val, (list, map, GeneratorType)): - new_val = _extend(existing_val, value) - elif isinstance(existing_val, dict): - if isinstance(value, dict): - new_val = merge(existing_val, value) - else: - new_val = _extend([existing_val], value) - else: - new_val = _extend([existing_val], value) - d[key] = new_val - return d - - def normalize_date(date): """Normalize date fields as expected by swh workers.