diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Any, Dict, List, Tuple + from rest_framework.permissions import AllowAny from swh.deposit import utils @@ -39,22 +41,27 @@ for deposit_request in deposit_requests: yield deposit_request - def _metadata_get(self, deposit): - """Given a deposit, aggregate all metadata requests. + def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]: + """Given a deposit, retrieve all metadata requests into one Dict and return both that + aggregated metadata dict and the list of raw_metadata. Args: - deposit (Deposit): The deposit instance to extract - metadata from. + deposit: The deposit instance to extract metadata from Returns: - metadata dict from the deposit. 
+ Tuple of aggregated metadata dict, list of raw_metadata """ - metadata = ( - m.metadata - for m in self._deposit_requests(deposit, request_type=METADATA_TYPE) - ) - return utils.merge(*metadata) + metadata: List[Dict[str, Any]] = [] + raw_metadata: List[str] = [] + for deposit_request in self._deposit_requests( + deposit, request_type=METADATA_TYPE + ): + metadata.append(deposit_request.metadata) + raw_metadata.append(deposit_request.raw_metadata) + + aggregated_metadata = utils.merge(*metadata) + return (aggregated_metadata, raw_metadata) class APIPrivateView(APIConfig, AuthenticatedAPIView): diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -11,6 +11,7 @@ import zipfile from rest_framework import status +from rest_framework.request import Request from swh.scheduler.utils import create_oneshot_task_dict @@ -130,22 +131,22 @@ return True, None def process_get( - self, req, collection_name: str, deposit_id: int + self, req: Request, collection_name: str, deposit_id: int ) -> Tuple[int, Dict, str]: """Build a unique tarball from the multiple received and stream that content to the client. 
Args: - req (Request): - collection_name (str): Collection owning the deposit - deposit_id (id): Deposit concerned by the reading + req: Client request + collection_name: Collection owning the deposit + deposit_id: Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) - metadata = self._metadata_get(deposit) + metadata, _ = self._metadata_get(deposit) problems: Dict = {} # will check each deposit's associated request (both of type # archive and metadata) for errors diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -131,26 +131,27 @@ commit_date = deposit.complete_date return (normalize_date(author_date), normalize_date(commit_date)) - def metadata_read(self, deposit): + def metadata_read(self, deposit: Deposit) -> Dict[str, Any]: """Read and aggregate multiple data on deposit into one unified data dictionary. Args: - deposit (Deposit): Deposit concerned by the data aggregation. + deposit: Deposit concerned by the data aggregation. Returns: Dictionary of data representing the deposit to inject in swh. 
""" - metadata = self._metadata_get(deposit) + metadata, raw_metadata = self._metadata_get(deposit) # Read information metadata data = {"origin": {"type": "deposit", "url": deposit.origin_url,}} author_date, commit_date = self._normalize_dates(deposit, metadata) if deposit.parent: - swh_persistent_id = deposit.parent.swhid - swhid = identifiers.parse_swhid(swh_persistent_id) + parent_swhid = deposit.parent.swhid + assert parent_swhid is not None + swhid = identifiers.parse_swhid(parent_swhid) parent_revision = swhid.object_id parents = [parent_revision] else: @@ -165,7 +166,7 @@ "metadata": {}, }, "tool": self.tool, - "metadata": metadata, + "raw_metadata": raw_metadata, } data["deposit"] = { "id": deposit.id, diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -22,14 +22,6 @@ ] -def update_deposit(authenticated_client, collection, deposit, atom_dataset): - for atom_data in ["entry-data2", "entry-data3"]: - update_deposit_with_metadata( - authenticated_client, collection, deposit, atom_dataset[atom_data] - ) - return deposit - - def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata): # update deposit's metadata response = authenticated_client.post( @@ -52,9 +44,14 @@ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + + atom_xml_metadata = [] + for atom_key in ["entry-data2", "entry-data3"]: + atom_xml_data = atom_dataset[atom_key] + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml_data, + ) + atom_xml_metadata.append(atom_xml_data) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = 
authenticated_client.get(url) @@ -68,12 +65,7 @@ "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "origin_metadata": { - "metadata": { - "author": ["some awesome author", "another one", "no one"], - "codemeta:dateCreated": "2017-10-07T15:17:08Z", - "external_identifier": "some-external-id", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa - }, + "raw_metadata": atom_xml_metadata, "provider": { "metadata": {}, "provider_name": "", @@ -118,9 +110,13 @@ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + atom_xml_metadata = [] + for atom_key in ["entry-data2", "entry-data3"]: + atom_xml_data = atom_dataset[atom_key] + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml_data, + ) + atom_xml_metadata.append(atom_xml_data) rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa" swhid = "swh:1:rev:%s" % rev_id fake_parent = Deposit( @@ -143,12 +139,7 @@ "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "origin_metadata": { - "metadata": { - "author": ["some awesome author", "another one", "no one"], - "codemeta:dateCreated": "2017-10-07T15:17:08Z", - "external_identifier": "some-external-id", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa - }, + "raw_metadata": atom_xml_metadata, "provider": { "metadata": {}, "provider_name": "", @@ -193,9 +184,7 @@ deposit = partial_deposit deposit.external_id = "hal-01243065" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + # add metadata to the deposit with datePublished and dateCreated codemeta_entry_data = ( atom_dataset["metadata"] @@ -204,9 +193,15 @@ 2017-05-03T16:08:47+02:00 """ ) - update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, codemeta_entry_data - ) + 
atom_xml_metadata = [ + atom_dataset["entry-data2"], + atom_dataset["entry-data3"], + codemeta_entry_data, + ] + for atom_data in atom_xml_metadata: + update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_data, + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) @@ -215,51 +210,13 @@ assert response._headers["content-type"][1] == "application/json" data = response.json() - metadata = { - "author": [ - "some awesome author", - "another one", - "no one", - {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - ], - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:dateCreated": [ - "2017-10-07T15:17:08Z", - "2015-04-06T17:08:47+02:00", - ], - "codemeta:datePublished": "2017-05-03T16:08:47+02:00", - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - {"codemeta:name": "GNU General Public License v3.0 only"}, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa - "codemeta:version": "1", - "external_identifier": ["some-external-id", "hal-01243065"], - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", - } expected_meta = { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, "origin_metadata": { - "metadata": metadata, + "raw_metadata": atom_xml_metadata, "provider": { "metadata": {}, "provider_name": "", @@ -317,48 +274,13 @@ assert response._headers["content-type"][1] == "application/json" data = response.json() - metadata = { - 
"author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - { - "codemeta:name": "GNU " - "General " - "Public " - "License " - "v3.0 " - "only" - }, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", - "codemeta:version": "1", - "external_identifier": "hal-01243065", - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - } - expected_origin = { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id), } expected_origin_metadata = { - "metadata": metadata, + "raw_metadata": [codemeta_entry_data], "provider": { "metadata": {}, "provider_name": "", @@ -438,51 +360,8 @@ "url": "https://hal-test.archives-ouvertes.fr/external-id-partial", } - metadata = { - "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:dateCreated": [ - "2015-04-06T17:08:47+02:00", - "2016-04-06T17:08:47+02:00", - ], - "codemeta:datePublished": [ - "2017-05-03T16:08:47+02:00", - "2018-05-03T16:08:47+02:00", - ], - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - { - "codemeta:name": "GNU " - "General " - "Public " - "License " - "v3.0 " - "only" - }, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", 
"python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa - "codemeta:version": "1", - "external_identifier": "hal-01243065", - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - } - expected_origin_metadata = { - "metadata": metadata, + "raw_metadata": [codemeta_entry_data], "provider": { "metadata": {}, "provider_name": "",