diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Any, Dict, List, Tuple + from rest_framework.permissions import AllowAny from swh.deposit import utils @@ -39,22 +41,27 @@ for deposit_request in deposit_requests: yield deposit_request - def _metadata_get(self, deposit): - """Given a deposit, aggregate all metadata requests. + def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]: + """Given a deposit, retrieve all metadata requests into one Dict and return both that + aggregated metadata dict and the list of raw_metadata. Args: - deposit (Deposit): The deposit instance to extract - metadata from. + deposit: The deposit instance to extract metadata from Returns: - metadata dict from the deposit. 
+ Tuple of aggregated metadata dict, list of raw_metadata """ - metadata = ( - m.metadata - for m in self._deposit_requests(deposit, request_type=METADATA_TYPE) - ) - return utils.merge(*metadata) + metadata: List[Dict[str, Any]] = [] + raw_metadata: List[str] = [] + for deposit_request in self._deposit_requests( + deposit, request_type=METADATA_TYPE + ): + metadata.append(deposit_request.metadata) + raw_metadata.append(deposit_request.raw_metadata) + + aggregated_metadata = utils.merge(*metadata) + return (aggregated_metadata, raw_metadata) class APIPrivateView(APIConfig, AuthenticatedAPIView): diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -11,6 +11,7 @@ import zipfile from rest_framework import status +from rest_framework.request import Request from swh.scheduler.utils import create_oneshot_task_dict @@ -130,22 +131,22 @@ return True, None def process_get( - self, req, collection_name: str, deposit_id: int + self, req: Request, collection_name: str, deposit_id: int ) -> Tuple[int, Dict, str]: """Build a unique tarball from the multiple received and stream that content to the client. 
Args: - req (Request): - collection_name (str): Collection owning the deposit - deposit_id (id): Deposit concerned by the reading + req: Client request + collection_name: Collection owning the deposit + deposit_id: Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) - metadata = self._metadata_get(deposit) + metadata, _ = self._metadata_get(deposit) problems: Dict = {} # will check each deposit's associated request (both of type # archive and metadata) for errors diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -131,26 +131,44 @@ commit_date = deposit.complete_date return (normalize_date(author_date), normalize_date(commit_date)) - def metadata_read(self, deposit): - """Read and aggregate multiple data on deposit into one unified data - dictionary. + def metadata_read(self, deposit: Deposit) -> Dict[str, Any]: + """Read and aggregate multiple deposit information into one unified dictionary. Args: - deposit (Deposit): Deposit concerned by the data aggregation. + deposit: Deposit concerned by the data aggregation. Returns: - Dictionary of data representing the deposit to inject in swh. + Dictionary of deposit information read by the deposit loader, with the + following keys: + + **origin** (Dict): Information about the origin + + **origin_metadata** (Dict): Metadata about the origin to load + + **metadata_raw** (List[str]): List of raw metadata received for the + deposit + + **metadata_dict** (Dict): Deposit aggregated metadata into one dict + + **provider** (Dict): the metadata provider information about the + deposit client + + **tool** (Dict): the deposit information + + **deposit** (Dict): deposit information relevant to build the revision + (author_date, committer_date, etc...) 
""" - metadata = self._metadata_get(deposit) + metadata, raw_metadata = self._metadata_get(deposit) # Read information metadata data = {"origin": {"type": "deposit", "url": deposit.origin_url,}} author_date, commit_date = self._normalize_dates(deposit, metadata) if deposit.parent: - swh_persistent_id = deposit.parent.swhid - swhid = identifiers.parse_swhid(swh_persistent_id) + parent_swhid = deposit.parent.swhid + assert parent_swhid is not None + swhid = identifiers.parse_swhid(parent_swhid) parent_revision = swhid.object_id parents = [parent_revision] else: @@ -165,7 +183,8 @@ "metadata": {}, }, "tool": self.tool, - "metadata": metadata, + "metadata_raw": raw_metadata, + "metadata_dict": metadata, } data["deposit"] = { "id": deposit.id, diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py --- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py +++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py @@ -6,9 +6,10 @@ from django.urls import reverse from rest_framework import status -from swh.deposit import __version__ +from swh.deposit import __version__, utils from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON from swh.deposit.models import Deposit +from swh.deposit.parsers import parse_xml PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc" @@ -22,14 +23,6 @@ ] -def update_deposit(authenticated_client, collection, deposit, atom_dataset): - for atom_data in ["entry-data2", "entry-data3"]: - update_deposit_with_metadata( - authenticated_client, collection, deposit, atom_dataset[atom_data] - ) - return deposit - - def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata): # update deposit's metadata response = authenticated_client.post( @@ -52,28 +45,29 @@ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit( - 
authenticated_client, deposit_collection, deposit, atom_dataset - ) + + metadata_xml_atoms = [ + atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] + ] + metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] + for atom_xml in metadata_xml_atoms: + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml, + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() - - expected_meta = { + assert data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "origin_metadata": { - "metadata": { - "author": ["some awesome author", "another one", "no one"], - "codemeta:dateCreated": "2017-10-07T15:17:08Z", - "external_identifier": "some-external-id", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa - }, + "metadata_raw": metadata_xml_atoms, + "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", @@ -106,8 +100,6 @@ }, } - assert data == expected_meta - def test_read_metadata_revision_with_parent( authenticated_client, deposit_collection, partial_deposit, atom_dataset @@ -118,9 +110,15 @@ deposit = partial_deposit deposit.external_id = "some-external-id" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + metadata_xml_atoms = [ + atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"] + ] + metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] + for atom_xml in metadata_xml_atoms: + deposit = update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml, + ) + rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa" swhid = "swh:1:rev:%s" % rev_id fake_parent = Deposit( @@ 
-136,19 +134,14 @@ assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() - - expected_meta = { + assert data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/some-external-id", }, "origin_metadata": { - "metadata": { - "author": ["some awesome author", "another one", "no one"], - "codemeta:dateCreated": "2017-10-07T15:17:08Z", - "external_identifier": "some-external-id", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa - }, + "metadata_raw": metadata_xml_atoms, + "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", @@ -181,8 +174,6 @@ }, } - assert data == expected_meta - def test_read_metadata_3( authenticated_client, deposit_collection, partial_deposit, atom_dataset @@ -193,9 +184,7 @@ deposit = partial_deposit deposit.external_id = "hal-01243065" deposit.save() - deposit = update_deposit( - authenticated_client, deposit_collection, deposit, atom_dataset - ) + # add metadata to the deposit with datePublished and dateCreated codemeta_entry_data = ( atom_dataset["metadata"] @@ -204,9 +193,16 @@ 2017-05-03T16:08:47+02:00 """ ) - update_deposit_with_metadata( - authenticated_client, deposit_collection, deposit, codemeta_entry_data - ) + metadata_xml_atoms = [ + atom_dataset["entry-data2"], + atom_dataset["entry-data3"], + codemeta_entry_data, + ] + metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms] + for atom_xml in metadata_xml_atoms: + update_deposit_with_metadata( + authenticated_client, deposit_collection, deposit, atom_xml, + ) for url in private_get_raw_url_endpoints(deposit_collection, deposit): response = authenticated_client.get(url) @@ -214,52 +210,14 @@ assert response.status_code == status.HTTP_200_OK assert response._headers["content-type"][1] == "application/json" data = response.json() - - metadata = { - "author": [ - "some awesome author", - 
"another one", - "no one", - {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - ], - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:dateCreated": [ - "2017-10-07T15:17:08Z", - "2015-04-06T17:08:47+02:00", - ], - "codemeta:datePublished": "2017-05-03T16:08:47+02:00", - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - {"codemeta:name": "GNU General Public License v3.0 only"}, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa - "codemeta:version": "1", - "external_identifier": ["some-external-id", "hal-01243065"], - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - "url": "https://hal-test.archives-ouvertes.fr/some-external-id", - } - expected_meta = { + assert data == { "origin": { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/hal-01243065", }, "origin_metadata": { - "metadata": metadata, + "metadata_raw": metadata_xml_atoms, + "metadata_dict": utils.merge(*metadata_xml_raws), "provider": { "metadata": {}, "provider_name": "", @@ -291,7 +249,6 @@ "revision_parents": [], }, } - assert data == expected_meta def test_read_metadata_4( @@ -317,48 +274,14 @@ assert response._headers["content-type"][1] == "application/json" data = response.json() - metadata = { - "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - { - 
"codemeta:name": "GNU " - "General " - "Public " - "License " - "v3.0 " - "only" - }, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", - "codemeta:version": "1", - "external_identifier": "hal-01243065", - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - } - expected_origin = { "type": "deposit", "url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id), } expected_origin_metadata = { - "metadata": metadata, + "metadata_raw": [codemeta_entry_data], + "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "", @@ -438,51 +361,9 @@ "url": "https://hal-test.archives-ouvertes.fr/external-id-partial", } - metadata = { - "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"}, - "client": "hal", - "codemeta:applicationCategory": "test", - "codemeta:author": {"codemeta:name": "Morane Gruenpeter"}, - "codemeta:dateCreated": [ - "2015-04-06T17:08:47+02:00", - "2016-04-06T17:08:47+02:00", - ], - "codemeta:datePublished": [ - "2017-05-03T16:08:47+02:00", - "2018-05-03T16:08:47+02:00", - ], - "codemeta:description": "this is the description", - "codemeta:developmentStatus": "stable", - "codemeta:keywords": "DSP programming", - "codemeta:license": [ - { - "codemeta:name": "GNU " - "General " - "Public " - "License " - "v3.0 " - "only" - }, - { - "codemeta:name": "CeCILL " - "Free " - "Software " - "License " - "Agreement " - "v1.1" - }, - ], - "codemeta:programmingLanguage": ["php", "python", "C"], - "codemeta:runtimePlatform": "phpstorm", - "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa - "codemeta:version": "1", - "external_identifier": "hal-01243065", - "id": "hal-01243065", - "title": "Composing a Web of Audio Applications", - } - 
expected_origin_metadata = { - "metadata": metadata, + "metadata_raw": [codemeta_entry_data], + "metadata_dict": parse_xml(codemeta_entry_data), "provider": { "metadata": {}, "provider_name": "",