diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -19,7 +19,18 @@ from rest_framework.request import Request from rest_framework.views import APIView +from swh.deposit.api.checks import check_metadata +from swh.deposit.api.converters import convert_status_detail +from swh.deposit.models import Deposit from swh.model import hashutil +from swh.model.identifiers import SWHID, ValidationError +from swh.model.model import ( + MetadataAuthority, + MetadataAuthorityType, + MetadataFetcher, + MetadataTargetType, + RawExtrinsicMetadata, +) from swh.scheduler.utils import create_oneshot_task_dict from ..config import ( @@ -52,8 +63,8 @@ make_error_response, make_error_response_from_dict, ) -from ..models import Deposit, DepositClient, DepositCollection, DepositRequest -from ..parsers import parse_xml +from ..models import DepositClient, DepositCollection, DepositRequest +from ..parsers import parse_swh_reference, parse_xml ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"] ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"] @@ -603,6 +614,91 @@ "status": deposit.status, } + def _store_metadata_deposit( + self, + deposit: Deposit, + swhid: Union[str, SWHID], + metadata: Dict, + raw_metadata: bytes, + ) -> Dict: + """Metadata-only deposit + + Checks: + - The metadata received should pass the functional metadata checks. + - The SWHID is technically valid + + """ + if not metadata: + return make_error_dict( + BAD_REQUEST, + "Empty body request is not supported", + "Atom entry deposit is supposed to send for metadata. " + "If the body is empty, there is no metadata.", + ) + + metadata_ok, error_details = check_metadata(metadata) + if not metadata_ok: + assert error_details, "Details should be set when a failure occurs" + return make_error_dict( + BAD_REQUEST, + "Functional metadata checks failure", + convert_status_detail(error_details), + ) + + metadata_authority = MetadataAuthority( + type=MetadataAuthorityType.DEPOSIT_CLIENT, + url=deposit.client.provider_url, + metadata={"name": deposit.client.last_name}, + ) + + metadata_fetcher = MetadataFetcher( + name=self.tool["name"], + version=self.tool["version"], + metadata=self.tool["configuration"], + ) + + # replace metadata within the deposit backend + deposit_request_data = { + METADATA_KEY: metadata, + RAW_METADATA_KEY: raw_metadata, + } + + # actually add the metadata to the completed deposit + deposit_request = self._deposit_request_put(deposit, deposit_request_data) + + map_type = { + "content": MetadataTargetType.CONTENT, + "directory": MetadataTargetType.DIRECTORY, + "revision": MetadataTargetType.REVISION, + "release": MetadataTargetType.RELEASE, + "snapshot": MetadataTargetType.SNAPSHOT, + "origin": MetadataTargetType.ORIGIN, + } + assert isinstance(swhid, SWHID) + + # store that metadata to the metadata storage + metadata_object = RawExtrinsicMetadata( + type=map_type[swhid.object_type], + target=swhid, + discovery_date=deposit_request.date, + authority=metadata_authority, + fetcher=metadata_fetcher, + format="sword-v2-atom-codemeta", + metadata=raw_metadata, + origin=deposit.origin_url, + ) + + # write to metadata storage + self.storage_metadata.metadata_authority_add([metadata_authority]) + self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) + self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) + return { + "deposit_id": deposit.id, + "deposit_date": deposit_request.date, + "status": deposit.status, + "archive": None, + } + def _atom_entry( self, request: Request, @@ -662,11 +758,13 @@ "If the body is empty, there is no metadata.", ) - external_id = metadata.get("external_identifier", headers["slug"]) + # Determine if we are in the metadata-only deposit case + try: + swhid = parse_swh_reference(metadata) + except ValidationError as e: + return make_error_dict(PARSING_ERROR, "Invalid SWHID reference", str(e),) - # TODO: Determine if we are in the metadata-only deposit case. If it is, then - # save deposit and deposit request typed 'metadata' and send metadata to the - # metadata storage. Otherwise, do as existing deposit. + external_id = metadata.get("external_identifier", headers["slug"]) deposit = self._deposit_put( request, @@ -675,6 +773,9 @@ external_id=external_id, ) + if swhid is not None: + return self._store_metadata_deposit(deposit, swhid, metadata, raw_metadata) + self._deposit_request_put( deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py --- a/swh/deposit/api/deposit_update.py +++ b/swh/deposit/api/deposit_update.py @@ -8,28 +8,10 @@ from rest_framework import status from rest_framework.request import Request -from swh.deposit.api.checks import check_metadata -from swh.deposit.api.converters import convert_status_detail from swh.deposit.models import Deposit from swh.model.identifiers import parse_swhid -from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, - MetadataTargetType, - RawExtrinsicMetadata, -) -from swh.storage import get_storage -from swh.storage.interface import StorageInterface - -from ..config import ( - CONT_FILE_IRI, - DEPOSIT_STATUS_LOAD_SUCCESS, - EDIT_SE_IRI, - EM_IRI, - METADATA_KEY, - RAW_METADATA_KEY, -) + +from ..config import CONT_FILE_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, EDIT_SE_IRI, EM_IRI from ..errors import BAD_REQUEST, ParserError, make_error_dict from ..parsers import ( SWHAtomEntryParser, @@ -125,12 +107,6 @@ parser_classes = (SWHMultiPartParser, SWHAtomEntryParser) - def __init__(self): - super().__init__() - self.storage_metadata: StorageInterface = get_storage( - **self.config["storage_metadata"] - ) - def restrict_access( self, request: Request, headers: Dict, deposit: Deposit ) -> Dict[str, Any]: @@ -221,69 +197,10 @@ "Please ensure your metadata file is correctly formatted.", ) - if not metadata: - return make_error_dict( - BAD_REQUEST, - "Empty body request is not supported", - "Atom entry deposit is supposed to send for metadata. " - "If the body is empty, there is no metadata.", - ) - - metadata_ok, error_details = check_metadata(metadata) - if not metadata_ok: - assert error_details, "Details should be set when a failure occurs" - return make_error_dict( - BAD_REQUEST, - "Functional metadata checks failure", - convert_status_detail(error_details), - ) - - metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.DEPOSIT_CLIENT, - url=deposit.client.provider_url, - metadata={"name": deposit.client.last_name}, + return self._store_metadata_deposit( + deposit, parse_swhid(swhid), metadata, raw_metadata ) - metadata_fetcher = MetadataFetcher( - name=self.tool["name"], - version=self.tool["version"], - metadata=self.tool["configuration"], - ) - - deposit_swhid = parse_swhid(swhid) - - # replace metadata within the deposit backend - deposit_request_data = { - METADATA_KEY: metadata, - RAW_METADATA_KEY: raw_metadata, - } - - # actually add the metadata to the completed deposit - deposit_request = self._deposit_request_put(deposit, deposit_request_data) - # store that metadata to the metadata storage - metadata_object = RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, - target=deposit_swhid, - discovery_date=deposit_request.date, - authority=metadata_authority, - fetcher=metadata_fetcher, - format="sword-v2-atom-codemeta", - metadata=raw_metadata, - origin=deposit.origin_url, - ) - - # write to metadata storage - self.storage_metadata.metadata_authority_add([metadata_authority]) - self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) - self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) - - return { - "deposit_id": deposit_id, - "deposit_date": deposit_request.date, - "status": deposit.status, - "archive": None, - } - def process_post( self, request, diff --git a/swh/deposit/config.py b/swh/deposit/config.py --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -10,6 +10,8 @@ from swh.deposit import __version__ from swh.scheduler import get_scheduler from swh.scheduler.interface import SchedulerInterface +from swh.storage import get_storage +from swh.storage.interface import StorageInterface # IRIs (Internationalized Resource identifier) sword 2.0 specified EDIT_SE_IRI = "edit_se_iri" @@ -101,3 +103,6 @@ "version": __version__, "configuration": {"sword_version": "2"}, } + self.storage_metadata: StorageInterface = get_storage( + **self.config["storage_metadata"] + ) diff --git a/swh/deposit/tests/api/test_deposit_metadata.py b/swh/deposit/tests/api/test_deposit_metadata.py new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/api/test_deposit_metadata.py @@ -0,0 +1,28 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from django.urls import reverse +from rest_framework import status + +from swh.deposit.config import COL_IRI + + +def test_deposit_metadata_invalid( + authenticated_client, deposit_collection, atom_dataset +): + """Posting an invalid swhid reference is propagated to clients + + """ + invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49" + xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid) + + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=xml_data, + HTTP_SLUG="external-id", + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert b"Invalid SWHID reference" in response.content diff --git a/swh/deposit/tests/api/test_parsers.py b/swh/deposit/tests/api/test_parsers.py --- a/swh/deposit/tests/api/test_parsers.py +++ b/swh/deposit/tests/api/test_parsers.py @@ -187,19 +187,8 @@ @pytest.fixture -def xml_with_swhid(): - xml_data = """ - - - - - - - - """ - return xml_data.strip() +def xml_with_swhid(atom_dataset): + return atom_dataset["entry-data-with-swhid"] @pytest.mark.parametrize( diff --git a/swh/deposit/tests/data/atom/entry-data-with-swhid.xml b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-data-with-swhid.xml @@ -0,0 +1,12 @@ + + + Awesome Compiler + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + + + + + +