diff --git a/docs/endpoints/collection.rst b/docs/endpoints/collection.rst --- a/docs/endpoints/collection.rst +++ b/docs/endpoints/collection.rst @@ -23,7 +23,6 @@ -F "file=@deposit.json;type=application/zip;filename=payload" \ -F "atom=@atom-entry.xml;type=application/atom+xml;charset=UTF-8" \ -H 'In-Progress: false' \ - -H 'Slug: some-external-id' \ -XPOST https://deposit.softwareheritage.org/1/hal/ .. code:: http @@ -31,7 +30,6 @@ POST /1/hal/ HTTP/1.1 Host: deposit.softwareheritage.org Authorization: Basic xxxxxxxxxxxx= - Slug: some-external-id In-Progress: false Content-Length: 123456 Content-Type: multipart/form-data; boundary=----------------------123456798 diff --git a/docs/specs/spec-loading.rst b/docs/specs/spec-loading.rst --- a/docs/specs/spec-loading.rst +++ b/docs/specs/spec-loading.rst @@ -55,7 +55,8 @@ ~~~~~~~~~~~~~~~ We create an origin URL by concatenating the client's `provider_url` and the -value of the Slug header of the initial POST request of the deposit. +value of the Slug header of the initial POST request of the deposit +(or a randomly generated slug if it is missing). For examples: diff --git a/swh/deposit/api/collection.py b/swh/deposit/api/collection.py --- a/swh/deposit/api/collection.py +++ b/swh/deposit/api/collection.py @@ -95,17 +95,11 @@ deposit = self._deposit_create(req, collection_name, external_id=headers.slug) if req.content_type in ACCEPT_ARCHIVE_CONTENT_TYPES: - receipt = self._binary_upload( - req, headers, collection_name, deposit, check_slug_is_present=True - ) + receipt = self._binary_upload(req, headers, collection_name, deposit) elif req.content_type.startswith("multipart/"): - receipt = self._multipart_upload( - req, headers, collection_name, deposit, check_slug_is_present=True - ) + receipt = self._multipart_upload(req, headers, collection_name, deposit) else: - receipt = self._atom_entry( - req, headers, collection_name, deposit, check_slug_is_present=True - ) + receipt = self._atom_entry(req, headers, collection_name, deposit) return status.HTTP_201_CREATED, EDIT_IRI, receipt diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -8,6 +8,7 @@ import hashlib import json from typing import Any, Dict, Optional, Sequence, Tuple, Type, Union +import uuid import attr from django.core.files.uploadedfile import UploadedFile @@ -63,7 +64,6 @@ PARSING_ERROR, DepositError, ParserError, - raise_missing_slug_error, ) from ..models import DepositClient, DepositCollection, DepositRequest from ..parsers import parse_swh_reference, parse_xml @@ -139,7 +139,12 @@ def guess_deposit_origin_url(deposit: Deposit): """Guesses an origin url for the given deposit.""" - return "%s/%s" % (deposit.client.provider_url.rstrip("/"), deposit.external_id,) + external_id = deposit.external_id + if not external_id: + # The client provided neither an origin_url nor a slug. That's inconvenient, + # but SWORD requires we support it. So let's generate a random slug. + external_id = str(uuid.uuid4()) + return "%s/%s" % (deposit.client.provider_url.rstrip("/"), external_id) class AuthenticatedAPIView(APIView): @@ -226,7 +231,7 @@ deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() - if deposit.external_id and not deposit.origin_url: + if not deposit.origin_url: deposit.origin_url = guess_deposit_origin_url(deposit) if self.config["checks"]: @@ -386,7 +391,6 @@ deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, - check_slug_is_present: bool = False, ) -> Receipt: """Binary upload routine. @@ -405,8 +409,6 @@ deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. - check_slug_is_present: Check for the slug header if True and raise - if not present Raises: - 400 (bad request) if the request is not providing an external @@ -448,10 +450,6 @@ self._check_file_length(filehandler, content_length) self._check_file_md5sum(filehandler, headers.content_md5sum) - slug = headers.slug - if check_slug_is_present and not slug: - raise_missing_slug_error() - # actual storage of data archive_metadata = filehandler self._deposit_put( @@ -488,7 +486,6 @@ deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, - check_slug_is_present: bool = False, ) -> Receipt: """Multipart upload supported with exactly: - 1 archive (zip) @@ -509,8 +506,6 @@ deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. - check_slug_is_present: Check for the slug header if True and raise - if not present Raises: - 400 (bad request) if the request is not providing an external @@ -522,10 +517,6 @@ - 415 (unsupported media type) if a wrong media type is provided """ - slug = headers.slug - if check_slug_is_present and not slug: - raise_missing_slug_error() - content_types_present = set() data: Dict[str, Optional[Any]] = { @@ -703,7 +694,6 @@ deposit: Deposit, replace_metadata: bool = False, replace_archives: bool = False, - check_slug_is_present: bool = False, ) -> Receipt: """Atom entry deposit. @@ -720,8 +710,6 @@ deposit. If False (default), this adds new archive request to existing ones. Otherwise, this will replace existing archives. ones. - check_slug_is_present: Check for the slug header if True and raise - if not present Raises: - 400 (bad request) if the request is not providing an external @@ -750,6 +738,7 @@ if ( "atom:external_identifier" in metadata + and headers.slug and metadata["atom:external_identifier"] != headers.slug ): # TODO: When clients stopped using it, raise this error @@ -759,7 +748,6 @@ "The 'external_identifier' tag is deprecated, " "the Slug header should be used instead.", ) - # Determine if we are in the metadata-only deposit case try: swhid = parse_swh_reference(metadata) @@ -768,9 +756,6 @@ PARSING_ERROR, "Invalid SWHID reference", str(e), ) - if swhid is None and check_slug_is_present and not headers.slug: - raise_missing_slug_error() - self._deposit_put( deposit=deposit, in_progress=headers.in_progress, ) diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py --- a/swh/deposit/errors.py +++ b/swh/deposit/errors.py @@ -8,7 +8,6 @@ """ import logging -from typing import NoReturn from django.shortcuts import render from rest_framework import status @@ -156,20 +155,6 @@ return make_error_response_from_dict(req, error["error"]) -def raise_missing_slug_error() -> NoReturn: - """Returns a missing slug header error dict - - """ - raise DepositError( - BAD_REQUEST, - "Missing SLUG header", - verbose_description=( - "Provide in the SLUG header one identifier, for example the " - "url pointing to the resource you are depositing." - ), - ) - - class DepositError(ValueError): """Represents an error that should be reported to the client diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py --- a/swh/deposit/tests/api/test_collection_post_atom.py +++ b/swh/deposit/tests/api/test_collection_post_atom.py @@ -6,6 +6,7 @@ """Tests the handling of the Atom content when doing a POST Col-IRI.""" from io import BytesIO +import uuid from django.urls import reverse import pytest @@ -96,13 +97,16 @@ def test_post_deposit_atom_no_slug_header( - authenticated_client, deposit_collection, atom_dataset + authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker ): - """Posting an atom entry without a slug header should return a 400 + """Posting an atom entry without a slug header should generate one """ url = reverse(COL_IRI, args=[deposit_collection.name]) + id_ = str(uuid.uuid4()) + mocker.patch("uuid.uuid4", return_value=id_) + # when response = authenticated_client.post( url, @@ -112,8 +116,14 @@ HTTP_IN_PROGRESS="false", ) - assert b"Missing SLUG header" in response.content - assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.status_code == status.HTTP_201_CREATED + response_content = parse_xml(BytesIO(response.content)) + deposit_id = response_content["swh:deposit_id"] + + deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.collection == deposit_collection + assert deposit.origin_url == deposit_user.provider_url + id_ + assert deposit.status == DEPOSIT_STATUS_DEPOSITED def test_post_deposit_atom_with_external_identifier( @@ -134,7 +144,6 @@ HTTP_SLUG="something", ) - print(response.content) assert b"The 'external_identifier' tag is deprecated" in response.content assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/swh/deposit/tests/api/test_collection_post_binary.py b/swh/deposit/tests/api/test_collection_post_binary.py --- a/swh/deposit/tests/api/test_collection_post_binary.py +++ b/swh/deposit/tests/api/test_collection_post_binary.py @@ -6,6 +6,7 @@ """Tests the handling of the binary content when doing a POST Col-IRI.""" from io import BytesIO +import uuid from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse @@ -19,11 +20,14 @@ def test_post_deposit_binary_no_slug( - authenticated_client, deposit_collection, sample_archive + authenticated_client, deposit_collection, sample_archive, deposit_user, mocker ): - """Posting a binary deposit without slug header should return 400 + """Posting a binary deposit without slug header should generate one """ + id_ = str(uuid.uuid4()) + mocker.patch("uuid.uuid4", return_value=id_) + url = reverse(COL_IRI, args=[deposit_collection.name]) # when @@ -39,8 +43,14 @@ HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", ) - assert b"Missing SLUG header" in response.content - assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.status_code == status.HTTP_201_CREATED + response_content = parse_xml(BytesIO(response.content)) + deposit_id = response_content["swh:deposit_id"] + + deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.collection == deposit_collection + assert deposit.origin_url == deposit_user.provider_url + id_ + assert deposit.status == DEPOSIT_STATUS_DEPOSITED def test_post_deposit_binary_support( diff --git a/swh/deposit/tests/api/test_collection_post_multipart.py b/swh/deposit/tests/api/test_collection_post_multipart.py --- a/swh/deposit/tests/api/test_collection_post_multipart.py +++ b/swh/deposit/tests/api/test_collection_post_multipart.py @@ -6,6 +6,7 @@ """Tests handling of multipart requests to POST Col-IRI.""" from io import BytesIO +import uuid from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse @@ -18,12 +19,15 @@ from swh.deposit.tests.common import check_archive -def test_post_deposit_multipart_without_slug_header_is_bad_request( - authenticated_client, deposit_collection, atom_dataset +def test_post_deposit_multipart_without_slug_header( + authenticated_client, deposit_collection, atom_dataset, mocker, deposit_user ): # given url = reverse(COL_IRI, args=[deposit_collection.name]) + id_ = str(uuid.uuid4()) + mocker.patch("uuid.uuid4", return_value=id_) + archive_content = b"some content representing archive" archive = InMemoryUploadedFile( BytesIO(archive_content), @@ -53,8 +57,14 @@ HTTP_IN_PROGRESS="false", ) - assert b"Missing SLUG header" in response.content - assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.status_code == status.HTTP_201_CREATED + response_content = parse_xml(BytesIO(response.content)) + deposit_id = response_content["swh:deposit_id"] + + deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.collection == deposit_collection + assert deposit.origin_url == deposit_user.provider_url + id_ + assert deposit.status == DEPOSIT_STATUS_DEPOSITED def test_post_deposit_multipart_zip(