diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection.py --- a/swh/deposit/tests/api/test_collection.py +++ b/swh/deposit/tests/api/test_collection.py @@ -9,19 +9,9 @@ from django.urls import reverse from rest_framework import status -from swh.deposit.config import ( - COL_IRI, - DEPOSIT_STATUS_LOAD_FAILURE, - DEPOSIT_STATUS_LOAD_SUCCESS, - DEPOSIT_STATUS_PARTIAL, - DEPOSIT_STATUS_REJECTED, - SE_IRI, -) -from swh.deposit.models import Deposit +from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_REJECTED from swh.deposit.parsers import parse_xml -from ..conftest import create_deposit - def test_deposit_post_will_fail_with_401(client): """Without authentication, endpoint refuses access with 401 response @@ -83,317 +73,3 @@ response_content = parse_xml(BytesIO(response.content)) actual_state = response_content["deposit_status"] assert actual_state == DEPOSIT_STATUS_REJECTED - - -def test_act_on_deposit_rejected_is_not_permitted( - authenticated_client, deposit_collection, rejected_deposit, atom_dataset -): - deposit = rejected_deposit - - response = authenticated_client.post( - reverse(SE_IRI, args=[deposit.collection.name, deposit.id]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - HTTP_SLUG=deposit.external_id, - ) - - assert response.status_code == status.HTTP_400_BAD_REQUEST - msg = "You can only act on deposit with status '%s'" % ( - DEPOSIT_STATUS_PARTIAL, - ) - assert msg in response.content.decode("utf-8") - - -def test_add_deposit_when_partial_makes_new_deposit( - authenticated_client, - deposit_collection, - partial_deposit, - atom_dataset, - deposit_user, -): - """Posting deposit on collection when previous is partial makes new deposit - - """ - deposit = partial_deposit - assert deposit.status == DEPOSIT_STATUS_PARTIAL - origin_url = deposit_user.provider_url + deposit.external_id - - # adding a new deposit with the same external id - response = authenticated_client.post( 
- reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - HTTP_SLUG=deposit.external_id, - ) - - assert response.status_code == status.HTTP_201_CREATED, response.content.decode() - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - assert deposit_id != deposit.id # new deposit - - new_deposit = Deposit.objects.get(pk=deposit_id) - assert new_deposit != deposit - assert new_deposit.parent is None - - -def test_add_deposit_when_failed_makes_new_deposit_with_no_parent( - authenticated_client, deposit_collection, failed_deposit, atom_dataset, deposit_user -): - """Posting deposit on collection when deposit done makes new deposit with - parent - - """ - deposit = failed_deposit - assert deposit.status == DEPOSIT_STATUS_LOAD_FAILURE - origin_url = deposit_user.provider_url + deposit.external_id - - # adding a new deposit with the same external id as a completed deposit - # creates the parenting chain - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - HTTP_SLUG=deposit.external_id, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - assert deposit_id != deposit.id - - new_deposit = Deposit.objects.get(pk=deposit_id) - assert new_deposit != deposit - assert new_deposit.parent is None - - -def test_add_deposit_when_done_makes_new_deposit_with_parent_old_one( - authenticated_client, - deposit_collection, - completed_deposit, - atom_dataset, - deposit_user, -): - """Posting deposit on collection when deposit done makes new deposit with - parent - - """ - # given multiple deposit already loaded - deposit = completed_deposit - assert deposit.status == 
DEPOSIT_STATUS_LOAD_SUCCESS - origin_url = deposit_user.provider_url + deposit.external_id - - # adding a new deposit with the same external id as a completed deposit - # creates the parenting chain - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - HTTP_SLUG=deposit.external_id, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - assert deposit_id != deposit.id - - new_deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.collection == new_deposit.collection - assert deposit.origin_url == origin_url - - assert new_deposit != deposit - assert new_deposit.parent == deposit - - -def test_add_deposit_with_add_to_origin( - authenticated_client, - deposit_collection, - completed_deposit, - atom_dataset, - deposit_user, -): - """Posting deposit with creates a new deposit with parent - - """ - # given multiple deposit already loaded - deposit = completed_deposit - assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS - origin_url = deposit_user.provider_url + deposit.external_id - - # adding a new deposit with the same external id as a completed deposit - # creates the parenting chain - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - assert deposit_id != deposit.id - - new_deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.collection == new_deposit.collection - assert deposit.origin_url == origin_url - - assert new_deposit != deposit - assert new_deposit.parent == 
deposit - - -def test_add_deposit_external_id_conflict_no_parent( - authenticated_client, - another_authenticated_client, - deposit_collection, - deposit_another_collection, - atom_dataset, - sample_archive, - deposit_user, -): - """Posting a deposit with an external_id conflicting with an external_id - of a different client does not create a parent relationship - - """ - external_id = "foobar" - origin_url = deposit_user.provider_url + external_id - - # create a deposit for that other user, with the same slug - other_deposit = create_deposit( - another_authenticated_client, - deposit_another_collection.name, - sample_archive, - external_id, - DEPOSIT_STATUS_LOAD_SUCCESS, - ) - - # adding a new deposit with the same external id as a completed deposit - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - HTTP_SLUG=external_id, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - assert other_deposit.id != deposit_id - - new_deposit = Deposit.objects.get(pk=deposit_id) - - assert new_deposit.parent is None - - -def test_add_deposit_external_id_conflict_with_parent( - authenticated_client, - another_authenticated_client, - deposit_collection, - deposit_another_collection, - completed_deposit, - atom_dataset, - sample_archive, - deposit_user, -): - """Posting a deposit with an external_id conflicting with an external_id - of a different client creates a parent relationship with the deposit - of the right client instead of the last matching deposit - - This test does not have an equivalent for origin url conflicts, as these - can not happen (assuming clients do not have provider_url overlaps) - """ - # given multiple deposit already loaded - deposit = completed_deposit - assert deposit.status == 
DEPOSIT_STATUS_LOAD_SUCCESS - origin_url = deposit_user.provider_url + deposit.external_id - - # create a deposit for that other user, with the same slug - other_deposit = create_deposit( - another_authenticated_client, - deposit_another_collection.name, - sample_archive, - deposit.external_id, - DEPOSIT_STATUS_LOAD_SUCCESS, - ) - - # adding a new deposit with the same external id as a completed deposit - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - HTTP_SLUG=deposit.external_id, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - assert deposit_id != deposit.id - assert other_deposit.id != deposit.id - - new_deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.collection == new_deposit.collection - assert deposit.external_id == new_deposit.external_id - - assert new_deposit != deposit - assert new_deposit.parent == deposit - - -def test_add_deposit_add_to_origin_conflict( - authenticated_client, - another_authenticated_client, - deposit_collection, - deposit_another_collection, - atom_dataset, - sample_archive, - deposit_user, - deposit_another_user, -): - """Posting a deposit with an referencing an origin - owned by a different client raises an error - - """ - external_id = "foobar" - origin_url = deposit_another_user.provider_url + external_id - - # create a deposit for that other user, with the same slug - create_deposit( - another_authenticated_client, - deposit_another_collection.name, - sample_archive, - external_id, - DEPOSIT_STATUS_LOAD_SUCCESS, - ) - - # adding a new deposit with the same external id as a completed deposit - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - 
data=atom_dataset["entry-data0"] % origin_url, - ) - - assert response.status_code == status.HTTP_403_FORBIDDEN - assert b"must start with" in response.content - - -def test_add_deposit_add_to_wrong_origin( - authenticated_client, deposit_collection, atom_dataset, sample_archive, -): - """Posting a deposit with an referencing an origin - not starting with the provider_url raises an error - - """ - origin_url = "http://example.org/foo" - - # adding a new deposit with the same external id as a completed deposit - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - ) - - assert response.status_code == status.HTTP_403_FORBIDDEN - assert b"must start with" in response.content diff --git a/swh/deposit/tests/api/test_collection_add_to_origin.py b/swh/deposit/tests/api/test_collection_add_to_origin.py new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/api/test_collection_add_to_origin.py @@ -0,0 +1,157 @@ +# Copyright (C) 2017-2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from io import BytesIO + +from django.urls import reverse +from rest_framework import status + +from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS +from swh.deposit.models import Deposit +from swh.deposit.parsers import parse_xml + +from ..conftest import create_deposit + + +def test_add_deposit_with_add_to_origin( + authenticated_client, + deposit_collection, + completed_deposit, + atom_dataset, + deposit_user, +): + """Posting deposit with creates a new deposit with parent + + """ + # given multiple deposit already loaded + deposit = completed_deposit + assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS + origin_url = deposit_user.provider_url + 
deposit.external_id + + # adding a new deposit with the same external id as a completed deposit + # creates the parenting chain + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, + ) + + assert response.status_code == status.HTTP_201_CREATED + response_content = parse_xml(BytesIO(response.content)) + deposit_id = response_content["swh:deposit_id"] + + assert deposit_id != deposit.id + + new_deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.collection == new_deposit.collection + assert deposit.origin_url == origin_url + + assert new_deposit != deposit + assert new_deposit.parent == deposit + + +def test_add_deposit_add_to_origin_conflict( + authenticated_client, + another_authenticated_client, + deposit_collection, + deposit_another_collection, + atom_dataset, + sample_archive, + deposit_user, + deposit_another_user, +): + """Posting a deposit with an referencing an origin + owned by a different client raises an error + + """ + external_id = "foobar" + origin_url = deposit_another_user.provider_url + external_id + + # create a deposit for that other user, with the same slug + create_deposit( + another_authenticated_client, + deposit_another_collection.name, + sample_archive, + external_id, + DEPOSIT_STATUS_LOAD_SUCCESS, + ) + + # adding a new deposit with the same external id as a completed deposit + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % origin_url, + ) + + assert response.status_code == status.HTTP_403_FORBIDDEN + assert b"must start with" in response.content + + +def test_add_deposit_add_to_wrong_origin( + authenticated_client, deposit_collection, atom_dataset, sample_archive, +): + """Posting a deposit with an referencing an origin + not starting with the 
provider_url raises an error + + """ + origin_url = "http://example.org/foo" + + # adding a new deposit with the same external id as a completed deposit + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data0"] % origin_url, + ) + + assert response.status_code == status.HTTP_403_FORBIDDEN + assert b"must start with" in response.content + + +def test_add_deposit_with_add_to_origin_and_external_identifier( + authenticated_client, + deposit_collection, + completed_deposit, + atom_dataset, + deposit_user, +): + """Posting deposit with creates a new deposit with parent + + """ + # given multiple deposit already loaded + origin_url = deposit_user.provider_url + completed_deposit.external_id + + # adding a new deposit with the same external id as a completed deposit + # creates the parenting chain + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-with-both-add-to-origin-and-external-id"] + % origin_url, + ) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert b"<external_identifier> is deprecated." 
in response.content + + +def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix( + authenticated_client, deposit_collection, atom_dataset, deposit_user +): + """Creating an origin for a prefix not owned by the client is forbidden + + """ + origin_url = "http://example.org/foo" + + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, + HTTP_IN_PROGRESS="true", + ) + assert response.status_code == status.HTTP_403_FORBIDDEN + expected_msg = ( + f"Cannot create origin {origin_url}, " + f"it must start with {deposit_user.provider_url}" + ) + assert expected_msg in response.content.decode() diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py --- a/swh/deposit/tests/api/test_collection_post_atom.py +++ b/swh/deposit/tests/api/test_collection_post_atom.py @@ -8,13 +8,29 @@ from io import BytesIO import uuid +import attr from django.urls import reverse import pytest from rest_framework import status -from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED +from swh.deposit.config import ( + COL_IRI, + DEPOSIT_STATUS_DEPOSITED, + DEPOSIT_STATUS_LOAD_SUCCESS, + APIConfig, +) from swh.deposit.models import Deposit, DepositCollection, DepositRequest from swh.deposit.parsers import parse_xml +from swh.deposit.utils import compute_metadata_context +from swh.model.identifiers import SWHID, parse_swhid +from swh.model.model import ( + MetadataAuthority, + MetadataAuthorityType, + MetadataFetcher, + MetadataTargetType, + RawExtrinsicMetadata, +) +from swh.storage.interface import PagedResult def test_post_deposit_atom_201_even_with_decimal( @@ -114,32 +130,6 @@ ) in response.content -def test_add_deposit_with_add_to_origin_and_external_identifier( - authenticated_client, - deposit_collection, - completed_deposit, - atom_dataset, - deposit_user, -): - 
"""Posting deposit with creates a new deposit with parent - - """ - # given multiple deposit already loaded - origin_url = deposit_user.provider_url + completed_deposit.external_id - - # adding a new deposit with the same external id as a completed deposit - # creates the parenting chain - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-with-both-add-to-origin-and-external-id"] - % origin_url, - ) - - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"<external_identifier> is deprecated." in response.content - - def test_post_deposit_atom_403_create_wrong_origin_url_prefix( authenticated_client, deposit_collection, atom_dataset, deposit_user ): @@ -162,28 +152,6 @@ assert expected_msg in response.content.decode() -def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix( - authenticated_client, deposit_collection, atom_dataset, deposit_user -): - """Creating an origin for a prefix not owned by the client is forbidden - - """ - origin_url = "http://example.org/foo" - - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, - HTTP_IN_PROGRESS="true", - ) - assert response.status_code == status.HTTP_403_FORBIDDEN - expected_msg = ( - f"Cannot create origin {origin_url}, " - f"it must start with {deposit_user.provider_url}" - ) - assert expected_msg in response.content.decode() - - def test_post_deposit_atom_use_slug_header( authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker ): @@ -421,83 +389,249 @@ assert bool(deposit_request.archive) is False -def test_post_deposit_atom_entry_multiple_steps( - authenticated_client, deposit_collection, atom_dataset, deposit_user +def test_deposit_metadata_invalid( + authenticated_client, deposit_collection, 
atom_dataset ): - """After initial deposit, updating a deposit should return a 201 + """Posting invalid swhid reference is bad request returned to client """ - # given - origin_url = deposit_user.provider_url + "2225c695-cfb8-4ebb-aaaa-80da344efa6a" + invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49" + xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid) - with pytest.raises(Deposit.DoesNotExist): - deposit = Deposit.objects.get(origin_url=origin_url) + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=xml_data, + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert b"Invalid SWHID reference" in response.content + + +def test_deposit_metadata_fails_functional_checks( + authenticated_client, deposit_collection, atom_dataset +): + """Posting functionally invalid metadata swhid is bad request returned to client + + """ + swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49" + invalid_xml_data = atom_dataset[ + "entry-data-with-swhid-fail-metadata-functional-checks" + ].format(swhid=swhid) - # when response = authenticated_client.post( reverse(COL_IRI, args=[deposit_collection.name]), content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - HTTP_IN_PROGRESS="True", + data=invalid_xml_data, ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert b"Functional metadata checks failure" in response.content + + +@pytest.mark.parametrize( + "swhid,target_type", + [ + ( + "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49", + MetadataTargetType.CONTENT, + ), + ( + "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49", + MetadataTargetType.DIRECTORY, + ), + ( + "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49", + MetadataTargetType.REVISION, + ), + ( + "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49", + MetadataTargetType.RELEASE, + ), + ( + 
"swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", + MetadataTargetType.SNAPSHOT, + ), + ( + "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", + MetadataTargetType.CONTENT, + ), + ( + "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa + MetadataTargetType.DIRECTORY, + ), + ( + "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", + MetadataTargetType.REVISION, + ), + ( + "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", + MetadataTargetType.RELEASE, + ), + ( + "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", + MetadataTargetType.SNAPSHOT, + ), + ], +) +def test_deposit_metadata_swhid( + swhid, + target_type, + authenticated_client, + deposit_collection, + atom_dataset, + swh_storage, +): + """Posting a swhid reference is stored on raw extrinsic metadata storage - # then - assert response.status_code == status.HTTP_201_CREATED + """ + swhid_reference = parse_swhid(swhid) + swhid_core = attr.evolve(swhid_reference, metadata={}) + + xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid) + deposit_client = authenticated_client.deposit_client + + response = authenticated_client.post( + reverse(COL_IRI, args=[deposit_collection.name]), + content_type="application/atom+xml;type=entry", + data=xml_data, + ) + assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) - deposit_id = int(response_content["swh:deposit_id"]) + # Ensure the deposit is finalized + deposit_id = int(response_content["swh:deposit_id"]) deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.collection == deposit_collection - assert deposit.origin_url is None # not provided yet - assert deposit.status == "partial" + assert 
isinstance(swhid_core, SWHID) + assert deposit.swhid == str(swhid_core) + assert deposit.swhid_context == str(swhid_reference) + assert deposit.complete_date == deposit.reception_date + assert deposit.complete_date is not None + assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS + + # Ensure metadata stored in the metadata storage is consistent + metadata_authority = MetadataAuthority( + type=MetadataAuthorityType.DEPOSIT_CLIENT, + url=deposit_client.provider_url, + metadata={"name": deposit_client.last_name}, + ) - # one associated request to a deposit - deposit_requests = DepositRequest.objects.filter(deposit=deposit) - assert len(deposit_requests) == 1 + actual_authority = swh_storage.metadata_authority_get( + MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url + ) + assert actual_authority == metadata_authority + + config = APIConfig() + metadata_fetcher = MetadataFetcher( + name=config.tool["name"], + version=config.tool["version"], + metadata=config.tool["configuration"], + ) + + actual_fetcher = swh_storage.metadata_fetcher_get( + config.tool["name"], config.tool["version"] + ) + assert actual_fetcher == metadata_fetcher + + page_results = swh_storage.raw_extrinsic_metadata_get( + target_type, swhid_core, metadata_authority + ) + discovery_date = page_results.results[0].discovery_date + + assert len(page_results.results) == 1 + assert page_results.next_page_token is None + + object_type, metadata_context = compute_metadata_context(swhid_reference) + assert page_results == PagedResult( + results=[ + RawExtrinsicMetadata( + type=object_type, + target=swhid_core, + discovery_date=discovery_date, + authority=attr.evolve(metadata_authority, metadata=None), + fetcher=attr.evolve(metadata_fetcher, metadata=None), + format="sword-v2-atom-codemeta", + metadata=xml_data.encode(), + **metadata_context, + ) + ], + next_page_token=None, + ) + assert deposit.complete_date == discovery_date - atom_entry_data = atom_dataset["entry-only-create-origin"] % 
(origin_url) - for link in response_content["atom:link"]: - if link["@rel"] == "http://purl.org/net/sword/terms/add": - se_iri = link["@href"] - break - else: - assert False, f"missing SE-IRI from {response_content['link']}" +@pytest.mark.parametrize( + "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",] +) +def test_deposit_metadata_origin( + url, authenticated_client, deposit_collection, atom_dataset, swh_storage, +): + """Posting a swhid reference is stored on raw extrinsic metadata storage - # when updating the first deposit post + """ + xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url) + deposit_client = authenticated_client.deposit_client response = authenticated_client.post( - se_iri, + reverse(COL_IRI, args=[deposit_collection.name]), content_type="application/atom+xml;type=entry", - data=atom_entry_data, - HTTP_IN_PROGRESS="False", + data=xml_data, ) - # then - assert response.status_code == status.HTTP_201_CREATED, response.content.decode() - + assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) + # Ensure the deposit is finalized deposit_id = int(response_content["swh:deposit_id"]) - deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.collection == deposit_collection - assert deposit.origin_url == origin_url - assert deposit.status == DEPOSIT_STATUS_DEPOSITED + # we got not swhid as input so we cannot have those + assert deposit.swhid is None + assert deposit.swhid_context is None + assert deposit.complete_date == deposit.reception_date + assert deposit.complete_date is not None + assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS + + # Ensure metadata stored in the metadata storage is consistent + metadata_authority = MetadataAuthority( + type=MetadataAuthorityType.DEPOSIT_CLIENT, + url=deposit_client.provider_url, + metadata={"name": deposit_client.last_name}, + ) - assert len(Deposit.objects.all()) == 1 + actual_authority = 
swh_storage.metadata_authority_get( + MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url + ) + assert actual_authority == metadata_authority - # now 2 associated requests to a same deposit - deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id") - assert len(deposit_requests) == 2 + config = APIConfig() + metadata_fetcher = MetadataFetcher( + name=config.tool["name"], + version=config.tool["version"], + metadata=config.tool["configuration"], + ) - atom_entry_data1 = atom_dataset["entry-data1"] - expected_meta = [ - {"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1}, - {"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data}, - ] + actual_fetcher = swh_storage.metadata_fetcher_get( + config.tool["name"], config.tool["version"] + ) + assert actual_fetcher == metadata_fetcher - for i, deposit_request in enumerate(deposit_requests): - actual_metadata = deposit_request.metadata - assert actual_metadata == expected_meta[i]["metadata"] - assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"] - assert bool(deposit_request.archive) is False + page_results = swh_storage.raw_extrinsic_metadata_get( + MetadataTargetType.ORIGIN, url, metadata_authority + ) + discovery_date = page_results.results[0].discovery_date + + assert len(page_results.results) == 1 + assert page_results.next_page_token is None + + assert page_results == PagedResult( + results=[ + RawExtrinsicMetadata( + type=MetadataTargetType.ORIGIN, + target=url, + discovery_date=discovery_date, + authority=attr.evolve(metadata_authority, metadata=None), + fetcher=attr.evolve(metadata_fetcher, metadata=None), + format="sword-v2-atom-codemeta", + metadata=xml_data.encode(), + ) + ], + next_page_token=None, + ) + assert deposit.complete_date == discovery_date diff --git a/swh/deposit/tests/api/test_collection_post_binary.py b/swh/deposit/tests/api/test_collection_post_binary.py --- 
a/swh/deposit/tests/api/test_collection_post_binary.py +++ b/swh/deposit/tests/api/test_collection_post_binary.py @@ -8,12 +8,11 @@ from io import BytesIO import uuid -from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse import pytest from rest_framework import status -from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, EM_IRI +from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml from swh.deposit.tests.common import check_archive, create_arborescence_archive @@ -381,251 +380,3 @@ deposits = Deposit.objects.all().order_by("id") assert len(deposits) == 2 assert list(deposits), [deposit == deposit2] - - -def test_post_deposit_binary_and_post_to_add_another_archive( - authenticated_client, deposit_collection, sample_archive, tmp_path -): - """Updating a deposit should return a 201 with receipt - - """ - tmp_path = str(tmp_path) - url = reverse(COL_IRI, args=[deposit_collection.name]) - - external_id = "some-external-id-1" - - # when - response = authenticated_client.post( - url, - content_type="application/zip", # as zip - data=sample_archive["data"], - # + headers - CONTENT_LENGTH=sample_archive["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="true", - HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"],), - ) - - # then - assert response.status_code == status.HTTP_201_CREATED - - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.status == "partial" - assert deposit.external_id == external_id - assert deposit.collection == deposit_collection - assert deposit.swhid is None - - deposit_request = DepositRequest.objects.get(deposit=deposit) - assert 
deposit_request.deposit == deposit - assert deposit_request.type == "archive" - check_archive(sample_archive["name"], deposit_request.archive.name) - - # 2nd archive to upload - archive2 = create_arborescence_archive( - tmp_path, "archive2", "file2", b"some other content in file" - ) - - # uri to update the content - update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id]) - - # adding another archive for the deposit and finalizing it - response = authenticated_client.post( - update_uri, - content_type="application/zip", # as zip - data=archive2["data"], - # + headers - CONTENT_LENGTH=archive2["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"]), - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - - deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.status == DEPOSIT_STATUS_DEPOSITED - assert deposit.external_id == external_id - assert deposit.collection == deposit_collection - assert deposit.swhid is None - - deposit_requests = list( - DepositRequest.objects.filter(deposit=deposit).order_by("id") - ) - - # 2 deposit requests for the same deposit - assert len(deposit_requests) == 2 - assert deposit_requests[0].deposit == deposit - assert deposit_requests[0].type == "archive" - check_archive(sample_archive["name"], deposit_requests[0].archive.name) - - assert deposit_requests[1].deposit == deposit - assert deposit_requests[1].type == "archive" - check_archive(archive2["name"], deposit_requests[1].archive.name) - - # only 1 deposit in db - deposits = Deposit.objects.all() - assert len(deposits) == 1 - - -def test_post_deposit_then_update_refused( - authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path -): - """Updating a deposit with status 'ready' should return a 400 - - """ - tmp_path 
= str(tmp_path) - url = reverse(COL_IRI, args=[deposit_collection.name]) - - external_id = "some-external-id-1" - - # when - response = authenticated_client.post( - url, - content_type="application/zip", # as zip - data=sample_archive["data"], - # + headers - CONTENT_LENGTH=sample_archive["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=sample_archive["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="false", - HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", - ) - - # then - assert response.status_code == status.HTTP_201_CREATED - - response_content = parse_xml(BytesIO(response.content)) - deposit_id = response_content["swh:deposit_id"] - - deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.status == DEPOSIT_STATUS_DEPOSITED - assert deposit.external_id == external_id - assert deposit.collection == deposit_collection - assert deposit.swhid is None - - deposit_request = DepositRequest.objects.get(deposit=deposit) - assert deposit_request.deposit == deposit - check_archive("filename0", deposit_request.archive.name) - - # updating/adding is forbidden - - # uri to update the content - edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit_id]) - se_iri = reverse("se_iri", args=[deposit_collection.name, deposit_id]) - em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id]) - - # Testing all update/add endpoint should fail - # since the status is ready - - archive2 = create_arborescence_archive( - tmp_path, "archive2", "file2", b"some content in file 2" - ) - - # replacing file is no longer possible since the deposit's - # status is ready - r = authenticated_client.put( - em_iri, - content_type="application/zip", - data=archive2["data"], - CONTENT_LENGTH=archive2["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="false", - HTTP_CONTENT_DISPOSITION="attachment; 
filename=filename0", - ) - - assert r.status_code == status.HTTP_400_BAD_REQUEST - assert b"You can only act on deposit with status 'partial'" in r.content - - # adding file is no longer possible since the deposit's status - # is ready - r = authenticated_client.post( - em_iri, - content_type="application/zip", - data=archive2["data"], - CONTENT_LENGTH=archive2["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="false", - HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", - ) - - assert r.status_code == status.HTTP_400_BAD_REQUEST - assert b"You can only act on deposit with status 'partial'" in r.content - - # replacing metadata is no longer possible since the deposit's - # status is ready - r = authenticated_client.put( - edit_iri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-deposit-binary"], - CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), - HTTP_SLUG=external_id, - ) - - assert r.status_code == status.HTTP_400_BAD_REQUEST - assert b"You can only act on deposit with status 'partial'" in r.content - - # adding new metadata is no longer possible since the - # deposit's status is ready - r = authenticated_client.post( - se_iri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-deposit-binary"], - CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]), - HTTP_SLUG=external_id, - ) - - assert r.status_code == status.HTTP_400_BAD_REQUEST - assert b"You can only act on deposit with status 'partial'" in r.content - - archive_content = b"some content representing archive" - archive = InMemoryUploadedFile( - BytesIO(archive_content), - field_name="archive0", - name="archive0", - content_type="application/zip", - size=len(archive_content), - charset=None, - ) - - atom_entry = InMemoryUploadedFile( - BytesIO(atom_dataset["entry-data-deposit-binary"].encode("utf-8")), - 
field_name="atom0", - name="atom0", - content_type='application/atom+xml; charset="utf-8"', - size=len(atom_dataset["entry-data-deposit-binary"]), - charset="utf-8", - ) - - # replacing multipart metadata is no longer possible since the - # deposit's status is ready - r = authenticated_client.put( - edit_iri, - format="multipart", - data={"archive": archive, "atom_entry": atom_entry,}, - ) - - assert r.status_code == status.HTTP_400_BAD_REQUEST - assert b"You can only act on deposit with status 'partial'" in r.content - - # adding new metadata is no longer possible since the - # deposit's status is ready - r = authenticated_client.post( - se_iri, - format="multipart", - data={"archive": archive, "atom_entry": atom_entry,}, - ) - - assert r.status_code == status.HTTP_400_BAD_REQUEST - assert b"You can only act on deposit with status 'partial'" in r.content diff --git a/swh/deposit/tests/api/test_collection_post_metadata.py b/swh/deposit/tests/api/test_collection_post_metadata.py deleted file mode 100644 --- a/swh/deposit/tests/api/test_collection_post_metadata.py +++ /dev/null @@ -1,275 +0,0 @@ -# Copyright (C) 2020 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -"""Tests metadata is loaded when sent via a POST Col-IRI""" - -from io import BytesIO - -import attr -from django.urls import reverse -import pytest -from rest_framework import status - -from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, APIConfig -from swh.deposit.models import Deposit -from swh.deposit.parsers import parse_xml -from swh.deposit.utils import compute_metadata_context -from swh.model.identifiers import SWHID, parse_swhid -from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, - MetadataTargetType, - RawExtrinsicMetadata, -) -from swh.storage.interface 
import PagedResult - - -def test_deposit_metadata_invalid( - authenticated_client, deposit_collection, atom_dataset -): - """Posting invalid swhid reference is bad request returned to client - - """ - invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49" - xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid) - - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=xml_data, - ) - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Invalid SWHID reference" in response.content - - -def test_deposit_metadata_fails_functional_checks( - authenticated_client, deposit_collection, atom_dataset -): - """Posting functionally invalid metadata swhid is bad request returned to client - - """ - swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49" - invalid_xml_data = atom_dataset[ - "entry-data-with-swhid-fail-metadata-functional-checks" - ].format(swhid=swhid) - - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=invalid_xml_data, - ) - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Functional metadata checks failure" in response.content - - -@pytest.mark.parametrize( - "swhid,target_type", - [ - ( - "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49", - MetadataTargetType.CONTENT, - ), - ( - "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49", - MetadataTargetType.DIRECTORY, - ), - ( - "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49", - MetadataTargetType.REVISION, - ), - ( - "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49", - MetadataTargetType.RELEASE, - ), - ( - "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", - MetadataTargetType.SNAPSHOT, - ), - ( - "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", - MetadataTargetType.CONTENT, - ), - ( - 
"swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa - MetadataTargetType.DIRECTORY, - ), - ( - "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", - MetadataTargetType.REVISION, - ), - ( - "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", - MetadataTargetType.RELEASE, - ), - ( - "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", - MetadataTargetType.SNAPSHOT, - ), - ], -) -def test_deposit_metadata_swhid( - swhid, - target_type, - authenticated_client, - deposit_collection, - atom_dataset, - swh_storage, -): - """Posting a swhid reference is stored on raw extrinsic metadata storage - - """ - swhid_reference = parse_swhid(swhid) - swhid_core = attr.evolve(swhid_reference, metadata={}) - - xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid) - deposit_client = authenticated_client.deposit_client - - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=xml_data, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - - # Ensure the deposit is finalized - deposit_id = int(response_content["swh:deposit_id"]) - deposit = Deposit.objects.get(pk=deposit_id) - assert isinstance(swhid_core, SWHID) - assert deposit.swhid == str(swhid_core) - assert deposit.swhid_context == str(swhid_reference) - assert deposit.complete_date == deposit.reception_date - assert deposit.complete_date is not None - assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS - - # Ensure metadata stored in the metadata storage is consistent - metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.DEPOSIT_CLIENT, - url=deposit_client.provider_url, - 
metadata={"name": deposit_client.last_name}, - ) - - actual_authority = swh_storage.metadata_authority_get( - MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url - ) - assert actual_authority == metadata_authority - - config = APIConfig() - metadata_fetcher = MetadataFetcher( - name=config.tool["name"], - version=config.tool["version"], - metadata=config.tool["configuration"], - ) - - actual_fetcher = swh_storage.metadata_fetcher_get( - config.tool["name"], config.tool["version"] - ) - assert actual_fetcher == metadata_fetcher - - page_results = swh_storage.raw_extrinsic_metadata_get( - target_type, swhid_core, metadata_authority - ) - discovery_date = page_results.results[0].discovery_date - - assert len(page_results.results) == 1 - assert page_results.next_page_token is None - - object_type, metadata_context = compute_metadata_context(swhid_reference) - assert page_results == PagedResult( - results=[ - RawExtrinsicMetadata( - type=object_type, - target=swhid_core, - discovery_date=discovery_date, - authority=attr.evolve(metadata_authority, metadata=None), - fetcher=attr.evolve(metadata_fetcher, metadata=None), - format="sword-v2-atom-codemeta", - metadata=xml_data.encode(), - **metadata_context, - ) - ], - next_page_token=None, - ) - assert deposit.complete_date == discovery_date - - -@pytest.mark.parametrize( - "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",] -) -def test_deposit_metadata_origin( - url, authenticated_client, deposit_collection, atom_dataset, swh_storage, -): - """Posting a swhid reference is stored on raw extrinsic metadata storage - - """ - xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url) - deposit_client = authenticated_client.deposit_client - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=xml_data, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content 
= parse_xml(BytesIO(response.content)) - # Ensure the deposit is finalized - deposit_id = int(response_content["swh:deposit_id"]) - deposit = Deposit.objects.get(pk=deposit_id) - # we got not swhid as input so we cannot have those - assert deposit.swhid is None - assert deposit.swhid_context is None - assert deposit.complete_date == deposit.reception_date - assert deposit.complete_date is not None - assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS - - # Ensure metadata stored in the metadata storage is consistent - metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.DEPOSIT_CLIENT, - url=deposit_client.provider_url, - metadata={"name": deposit_client.last_name}, - ) - - actual_authority = swh_storage.metadata_authority_get( - MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url - ) - assert actual_authority == metadata_authority - - config = APIConfig() - metadata_fetcher = MetadataFetcher( - name=config.tool["name"], - version=config.tool["version"], - metadata=config.tool["configuration"], - ) - - actual_fetcher = swh_storage.metadata_fetcher_get( - config.tool["name"], config.tool["version"] - ) - assert actual_fetcher == metadata_fetcher - - page_results = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, url, metadata_authority - ) - discovery_date = page_results.results[0].discovery_date - - assert len(page_results.results) == 1 - assert page_results.next_page_token is None - - assert page_results == PagedResult( - results=[ - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=url, - discovery_date=discovery_date, - authority=attr.evolve(metadata_authority, metadata=None), - fetcher=attr.evolve(metadata_fetcher, metadata=None), - format="sword-v2-atom-codemeta", - metadata=xml_data.encode(), - ) - ], - next_page_token=None, - ) - assert deposit.complete_date == discovery_date diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection_reuse_slug.py copy 
from swh/deposit/tests/api/test_collection.py copy to swh/deposit/tests/api/test_collection_reuse_slug.py --- a/swh/deposit/tests/api/test_collection.py +++ b/swh/deposit/tests/api/test_collection_reuse_slug.py @@ -1,9 +1,8 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import hashlib from io import BytesIO from django.urls import reverse @@ -14,7 +13,6 @@ DEPOSIT_STATUS_LOAD_FAILURE, DEPOSIT_STATUS_LOAD_SUCCESS, DEPOSIT_STATUS_PARTIAL, - DEPOSIT_STATUS_REJECTED, SE_IRI, ) from swh.deposit.models import Deposit @@ -23,68 +21,6 @@ from ..conftest import create_deposit -def test_deposit_post_will_fail_with_401(client): - """Without authentication, endpoint refuses access with 401 response - - """ - url = reverse(COL_IRI, args=["hal"]) - response = client.post(url) - assert response.status_code == status.HTTP_401_UNAUTHORIZED - - -def test_access_to_another_user_collection_is_forbidden( - authenticated_client, deposit_another_collection, deposit_user -): - """Access to another user collection should return a 403 - - """ - coll2 = deposit_another_collection - url = reverse(COL_IRI, args=[coll2.name]) - response = authenticated_client.post(url) - assert response.status_code == status.HTTP_403_FORBIDDEN - msg = "Client %s cannot access collection %s" % (deposit_user.username, coll2.name,) - assert msg in response.content.decode("utf-8") - - -def test_delete_on_col_iri_not_supported(authenticated_client, deposit_collection): - """Delete on col iri should return a 405 response - - """ - url = reverse(COL_IRI, args=[deposit_collection.name]) - response = authenticated_client.delete(url) - assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED - assert "DELETE method is not supported on this endpoint" 
in response.content.decode( - "utf-8" - ) - - -def create_deposit_with_rejection_status(authenticated_client, deposit_collection): - url = reverse(COL_IRI, args=[deposit_collection.name]) - - data = b"some data which is clearly not a zip file" - md5sum = hashlib.md5(data).hexdigest() - external_id = "some-external-id-1" - - # when - response = authenticated_client.post( - url, - content_type="application/zip", # as zip - data=data, - # + headers - CONTENT_LENGTH=len(data), - # other headers needs HTTP_ prefix to be taken into account - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=md5sum, - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_CONTENT_DISPOSITION="attachment; filename=filename0", - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - actual_state = response_content["deposit_status"] - assert actual_state == DEPOSIT_STATUS_REJECTED - - def test_act_on_deposit_rejected_is_not_permitted( authenticated_client, deposit_collection, rejected_deposit, atom_dataset ): @@ -207,43 +143,6 @@ assert new_deposit.parent == deposit -def test_add_deposit_with_add_to_origin( - authenticated_client, - deposit_collection, - completed_deposit, - atom_dataset, - deposit_user, -): - """Posting deposit with creates a new deposit with parent - - """ - # given multiple deposit already loaded - deposit = completed_deposit - assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS - origin_url = deposit_user.provider_url + deposit.external_id - - # adding a new deposit with the same external id as a completed deposit - # creates the parenting chain - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = 
response_content["swh:deposit_id"] - - assert deposit_id != deposit.id - - new_deposit = Deposit.objects.get(pk=deposit_id) - assert deposit.collection == new_deposit.collection - assert deposit.origin_url == origin_url - - assert new_deposit != deposit - assert new_deposit.parent == deposit - - def test_add_deposit_external_id_conflict_no_parent( authenticated_client, another_authenticated_client, @@ -340,60 +239,3 @@ assert new_deposit != deposit assert new_deposit.parent == deposit - - -def test_add_deposit_add_to_origin_conflict( - authenticated_client, - another_authenticated_client, - deposit_collection, - deposit_another_collection, - atom_dataset, - sample_archive, - deposit_user, - deposit_another_user, -): - """Posting a deposit with an referencing an origin - owned by a different client raises an error - - """ - external_id = "foobar" - origin_url = deposit_another_user.provider_url + external_id - - # create a deposit for that other user, with the same slug - create_deposit( - another_authenticated_client, - deposit_another_collection.name, - sample_archive, - external_id, - DEPOSIT_STATUS_LOAD_SUCCESS, - ) - - # adding a new deposit with the same external id as a completed deposit - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - ) - - assert response.status_code == status.HTTP_403_FORBIDDEN - assert b"must start with" in response.content - - -def test_add_deposit_add_to_wrong_origin( - authenticated_client, deposit_collection, atom_dataset, sample_archive, -): - """Posting a deposit with an referencing an origin - not starting with the provider_url raises an error - - """ - origin_url = "http://example.org/foo" - - # adding a new deposit with the same external id as a completed deposit - response = authenticated_client.post( - reverse(COL_IRI, args=[deposit_collection.name]), - 
content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - ) - - assert response.status_code == status.HTTP_403_FORBIDDEN - assert b"must start with" in response.content diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py --- a/swh/deposit/tests/api/test_deposit_update.py +++ b/swh/deposit/tests/api/test_deposit_update.py @@ -3,253 +3,21 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +"""Tests updates on SE-IRI.""" + from io import BytesIO -import attr from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse from rest_framework import status -from swh.deposit.api.common import ACCEPT_ARCHIVE_CONTENT_TYPES from swh.deposit.config import ( - COL_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, EDIT_IRI, - EM_IRI, SE_IRI, - APIConfig, -) -from swh.deposit.models import Deposit, DepositCollection, DepositRequest -from swh.deposit.parsers import parse_xml -from swh.deposit.tests.common import check_archive, create_arborescence_archive -from swh.model.hashutil import hash_to_bytes -from swh.model.identifiers import parse_swhid, swhid -from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, - MetadataTargetType, - RawExtrinsicMetadata, ) -from swh.storage.interface import PagedResult - - -def test_replace_archive_to_deposit_is_possible( - tmp_path, - partial_deposit, - deposit_collection, - authenticated_client, - sample_archive, - atom_dataset, -): - """Replace all archive with another one should return a 204 response - - """ - tmp_path = str(tmp_path) - # given - deposit = partial_deposit - requests = DepositRequest.objects.filter(deposit=deposit, type="archive") - - assert len(list(requests)) == 1 - check_archive(sample_archive["name"], requests[0].archive.name) - - # we have no metadata for that deposit - requests = 
list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) - assert len(requests) == 0 - - response = authenticated_client.post( - reverse(SE_IRI, args=[deposit_collection.name, deposit.id]), - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - HTTP_SLUG=deposit.external_id, - HTTP_IN_PROGRESS=True, - ) - - requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) - assert len(requests) == 1 - - update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - external_id = "some-external-id-1" - archive2 = create_arborescence_archive( - tmp_path, "archive2", "file2", b"some other content in file" - ) - - response = authenticated_client.put( - update_uri, - content_type="application/zip", # as zip - data=archive2["data"], - # + headers - CONTENT_LENGTH=archive2["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="false", - HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), - ) - - assert response.status_code == status.HTTP_204_NO_CONTENT - - requests = DepositRequest.objects.filter(deposit=deposit, type="archive") - - assert len(list(requests)) == 1 - check_archive(archive2["name"], requests[0].archive.name) - - # check we did not touch the other parts - requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) - assert len(requests) == 1 - - -def test_replace_metadata_to_deposit_is_possible( - tmp_path, - authenticated_client, - partial_deposit_with_metadata, - deposit_collection, - atom_dataset, - deposit_user, -): - """Replace all metadata with another one should return a 204 response - - """ - # given - deposit = partial_deposit_with_metadata - origin_url = deposit_user.provider_url + deposit.external_id - raw_metadata0 = atom_dataset["entry-data0"] % origin_url - - requests_meta = DepositRequest.objects.filter(deposit=deposit, 
type="metadata") - assert len(requests_meta) == 1 - request_meta0 = requests_meta[0] - assert request_meta0.raw_metadata == raw_metadata0 - - requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive0) == 1 - - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) - - response = authenticated_client.put( - update_uri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - ) - - assert response.status_code == status.HTTP_204_NO_CONTENT - - requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") - - assert len(requests_meta) == 1 - request_meta1 = requests_meta[0] - raw_metadata1 = request_meta1.raw_metadata - assert raw_metadata1 == atom_dataset["entry-data1"] - assert raw_metadata0 != raw_metadata1 - assert request_meta0 != request_meta1 - - # check we did not touch the other parts - requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive1) == 1 - assert set(requests_archive0) == set(requests_archive1) - - -def test_add_archive_to_deposit_is_possible( - tmp_path, - authenticated_client, - deposit_collection, - partial_deposit_with_metadata, - sample_archive, -): - """Add another archive to a deposit return a 201 response - - """ - tmp_path = str(tmp_path) - deposit = partial_deposit_with_metadata - - requests = DepositRequest.objects.filter(deposit=deposit, type="archive") - - assert len(requests) == 1 - check_archive(sample_archive["name"], requests[0].archive.name) - - requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata") - assert len(requests_meta0) == 1 - - update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - - external_id = "some-external-id-1" - archive2 = create_arborescence_archive( - tmp_path, "archive2", "file2", b"some other content in file" - ) - - response = authenticated_client.post( - update_uri, - 
content_type="application/zip", # as zip - data=archive2["data"], - # + headers - CONTENT_LENGTH=archive2["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="false", - HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), - ) - - assert response.status_code == status.HTTP_201_CREATED - - requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by( - "id" - ) - - assert len(requests) == 2 - # first archive still exists - check_archive(sample_archive["name"], requests[0].archive.name) - # a new one was added - check_archive(archive2["name"], requests[1].archive.name) - - # check we did not touch the other parts - requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata") - assert len(requests_meta1) == 1 - assert set(requests_meta0) == set(requests_meta1) - - -def test_add_metadata_to_deposit_is_possible( - authenticated_client, - deposit_collection, - partial_deposit_with_metadata, - atom_dataset, - deposit_user, -): - """Add metadata with another one should return a 204 response - - """ - deposit = partial_deposit_with_metadata - origin_url = deposit_user.provider_url + deposit.external_id - requests = DepositRequest.objects.filter(deposit=deposit, type="metadata") - - assert len(requests) == 1 - - requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive0) == 1 - - update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id]) - - atom_entry = atom_dataset["entry-data1"] - response = authenticated_client.post( - update_uri, content_type="application/atom+xml;type=entry", data=atom_entry - ) - - assert response.status_code == status.HTTP_201_CREATED - - requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by( - "id" - ) - - assert len(requests) == 2 - expected_raw_meta0 = atom_dataset["entry-data0"] % 
origin_url - # a new one was added - assert requests[0].raw_metadata == expected_raw_meta0 - assert requests[1].raw_metadata == atom_entry - - # check we did not touch the other parts - requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive1) == 1 - assert set(requests_archive0) == set(requests_archive1) +from swh.deposit.models import Deposit, DepositRequest def test_add_both_archive_and_metadata_to_deposit( @@ -345,169 +113,6 @@ assert deposit.status == DEPOSIT_STATUS_DEPOSITED -def test_add_metadata_to_unknown_deposit( - deposit_collection, authenticated_client, atom_dataset -): - """Replacing metadata to unknown deposit should return a 404 response - - """ - unknown_deposit_id = 1000 - try: - Deposit.objects.get(pk=unknown_deposit_id) - except Deposit.DoesNotExist: - assert True - - url = reverse(SE_IRI, args=[deposit_collection, unknown_deposit_id]) - response = authenticated_client.post( - url, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - ) - assert response.status_code == status.HTTP_404_NOT_FOUND - response_content = parse_xml(response.content) - assert ( - "Deposit 1000 does not exist" in response_content["sword:error"]["atom:summary"] - ) - - -def test_add_metadata_to_unknown_collection( - partial_deposit, authenticated_client, atom_dataset -): - """Replacing metadata to unknown deposit should return a 404 response - - """ - deposit = partial_deposit - unknown_collection_name = "unknown-collection" - try: - DepositCollection.objects.get(name=unknown_collection_name) - except DepositCollection.DoesNotExist: - assert True - - url = reverse(SE_IRI, args=[unknown_collection_name, deposit.id]) - response = authenticated_client.post( - url, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - ) - assert response.status_code == status.HTTP_404_NOT_FOUND - response_content = parse_xml(response.content) - assert "Unknown 
collection name" in response_content["sword:error"]["atom:summary"] - - -def test_replace_metadata_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset -): - """Adding metadata to unknown deposit should return a 404 response - - """ - unknown_deposit_id = 998 - try: - Deposit.objects.get(pk=unknown_deposit_id) - except Deposit.DoesNotExist: - assert True - url = reverse(EDIT_IRI, args=[deposit_collection.name, unknown_deposit_id]) - response = authenticated_client.put( - url, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - ) - assert response.status_code == status.HTTP_404_NOT_FOUND - response_content = parse_xml(response.content) - assert ( - "Deposit %s does not exist" % unknown_deposit_id - == response_content["sword:error"]["atom:summary"] - ) - - -def test_add_archive_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset -): - """Adding metadata to unknown deposit should return a 404 response - - """ - unknown_deposit_id = 997 - try: - Deposit.objects.get(pk=unknown_deposit_id) - except Deposit.DoesNotExist: - assert True - - url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) - response = authenticated_client.post( - url, content_type="application/zip", data=atom_dataset["entry-data1"] - ) - assert response.status_code == status.HTTP_404_NOT_FOUND - response_content = parse_xml(response.content) - assert ( - "Deposit %s does not exist" % unknown_deposit_id - == response_content["sword:error"]["atom:summary"] - ) - - -def test_replace_archive_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset -): - """Replacing archive to unknown deposit should return a 404 response - - """ - unknown_deposit_id = 996 - try: - Deposit.objects.get(pk=unknown_deposit_id) - except Deposit.DoesNotExist: - assert True - - url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) - response = authenticated_client.put( - url, 
content_type="application/zip", data=atom_dataset["entry-data1"] - ) - assert response.status_code == status.HTTP_404_NOT_FOUND - response_content = parse_xml(response.content) - assert ( - "Deposit %s does not exist" % unknown_deposit_id - == response_content["sword:error"]["atom:summary"] - ) - - -def test_post_metadata_to_em_iri_failure( - authenticated_client, deposit_collection, partial_deposit, atom_dataset -): - """Update (POST) archive with wrong content type should return 400 - - """ - deposit = partial_deposit - update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - response = authenticated_client.post( - update_uri, - content_type="application/x-gtar-compressed", - data=atom_dataset["entry-data1"], - ) - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Packaging format supported is restricted" in response.content - for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES: - assert supported_format.encode() in response.content - - -def test_put_metadata_to_em_iri_failure( - authenticated_client, deposit_collection, partial_deposit, atom_dataset -): - """Update (PUT) archive with wrong content type should return 400 - - """ - # given - deposit = partial_deposit - # when - update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - response = authenticated_client.put( - update_uri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - ) - # then - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Packaging format supported is restricted" in response.content - for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES: - assert supported_format.encode() in response.content - - def test_put_update_metadata_and_archive_deposit_partial_nominal( tmp_path, authenticated_client, @@ -576,310 +181,3 @@ requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") assert len(requests_archive1) == 1 assert set(requests_archive0) != 
set(requests_archive1) - - -def test_put_update_metadata_done_deposit_nominal( - tmp_path, - authenticated_client, - complete_deposit, - deposit_collection, - atom_dataset, - sample_data, - swh_storage, -): - """Nominal scenario, client send an update of metadata on a deposit with status "done" - with an existing swhid. Such swhid has its metadata updated accordingly both in - the deposit backend and in the metadata storage. - - Response: 204 - - """ - deposit_swhid = parse_swhid(complete_deposit.swhid) - assert deposit_swhid.object_type == "directory" - directory_id = hash_to_bytes(deposit_swhid.object_id) - - # directory targeted by the complete_deposit does not exist in the storage - assert list(swh_storage.directory_missing([directory_id])) == [directory_id] - - # so let's create a directory reference in the storage (current deposit targets an - # unknown swhid) - existing_directory = sample_data.directory - swh_storage.directory_add([existing_directory]) - assert list(swh_storage.directory_missing([existing_directory.id])) == [] - - # and patch one complete deposit swhid so it targets said reference - complete_deposit.swhid = swhid("directory", existing_directory.id) - complete_deposit.save() - - actual_existing_requests_archive = DepositRequest.objects.filter( - deposit=complete_deposit, type="archive" - ) - nb_archives = len(actual_existing_requests_archive) - actual_existing_requests_metadata = DepositRequest.objects.filter( - deposit=complete_deposit, type="metadata" - ) - nb_metadata = len(actual_existing_requests_metadata) - - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) - response = authenticated_client.put( - update_uri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - HTTP_X_CHECK_SWHID=complete_deposit.swhid, - ) - - assert response.status_code == status.HTTP_204_NO_CONTENT - - new_requests_meta = DepositRequest.objects.filter( - deposit=complete_deposit, type="metadata" - 
) - assert len(new_requests_meta) == nb_metadata + 1 - request_meta1 = new_requests_meta[0] - raw_metadata1 = request_meta1.raw_metadata - assert raw_metadata1 == atom_dataset["entry-data1"] - - # check we did not touch the other parts - requests_archive1 = DepositRequest.objects.filter( - deposit=complete_deposit, type="archive" - ) - assert len(requests_archive1) == nb_archives - assert set(actual_existing_requests_archive) == set(requests_archive1) - - # Ensure metadata stored in the metadata storage is consistent - metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.DEPOSIT_CLIENT, - url=complete_deposit.client.provider_url, - metadata={"name": complete_deposit.client.last_name}, - ) - - actual_authority = swh_storage.metadata_authority_get( - MetadataAuthorityType.DEPOSIT_CLIENT, url=complete_deposit.client.provider_url - ) - assert actual_authority == metadata_authority - - config = APIConfig() - metadata_fetcher = MetadataFetcher( - name=config.tool["name"], - version=config.tool["version"], - metadata=config.tool["configuration"], - ) - - actual_fetcher = swh_storage.metadata_fetcher_get( - config.tool["name"], config.tool["version"] - ) - assert actual_fetcher == metadata_fetcher - - directory_swhid = parse_swhid(complete_deposit.swhid) - page_results = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, directory_swhid, metadata_authority - ) - assert page_results == PagedResult( - results=[ - RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, - target=directory_swhid, - discovery_date=request_meta1.date, - authority=attr.evolve(metadata_authority, metadata=None), - fetcher=attr.evolve(metadata_fetcher, metadata=None), - format="sword-v2-atom-codemeta", - metadata=raw_metadata1.encode(), - origin=complete_deposit.origin_url, - ) - ], - next_page_token=None, - ) - - -def test_put_update_metadata_done_deposit_failure_mismatched_swhid( - tmp_path, - authenticated_client, - complete_deposit, - deposit_collection, - 
atom_dataset, - swh_storage, -): - """failure: client updates metadata on deposit with SWHID not matching the deposit's. - - Response: 400 - - """ - incorrect_swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" - assert complete_deposit.swhid != incorrect_swhid - - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) - response = authenticated_client.put( - update_uri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data1"], - HTTP_X_CHECK_SWHID=incorrect_swhid, - ) - - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Mismatched provided SWHID" in response.content - - -def test_put_update_metadata_done_deposit_failure_malformed_xml( - tmp_path, - authenticated_client, - complete_deposit, - deposit_collection, - atom_dataset, - swh_storage, -): - """failure: client updates metadata on deposit done with a malformed xml - - Response: 400 - - """ - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) - response = authenticated_client.put( - update_uri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-ko"], - HTTP_X_CHECK_SWHID=complete_deposit.swhid, - ) - - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Malformed xml metadata" in response.content - - -def test_put_update_metadata_done_deposit_failure_empty_xml( - tmp_path, - authenticated_client, - complete_deposit, - deposit_collection, - atom_dataset, - swh_storage, -): - """failure: client updates metadata on deposit done with an empty xml. 
- - Response: 400 - - """ - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) - - atom_content = atom_dataset["entry-data-empty-body"] - response = authenticated_client.put( - update_uri, - content_type="application/atom+xml;type=entry", - data=atom_content, - HTTP_X_CHECK_SWHID=complete_deposit.swhid, - ) - - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Empty body request is not supported" in response.content - - -def test_put_update_metadata_done_deposit_failure_functional_checks( - tmp_path, - authenticated_client, - complete_deposit, - deposit_collection, - atom_dataset, - swh_storage, -): - """failure: client updates metadata on deposit done without required incomplete metadata - - Response: 400 - - """ - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id]) - - response = authenticated_client.put( - update_uri, - content_type="application/atom+xml;type=entry", - # no title, nor author, nor name fields - data=atom_dataset["entry-data-fail-metadata-functional-checks"], - HTTP_X_CHECK_SWHID=complete_deposit.swhid, - ) - - assert response.status_code == status.HTTP_400_BAD_REQUEST - assert b"Functional metadata checks failure" in response.content - # detail on the errors - msg = ( - b"- Mandatory fields are missing (" - b"atom:name or atom:title or codemeta:name, " - b"atom:author or codemeta:author)" - ) - assert msg in response.content - - -def test_put_atom_with_create_origin_and_external_identifier( - authenticated_client, deposit_collection, atom_dataset, deposit_user -): - """ was deprecated before - was introduced, clients should get an error when trying to use both - - """ - external_id = "foobar" - origin_url = deposit_user.provider_url + external_id - url = reverse(COL_IRI, args=[deposit_collection.name]) - - response = authenticated_client.post( - url, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - 
HTTP_IN_PROGRESS="true", - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - - for link in response_content["atom:link"]: - if link["@rel"] == "edit": - edit_iri = link["@href"] - break - else: - assert False, response_content - - # when - response = authenticated_client.put( - edit_iri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["error-with-external-identifier"] % external_id, - # + headers - HTTP_IN_PROGRESS="false", - ) - - assert b"<external_identifier> is deprecated" in response.content - assert response.status_code == status.HTTP_400_BAD_REQUEST - - -def test_put_atom_with_create_origin_and_reference( - authenticated_client, deposit_collection, atom_dataset, deposit_user -): - """ and are mutually exclusive - - """ - external_id = "foobar" - origin_url = deposit_user.provider_url + external_id - url = reverse(COL_IRI, args=[deposit_collection.name]) - - response = authenticated_client.post( - url, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data0"] % origin_url, - HTTP_IN_PROGRESS="true", - ) - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - - for link in response_content["atom:link"]: - if link["@rel"] == "edit": - edit_iri = link["@href"] - break - else: - assert False, response_content - - # when - response = authenticated_client.put( - edit_iri, - content_type="application/atom+xml;type=entry", - data=atom_dataset["entry-data-with-origin-reference"].format(url=origin_url), - # + headers - HTTP_IN_PROGRESS="false", - ) - - assert b"only one may be used on a given deposit" in response.content - assert response.status_code == status.HTTP_400_BAD_REQUEST diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update_atom.py copy from swh/deposit/tests/api/test_deposit_update.py copy to 
swh/deposit/tests/api/test_deposit_update_atom.py --- a/swh/deposit/tests/api/test_deposit_update.py +++ b/swh/deposit/tests/api/test_deposit_update_atom.py @@ -6,15 +6,14 @@ from io import BytesIO import attr -from django.core.files.uploadedfile import InMemoryUploadedFile from django.urls import reverse +import pytest from rest_framework import status from swh.deposit.api.common import ACCEPT_ARCHIVE_CONTENT_TYPES from swh.deposit.config import ( COL_IRI, DEPOSIT_STATUS_DEPOSITED, - DEPOSIT_STATUS_PARTIAL, EDIT_IRI, EM_IRI, SE_IRI, @@ -22,7 +21,6 @@ ) from swh.deposit.models import Deposit, DepositCollection, DepositRequest from swh.deposit.parsers import parse_xml -from swh.deposit.tests.common import check_archive, create_arborescence_archive from swh.model.hashutil import hash_to_bytes from swh.model.identifiers import parse_swhid, swhid from swh.model.model import ( @@ -35,69 +33,86 @@ from swh.storage.interface import PagedResult -def test_replace_archive_to_deposit_is_possible( - tmp_path, - partial_deposit, - deposit_collection, - authenticated_client, - sample_archive, - atom_dataset, +def test_post_deposit_atom_entry_multiple_steps( + authenticated_client, deposit_collection, atom_dataset, deposit_user ): - """Replace all archive with another one should return a 204 response + """After initial deposit, updating a deposit should return a 201 """ - tmp_path = str(tmp_path) # given - deposit = partial_deposit - requests = DepositRequest.objects.filter(deposit=deposit, type="archive") - - assert len(list(requests)) == 1 - check_archive(sample_archive["name"], requests[0].archive.name) + origin_url = deposit_user.provider_url + "2225c695-cfb8-4ebb-aaaa-80da344efa6a" - # we have no metadata for that deposit - requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) - assert len(requests) == 0 + with pytest.raises(Deposit.DoesNotExist): + deposit = Deposit.objects.get(origin_url=origin_url) + # when response = authenticated_client.post( 
- reverse(SE_IRI, args=[deposit_collection.name, deposit.id]), + reverse(COL_IRI, args=[deposit_collection.name]), content_type="application/atom+xml;type=entry", data=atom_dataset["entry-data1"], - HTTP_SLUG=deposit.external_id, - HTTP_IN_PROGRESS=True, + HTTP_IN_PROGRESS="True", ) - requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) - assert len(requests) == 1 + # then + assert response.status_code == status.HTTP_201_CREATED - update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - external_id = "some-external-id-1" - archive2 = create_arborescence_archive( - tmp_path, "archive2", "file2", b"some other content in file" - ) + response_content = parse_xml(BytesIO(response.content)) + deposit_id = int(response_content["swh:deposit_id"]) - response = authenticated_client.put( - update_uri, - content_type="application/zip", # as zip - data=archive2["data"], - # + headers - CONTENT_LENGTH=archive2["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="false", - HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), + deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.collection == deposit_collection + assert deposit.origin_url is None # not provided yet + assert deposit.status == "partial" + + # one associated request to a deposit + deposit_requests = DepositRequest.objects.filter(deposit=deposit) + assert len(deposit_requests) == 1 + + atom_entry_data = atom_dataset["entry-only-create-origin"] % (origin_url) + + for link in response_content["atom:link"]: + if link["@rel"] == "http://purl.org/net/sword/terms/add": + se_iri = link["@href"] + break + else: + assert False, f"missing SE-IRI from {response_content['link']}" + + # when updating the first deposit post + response = authenticated_client.post( + se_iri, + content_type="application/atom+xml;type=entry", + data=atom_entry_data, + 
HTTP_IN_PROGRESS="False", ) - assert response.status_code == status.HTTP_204_NO_CONTENT + # then + assert response.status_code == status.HTTP_201_CREATED, response.content.decode() - requests = DepositRequest.objects.filter(deposit=deposit, type="archive") + response_content = parse_xml(BytesIO(response.content)) + deposit_id = int(response_content["swh:deposit_id"]) - assert len(list(requests)) == 1 - check_archive(archive2["name"], requests[0].archive.name) + deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.collection == deposit_collection + assert deposit.origin_url == origin_url + assert deposit.status == DEPOSIT_STATUS_DEPOSITED - # check we did not touch the other parts - requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) - assert len(requests) == 1 + assert len(Deposit.objects.all()) == 1 + + # now 2 associated requests to a same deposit + deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id") + assert len(deposit_requests) == 2 + + atom_entry_data1 = atom_dataset["entry-data1"] + expected_meta = [ + {"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1}, + {"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data}, + ] + + for i, deposit_request in enumerate(deposit_requests): + actual_metadata = deposit_request.metadata + assert actual_metadata == expected_meta[i]["metadata"] + assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"] + assert bool(deposit_request.archive) is False def test_replace_metadata_to_deposit_is_possible( @@ -149,65 +164,6 @@ assert set(requests_archive0) == set(requests_archive1) -def test_add_archive_to_deposit_is_possible( - tmp_path, - authenticated_client, - deposit_collection, - partial_deposit_with_metadata, - sample_archive, -): - """Add another archive to a deposit return a 201 response - - """ - tmp_path = str(tmp_path) - deposit = partial_deposit_with_metadata - - requests = 
DepositRequest.objects.filter(deposit=deposit, type="archive") - - assert len(requests) == 1 - check_archive(sample_archive["name"], requests[0].archive.name) - - requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata") - assert len(requests_meta0) == 1 - - update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) - - external_id = "some-external-id-1" - archive2 = create_arborescence_archive( - tmp_path, "archive2", "file2", b"some other content in file" - ) - - response = authenticated_client.post( - update_uri, - content_type="application/zip", # as zip - data=archive2["data"], - # + headers - CONTENT_LENGTH=archive2["length"], - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=archive2["md5sum"], - HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", - HTTP_IN_PROGRESS="false", - HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), - ) - - assert response.status_code == status.HTTP_201_CREATED - - requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by( - "id" - ) - - assert len(requests) == 2 - # first archive still exists - check_archive(sample_archive["name"], requests[0].archive.name) - # a new one was added - check_archive(archive2["name"], requests[1].archive.name) - - # check we did not touch the other parts - requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata") - assert len(requests_meta1) == 1 - assert set(requests_meta0) == set(requests_meta1) - - def test_add_metadata_to_deposit_is_possible( authenticated_client, deposit_collection, @@ -252,99 +208,6 @@ assert set(requests_archive0) == set(requests_archive1) -def test_add_both_archive_and_metadata_to_deposit( - authenticated_client, - deposit_collection, - partial_deposit_with_metadata, - atom_dataset, - sample_archive, - deposit_user, -): - """Scenario: Add both a new archive and new metadata to a partial deposit is ok - - Response: 201 - - """ - deposit = 
partial_deposit_with_metadata - origin_url = deposit_user.provider_url + deposit.external_id - requests = DepositRequest.objects.filter(deposit=deposit, type="metadata") - assert len(requests) == 1 - - requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive0) == 1 - - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) - archive = InMemoryUploadedFile( - BytesIO(sample_archive["data"]), - field_name=sample_archive["name"], - name=sample_archive["name"], - content_type="application/x-tar", - size=sample_archive["length"], - charset=None, - ) - - data_atom_entry = atom_dataset["entry-data1"] - atom_entry = InMemoryUploadedFile( - BytesIO(data_atom_entry.encode("utf-8")), - field_name="atom0", - name="atom0", - content_type='application/atom+xml; charset="utf-8"', - size=len(data_atom_entry), - charset="utf-8", - ) - - update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id]) - response = authenticated_client.post( - update_uri, - format="multipart", - data={"archive": archive, "atom_entry": atom_entry,}, - ) - - assert response.status_code == status.HTTP_201_CREATED - requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by( - "id" - ) - - assert len(requests) == 1 + 1, "New deposit request archive got added" - expected_raw_meta0 = atom_dataset["entry-data0"] % origin_url - # a new one was added - assert requests[0].raw_metadata == expected_raw_meta0 - assert requests[1].raw_metadata == data_atom_entry - - # check we did not touch the other parts - requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive1) == 1 + 1, "New deposit request metadata got added" - - -def test_post_metadata_empty_post_finalize_deposit_ok( - authenticated_client, - deposit_collection, - partial_deposit_with_metadata, - atom_dataset, -): - """Empty atom post entry with header in-progress to false transitions deposit 
to - 'deposited' status - - Response: 200 - - """ - deposit = partial_deposit_with_metadata - assert deposit.status == DEPOSIT_STATUS_PARTIAL - - update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id]) - response = authenticated_client.post( - update_uri, - content_type="application/atom+xml;type=entry", - data="", - size=0, - HTTP_IN_PROGRESS=False, - ) - - assert response.status_code == status.HTTP_200_OK - deposit = Deposit.objects.get(pk=deposit.id) - assert deposit.status == DEPOSIT_STATUS_DEPOSITED - - def test_add_metadata_to_unknown_deposit( deposit_collection, authenticated_client, atom_dataset ): @@ -419,54 +282,6 @@ ) -def test_add_archive_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset -): - """Adding metadata to unknown deposit should return a 404 response - - """ - unknown_deposit_id = 997 - try: - Deposit.objects.get(pk=unknown_deposit_id) - except Deposit.DoesNotExist: - assert True - - url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) - response = authenticated_client.post( - url, content_type="application/zip", data=atom_dataset["entry-data1"] - ) - assert response.status_code == status.HTTP_404_NOT_FOUND - response_content = parse_xml(response.content) - assert ( - "Deposit %s does not exist" % unknown_deposit_id - == response_content["sword:error"]["atom:summary"] - ) - - -def test_replace_archive_to_unknown_deposit( - authenticated_client, deposit_collection, atom_dataset -): - """Replacing archive to unknown deposit should return a 404 response - - """ - unknown_deposit_id = 996 - try: - Deposit.objects.get(pk=unknown_deposit_id) - except Deposit.DoesNotExist: - assert True - - url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id]) - response = authenticated_client.put( - url, content_type="application/zip", data=atom_dataset["entry-data1"] - ) - assert response.status_code == status.HTTP_404_NOT_FOUND - response_content = parse_xml(response.content) - 
assert ( - "Deposit %s does not exist" % unknown_deposit_id - == response_content["sword:error"]["atom:summary"] - ) - - def test_post_metadata_to_em_iri_failure( authenticated_client, deposit_collection, partial_deposit, atom_dataset ): @@ -508,76 +323,6 @@ assert supported_format.encode() in response.content -def test_put_update_metadata_and_archive_deposit_partial_nominal( - tmp_path, - authenticated_client, - partial_deposit_with_metadata, - deposit_collection, - atom_dataset, - sample_archive, - deposit_user, -): - """Scenario: Replace metadata and archive(s) with new ones should be ok - - Response: 204 - - """ - # given - deposit = partial_deposit_with_metadata - origin_url = deposit_user.provider_url + deposit.external_id - raw_metadata0 = atom_dataset["entry-data0"] % origin_url - - requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata") - assert len(requests_meta) == 1 - request_meta0 = requests_meta[0] - assert request_meta0.raw_metadata == raw_metadata0 - - requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive0) == 1 - - archive = InMemoryUploadedFile( - BytesIO(sample_archive["data"]), - field_name=sample_archive["name"], - name=sample_archive["name"], - content_type="application/x-tar", - size=sample_archive["length"], - charset=None, - ) - - data_atom_entry = atom_dataset["entry-data1"] - atom_entry = InMemoryUploadedFile( - BytesIO(data_atom_entry.encode("utf-8")), - field_name="atom0", - name="atom0", - content_type='application/atom+xml; charset="utf-8"', - size=len(data_atom_entry), - charset="utf-8", - ) - - update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id]) - response = authenticated_client.put( - update_uri, - format="multipart", - data={"archive": archive, "atom_entry": atom_entry,}, - ) - - assert response.status_code == status.HTTP_204_NO_CONTENT - - # check we updated the metadata part - requests_meta = 
DepositRequest.objects.filter(deposit=deposit, type="metadata") - assert len(requests_meta) == 1 - request_meta1 = requests_meta[0] - raw_metadata1 = request_meta1.raw_metadata - assert raw_metadata1 == data_atom_entry - assert raw_metadata0 != raw_metadata1 - assert request_meta0 != request_meta1 - - # and the archive part - requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") - assert len(requests_archive1) == 1 - assert set(requests_archive0) != set(requests_archive1) - - def test_put_update_metadata_done_deposit_nominal( tmp_path, authenticated_client, diff --git a/swh/deposit/tests/api/test_deposit_update_binary.py b/swh/deposit/tests/api/test_deposit_update_binary.py new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/api/test_deposit_update_binary.py @@ -0,0 +1,437 @@ +# Copyright (C) 2017-2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +"""Tests updates on EM-IRI""" + +from io import BytesIO + +from django.core.files.uploadedfile import InMemoryUploadedFile +from django.urls import reverse +from rest_framework import status + +from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, EM_IRI, SE_IRI +from swh.deposit.models import Deposit, DepositRequest +from swh.deposit.parsers import parse_xml +from swh.deposit.tests.common import check_archive, create_arborescence_archive + + +def test_post_deposit_binary_and_post_to_add_another_archive( + authenticated_client, deposit_collection, sample_archive, tmp_path +): + """Updating a deposit should return a 201 with receipt + + """ + tmp_path = str(tmp_path) + url = reverse(COL_IRI, args=[deposit_collection.name]) + + external_id = "some-external-id-1" + + # when + response = authenticated_client.post( + url, + content_type="application/zip", # as zip + data=sample_archive["data"], + # 
+ headers + CONTENT_LENGTH=sample_archive["length"], + HTTP_SLUG=external_id, + HTTP_CONTENT_MD5=sample_archive["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_IN_PROGRESS="true", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"],), + ) + + # then + assert response.status_code == status.HTTP_201_CREATED + + response_content = parse_xml(BytesIO(response.content)) + deposit_id = response_content["swh:deposit_id"] + + deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.status == "partial" + assert deposit.external_id == external_id + assert deposit.collection == deposit_collection + assert deposit.swhid is None + + deposit_request = DepositRequest.objects.get(deposit=deposit) + assert deposit_request.deposit == deposit + assert deposit_request.type == "archive" + check_archive(sample_archive["name"], deposit_request.archive.name) + + # 2nd archive to upload + archive2 = create_arborescence_archive( + tmp_path, "archive2", "file2", b"some other content in file" + ) + + # uri to update the content + update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id]) + + # adding another archive for the deposit and finalizing it + response = authenticated_client.post( + update_uri, + content_type="application/zip", # as zip + data=archive2["data"], + # + headers + CONTENT_LENGTH=archive2["length"], + HTTP_SLUG=external_id, + HTTP_CONTENT_MD5=archive2["md5sum"], + HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", + HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"]), + ) + + assert response.status_code == status.HTTP_201_CREATED + response_content = parse_xml(BytesIO(response.content)) + + deposit = Deposit.objects.get(pk=deposit_id) + assert deposit.status == DEPOSIT_STATUS_DEPOSITED + assert deposit.external_id == external_id + assert deposit.collection == deposit_collection + assert deposit.swhid is None + + deposit_requests = list( + 
DepositRequest.objects.filter(deposit=deposit).order_by("id") + ) + + # 2 deposit requests for the same deposit + assert len(deposit_requests) == 2 + assert deposit_requests[0].deposit == deposit + assert deposit_requests[0].type == "archive" + check_archive(sample_archive["name"], deposit_requests[0].archive.name) + + assert deposit_requests[1].deposit == deposit + assert deposit_requests[1].type == "archive" + check_archive(archive2["name"], deposit_requests[1].archive.name) + + # only 1 deposit in db + deposits = Deposit.objects.all() + assert len(deposits) == 1 + + +def test_replace_archive_to_deposit_is_possible( + tmp_path, + partial_deposit, + deposit_collection, + authenticated_client, + sample_archive, + atom_dataset, +): + """Replace all archive with another one should return a 204 response + + """ + tmp_path = str(tmp_path) + # given + deposit = partial_deposit + requests = DepositRequest.objects.filter(deposit=deposit, type="archive") + + assert len(list(requests)) == 1 + check_archive(sample_archive["name"], requests[0].archive.name) + + # we have no metadata for that deposit + requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) + assert len(requests) == 0 + + response = authenticated_client.post( + reverse(SE_IRI, args=[deposit_collection.name, deposit.id]), + content_type="application/atom+xml;type=entry", + data=atom_dataset["entry-data1"], + HTTP_SLUG=deposit.external_id, + HTTP_IN_PROGRESS=True, + ) + + requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) + assert len(requests) == 1 + + update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) + external_id = "some-external-id-1" + archive2 = create_arborescence_archive( + tmp_path, "archive2", "file2", b"some other content in file" + ) + + response = authenticated_client.put( + update_uri, + content_type="application/zip", # as zip + data=archive2["data"], + # + headers + CONTENT_LENGTH=archive2["length"], + 
def _assert_deposit_is_unknown(deposit_id):
    """Fail unless no deposit with primary key ``deposit_id`` exists.

    Guards the "unknown deposit" tests: without it they would silently pass
    their precondition even if such a deposit happened to be present.

    """
    assert not Deposit.objects.filter(pk=deposit_id).exists(), (
        "Deposit %s unexpectedly exists" % deposit_id
    )


def _assert_update_refused(response):
    """Check ``response`` is the 400 returned when acting on a non-partial deposit

    """
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"You can only act on deposit with status 'partial'" in response.content


def test_add_archive_to_unknown_deposit(
    authenticated_client, deposit_collection, atom_dataset
):
    """Adding an archive to an unknown deposit should return a 404 response

    """
    unknown_deposit_id = 997
    # precondition: the targeted deposit really does not exist
    _assert_deposit_is_unknown(unknown_deposit_id)

    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = authenticated_client.post(
        url, content_type="application/zip", data=atom_dataset["entry-data1"]
    )
    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert (
        "Deposit %s does not exist" % unknown_deposit_id
        == response_content["sword:error"]["atom:summary"]
    )


def test_replace_archive_to_unknown_deposit(
    authenticated_client, deposit_collection, atom_dataset
):
    """Replacing archive to unknown deposit should return a 404 response

    """
    unknown_deposit_id = 996
    # precondition: the targeted deposit really does not exist
    _assert_deposit_is_unknown(unknown_deposit_id)

    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = authenticated_client.put(
        url, content_type="application/zip", data=atom_dataset["entry-data1"]
    )
    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert (
        "Deposit %s does not exist" % unknown_deposit_id
        == response_content["sword:error"]["atom:summary"]
    )


def test_add_archive_to_deposit_is_possible(
    tmp_path,
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    sample_archive,
):
    """Adding another archive to a partial deposit should return a 201 response

    """
    tmp_path = str(tmp_path)
    deposit = partial_deposit_with_metadata

    # initial state: one archive request and one metadata request
    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")

    assert len(requests) == 1
    check_archive(sample_archive["name"], requests[0].archive.name)

    requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta0) == 1

    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])

    external_id = "some-external-id-1"
    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some other content in file"
    )

    response = authenticated_client.post(
        update_uri,
        content_type="application/zip",  # as zip
        data=archive2["data"],
        # + headers
        CONTENT_LENGTH=archive2["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=archive2["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
    )

    assert response.status_code == status.HTTP_201_CREATED

    # order by id so that the pre-existing archive request comes first
    requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by(
        "id"
    )

    assert len(requests) == 2
    # first archive still exists
    check_archive(sample_archive["name"], requests[0].archive.name)
    # a new one was added
    check_archive(archive2["name"], requests[1].archive.name)

    # check we did not touch the other parts
    requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta1) == 1
    assert set(requests_meta0) == set(requests_meta1)


def test_post_deposit_then_update_refused(
    authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path
):
    """Updating a deposit which is no longer 'partial' should return a 400

    A complete (not in-progress) deposit is created first, then every
    update/add endpoint is exercised and each one must be refused.

    """
    tmp_path = str(tmp_path)
    url = reverse(COL_IRI, args=[deposit_collection.name])

    external_id = "some-external-id-1"

    # when
    response = authenticated_client.post(
        url,
        content_type="application/zip",  # as zip
        data=sample_archive["data"],
        # + headers
        CONTENT_LENGTH=sample_archive["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=sample_archive["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED

    response_content = parse_xml(BytesIO(response.content))
    deposit_id = response_content["swh:deposit_id"]

    deposit = Deposit.objects.get(pk=deposit_id)
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
    assert deposit.external_id == external_id
    assert deposit.collection == deposit_collection
    assert deposit.swhid is None

    deposit_request = DepositRequest.objects.get(deposit=deposit)
    assert deposit_request.deposit == deposit
    check_archive("filename0", deposit_request.archive.name)

    # updating/adding is forbidden

    # uri to update the content
    edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit_id])
    se_iri = reverse("se_iri", args=[deposit_collection.name, deposit_id])
    em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id])

    # Every update/add endpoint should fail since the deposit is no
    # longer in the 'partial' status

    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some content in file 2"
    )

    # replacing file is no longer possible
    r = authenticated_client.put(
        em_iri,
        content_type="application/zip",
        data=archive2["data"],
        CONTENT_LENGTH=archive2["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=archive2["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
    )
    _assert_update_refused(r)

    # adding file is no longer possible
    r = authenticated_client.post(
        em_iri,
        content_type="application/zip",
        data=archive2["data"],
        CONTENT_LENGTH=archive2["length"],
        HTTP_SLUG=external_id,
        HTTP_CONTENT_MD5=archive2["md5sum"],
        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
        HTTP_IN_PROGRESS="false",
        HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
    )
    _assert_update_refused(r)

    # replacing metadata is no longer possible
    r = authenticated_client.put(
        edit_iri,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data-deposit-binary"],
        CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]),
        HTTP_SLUG=external_id,
    )
    _assert_update_refused(r)

    # adding new metadata is no longer possible
    r = authenticated_client.post(
        se_iri,
        content_type="application/atom+xml;type=entry",
        data=atom_dataset["entry-data-deposit-binary"],
        CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]),
        HTTP_SLUG=external_id,
    )
    _assert_update_refused(r)

    archive_content = b"some content representing archive"
    archive = InMemoryUploadedFile(
        BytesIO(archive_content),
        field_name="archive0",
        name="archive0",
        content_type="application/zip",
        size=len(archive_content),
        charset=None,
    )

    # encode once so that the declared size matches the bytes actually
    # held by the stream (text length and byte length may differ)
    atom_content = atom_dataset["entry-data-deposit-binary"].encode("utf-8")
    atom_entry = InMemoryUploadedFile(
        BytesIO(atom_content),
        field_name="atom0",
        name="atom0",
        content_type='application/atom+xml; charset="utf-8"',
        size=len(atom_content),
        charset="utf-8",
    )

    # replacing through a multipart request is no longer possible
    r = authenticated_client.put(
        edit_iri,
        format="multipart",
        data={"archive": archive, "atom_entry": atom_entry,},
    )
    _assert_update_refused(r)

    # adding through a multipart request is no longer possible
    r = authenticated_client.post(
        se_iri,
        format="multipart",
        data={"archive": archive, "atom_entry": atom_entry,},
    )
    _assert_update_refused(r)