Page MenuHomeSoftware Heritage

D4781.diff
No OneTemporary

D4781.diff

diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection.py
--- a/swh/deposit/tests/api/test_collection.py
+++ b/swh/deposit/tests/api/test_collection.py
@@ -9,19 +9,9 @@
from django.urls import reverse
from rest_framework import status
-from swh.deposit.config import (
- COL_IRI,
- DEPOSIT_STATUS_LOAD_FAILURE,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- DEPOSIT_STATUS_PARTIAL,
- DEPOSIT_STATUS_REJECTED,
- SE_IRI,
-)
-from swh.deposit.models import Deposit
+from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_REJECTED
from swh.deposit.parsers import parse_xml
-from ..conftest import create_deposit
-
def test_deposit_post_will_fail_with_401(client):
"""Without authentication, endpoint refuses access with 401 response
@@ -83,317 +73,3 @@
response_content = parse_xml(BytesIO(response.content))
actual_state = response_content["deposit_status"]
assert actual_state == DEPOSIT_STATUS_REJECTED
-
-
-def test_act_on_deposit_rejected_is_not_permitted(
- authenticated_client, deposit_collection, rejected_deposit, atom_dataset
-):
- deposit = rejected_deposit
-
- response = authenticated_client.post(
- reverse(SE_IRI, args=[deposit.collection.name, deposit.id]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- HTTP_SLUG=deposit.external_id,
- )
-
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- msg = "You can only act on deposit with status '%s'" % (
- DEPOSIT_STATUS_PARTIAL,
- )
- assert msg in response.content.decode("utf-8")
-
-
-def test_add_deposit_when_partial_makes_new_deposit(
- authenticated_client,
- deposit_collection,
- partial_deposit,
- atom_dataset,
- deposit_user,
-):
- """Posting deposit on collection when previous is partial makes new deposit
-
- """
- deposit = partial_deposit
- assert deposit.status == DEPOSIT_STATUS_PARTIAL
- origin_url = deposit_user.provider_url + deposit.external_id
-
- # adding a new deposit with the same external id
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- HTTP_SLUG=deposit.external_id,
- )
-
- assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- assert deposit_id != deposit.id # new deposit
-
- new_deposit = Deposit.objects.get(pk=deposit_id)
- assert new_deposit != deposit
- assert new_deposit.parent is None
-
-
-def test_add_deposit_when_failed_makes_new_deposit_with_no_parent(
- authenticated_client, deposit_collection, failed_deposit, atom_dataset, deposit_user
-):
- """Posting deposit on collection when deposit done makes new deposit with
- parent
-
- """
- deposit = failed_deposit
- assert deposit.status == DEPOSIT_STATUS_LOAD_FAILURE
- origin_url = deposit_user.provider_url + deposit.external_id
-
- # adding a new deposit with the same external id as a completed deposit
- # creates the parenting chain
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- HTTP_SLUG=deposit.external_id,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- assert deposit_id != deposit.id
-
- new_deposit = Deposit.objects.get(pk=deposit_id)
- assert new_deposit != deposit
- assert new_deposit.parent is None
-
-
-def test_add_deposit_when_done_makes_new_deposit_with_parent_old_one(
- authenticated_client,
- deposit_collection,
- completed_deposit,
- atom_dataset,
- deposit_user,
-):
- """Posting deposit on collection when deposit done makes new deposit with
- parent
-
- """
- # given multiple deposit already loaded
- deposit = completed_deposit
- assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
- origin_url = deposit_user.provider_url + deposit.external_id
-
- # adding a new deposit with the same external id as a completed deposit
- # creates the parenting chain
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- HTTP_SLUG=deposit.external_id,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- assert deposit_id != deposit.id
-
- new_deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.collection == new_deposit.collection
- assert deposit.origin_url == origin_url
-
- assert new_deposit != deposit
- assert new_deposit.parent == deposit
-
-
-def test_add_deposit_with_add_to_origin(
- authenticated_client,
- deposit_collection,
- completed_deposit,
- atom_dataset,
- deposit_user,
-):
- """Posting deposit with <swh:add_to_origin> creates a new deposit with parent
-
- """
- # given multiple deposit already loaded
- deposit = completed_deposit
- assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
- origin_url = deposit_user.provider_url + deposit.external_id
-
- # adding a new deposit with the same external id as a completed deposit
- # creates the parenting chain
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- assert deposit_id != deposit.id
-
- new_deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.collection == new_deposit.collection
- assert deposit.origin_url == origin_url
-
- assert new_deposit != deposit
- assert new_deposit.parent == deposit
-
-
-def test_add_deposit_external_id_conflict_no_parent(
- authenticated_client,
- another_authenticated_client,
- deposit_collection,
- deposit_another_collection,
- atom_dataset,
- sample_archive,
- deposit_user,
-):
- """Posting a deposit with an external_id conflicting with an external_id
- of a different client does not create a parent relationship
-
- """
- external_id = "foobar"
- origin_url = deposit_user.provider_url + external_id
-
- # create a deposit for that other user, with the same slug
- other_deposit = create_deposit(
- another_authenticated_client,
- deposit_another_collection.name,
- sample_archive,
- external_id,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- )
-
- # adding a new deposit with the same external id as a completed deposit
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- HTTP_SLUG=external_id,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- assert other_deposit.id != deposit_id
-
- new_deposit = Deposit.objects.get(pk=deposit_id)
-
- assert new_deposit.parent is None
-
-
-def test_add_deposit_external_id_conflict_with_parent(
- authenticated_client,
- another_authenticated_client,
- deposit_collection,
- deposit_another_collection,
- completed_deposit,
- atom_dataset,
- sample_archive,
- deposit_user,
-):
- """Posting a deposit with an external_id conflicting with an external_id
- of a different client creates a parent relationship with the deposit
- of the right client instead of the last matching deposit
-
- This test does not have an equivalent for origin url conflicts, as these
- can not happen (assuming clients do not have provider_url overlaps)
- """
- # given multiple deposit already loaded
- deposit = completed_deposit
- assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
- origin_url = deposit_user.provider_url + deposit.external_id
-
- # create a deposit for that other user, with the same slug
- other_deposit = create_deposit(
- another_authenticated_client,
- deposit_another_collection.name,
- sample_archive,
- deposit.external_id,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- )
-
- # adding a new deposit with the same external id as a completed deposit
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- HTTP_SLUG=deposit.external_id,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- assert deposit_id != deposit.id
- assert other_deposit.id != deposit.id
-
- new_deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.collection == new_deposit.collection
- assert deposit.external_id == new_deposit.external_id
-
- assert new_deposit != deposit
- assert new_deposit.parent == deposit
-
-
-def test_add_deposit_add_to_origin_conflict(
- authenticated_client,
- another_authenticated_client,
- deposit_collection,
- deposit_another_collection,
- atom_dataset,
- sample_archive,
- deposit_user,
- deposit_another_user,
-):
- """Posting a deposit with an <swh:add_to_origin> referencing an origin
- owned by a different client raises an error
-
- """
- external_id = "foobar"
- origin_url = deposit_another_user.provider_url + external_id
-
- # create a deposit for that other user, with the same slug
- create_deposit(
- another_authenticated_client,
- deposit_another_collection.name,
- sample_archive,
- external_id,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- )
-
- # adding a new deposit with the same external id as a completed deposit
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- )
-
- assert response.status_code == status.HTTP_403_FORBIDDEN
- assert b"must start with" in response.content
-
-
-def test_add_deposit_add_to_wrong_origin(
- authenticated_client, deposit_collection, atom_dataset, sample_archive,
-):
- """Posting a deposit with an <swh:add_to_origin> referencing an origin
- not starting with the provider_url raises an error
-
- """
- origin_url = "http://example.org/foo"
-
- # adding a new deposit with the same external id as a completed deposit
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- )
-
- assert response.status_code == status.HTTP_403_FORBIDDEN
- assert b"must start with" in response.content
diff --git a/swh/deposit/tests/api/test_collection_add_to_origin.py b/swh/deposit/tests/api/test_collection_add_to_origin.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/api/test_collection_add_to_origin.py
@@ -0,0 +1,157 @@
+# Copyright (C) 2017-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from io import BytesIO
+
+from django.urls import reverse
+from rest_framework import status
+
+from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS
+from swh.deposit.models import Deposit
+from swh.deposit.parsers import parse_xml
+
+from ..conftest import create_deposit
+
+
+def test_add_deposit_with_add_to_origin(
+ authenticated_client,
+ deposit_collection,
+ completed_deposit,
+ atom_dataset,
+ deposit_user,
+):
+ """Posting deposit with <swh:add_to_origin> creates a new deposit with parent
+
+ """
+ # given multiple deposit already loaded
+ deposit = completed_deposit
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+ origin_url = deposit_user.provider_url + deposit.external_id
+
+ # adding a new deposit with the same external id as a completed deposit
+ # creates the parenting chain
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
+ )
+
+ assert response.status_code == status.HTTP_201_CREATED
+ response_content = parse_xml(BytesIO(response.content))
+ deposit_id = response_content["swh:deposit_id"]
+
+ assert deposit_id != deposit.id
+
+ new_deposit = Deposit.objects.get(pk=deposit_id)
+ assert deposit.collection == new_deposit.collection
+ assert deposit.origin_url == origin_url
+
+ assert new_deposit != deposit
+ assert new_deposit.parent == deposit
+
+
+def test_add_deposit_add_to_origin_conflict(
+ authenticated_client,
+ another_authenticated_client,
+ deposit_collection,
+ deposit_another_collection,
+ atom_dataset,
+ sample_archive,
+ deposit_user,
+ deposit_another_user,
+):
+ """Posting a deposit with an <swh:add_to_origin> referencing an origin
+ owned by a different client raises an error
+
+ """
+ external_id = "foobar"
+ origin_url = deposit_another_user.provider_url + external_id
+
+ # create a deposit for that other user, with the same slug
+ create_deposit(
+ another_authenticated_client,
+ deposit_another_collection.name,
+ sample_archive,
+ external_id,
+ DEPOSIT_STATUS_LOAD_SUCCESS,
+ )
+
+ # adding a new deposit with the same external id as a completed deposit
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data0"] % origin_url,
+ )
+
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+ assert b"must start with" in response.content
+
+
+def test_add_deposit_add_to_wrong_origin(
+ authenticated_client, deposit_collection, atom_dataset, sample_archive,
+):
+ """Posting a deposit with an <swh:add_to_origin> referencing an origin
+ not starting with the provider_url raises an error
+
+ """
+ origin_url = "http://example.org/foo"
+
+ # adding a new deposit with the same external id as a completed deposit
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data0"] % origin_url,
+ )
+
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+ assert b"must start with" in response.content
+
+
+def test_add_deposit_with_add_to_origin_and_external_identifier(
+ authenticated_client,
+ deposit_collection,
+ completed_deposit,
+ atom_dataset,
+ deposit_user,
+):
+ """Posting both <swh:add_to_origin> and <external_identifier> is rejected
+
+ """
+ # given multiple deposit already loaded
+ origin_url = deposit_user.provider_url + completed_deposit.external_id
+
+ # adding a new deposit with the same external id as a completed deposit
+ # creates the parenting chain
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data-with-both-add-to-origin-and-external-id"]
+ % origin_url,
+ )
+
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"&lt;external_identifier&gt; is deprecated." in response.content
+
+
+def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix(
+ authenticated_client, deposit_collection, atom_dataset, deposit_user
+):
+ """Creating an origin for a prefix not owned by the client is forbidden
+
+ """
+ origin_url = "http://example.org/foo"
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
+ HTTP_IN_PROGRESS="true",
+ )
+ assert response.status_code == status.HTTP_403_FORBIDDEN
+ expected_msg = (
+ f"Cannot create origin {origin_url}, "
+ f"it must start with {deposit_user.provider_url}"
+ )
+ assert expected_msg in response.content.decode()
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -8,13 +8,29 @@
from io import BytesIO
import uuid
+import attr
from django.urls import reverse
import pytest
from rest_framework import status
-from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED
+from swh.deposit.config import (
+ COL_IRI,
+ DEPOSIT_STATUS_DEPOSITED,
+ DEPOSIT_STATUS_LOAD_SUCCESS,
+ APIConfig,
+)
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
+from swh.deposit.utils import compute_metadata_context
+from swh.model.identifiers import SWHID, parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+)
+from swh.storage.interface import PagedResult
def test_post_deposit_atom_201_even_with_decimal(
@@ -114,32 +130,6 @@
) in response.content
-def test_add_deposit_with_add_to_origin_and_external_identifier(
- authenticated_client,
- deposit_collection,
- completed_deposit,
- atom_dataset,
- deposit_user,
-):
- """Posting deposit with <swh:add_to_origin> creates a new deposit with parent
-
- """
- # given multiple deposit already loaded
- origin_url = deposit_user.provider_url + completed_deposit.external_id
-
- # adding a new deposit with the same external id as a completed deposit
- # creates the parenting chain
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-with-both-add-to-origin-and-external-id"]
- % origin_url,
- )
-
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"&lt;external_identifier&gt; is deprecated." in response.content
-
-
def test_post_deposit_atom_403_create_wrong_origin_url_prefix(
authenticated_client, deposit_collection, atom_dataset, deposit_user
):
@@ -162,28 +152,6 @@
assert expected_msg in response.content.decode()
-def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix(
- authenticated_client, deposit_collection, atom_dataset, deposit_user
-):
- """Creating an origin for a prefix not owned by the client is forbidden
-
- """
- origin_url = "http://example.org/foo"
-
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
- HTTP_IN_PROGRESS="true",
- )
- assert response.status_code == status.HTTP_403_FORBIDDEN
- expected_msg = (
- f"Cannot create origin {origin_url}, "
- f"it must start with {deposit_user.provider_url}"
- )
- assert expected_msg in response.content.decode()
-
-
def test_post_deposit_atom_use_slug_header(
authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
@@ -421,83 +389,249 @@
assert bool(deposit_request.archive) is False
-def test_post_deposit_atom_entry_multiple_steps(
- authenticated_client, deposit_collection, atom_dataset, deposit_user
+def test_deposit_metadata_invalid(
+ authenticated_client, deposit_collection, atom_dataset
):
- """After initial deposit, updating a deposit should return a 201
+ """Posting an invalid swhid reference returns a bad request to the client
"""
- # given
- origin_url = deposit_user.provider_url + "2225c695-cfb8-4ebb-aaaa-80da344efa6a"
+ invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
- with pytest.raises(Deposit.DoesNotExist):
- deposit = Deposit.objects.get(origin_url=origin_url)
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ )
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Invalid SWHID reference" in response.content
+
+
+def test_deposit_metadata_fails_functional_checks(
+ authenticated_client, deposit_collection, atom_dataset
+):
+ """Posting metadata failing functional checks returns a bad request to the client
+
+ """
+ swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
+ invalid_xml_data = atom_dataset[
+ "entry-data-with-swhid-fail-metadata-functional-checks"
+ ].format(swhid=swhid)
- # when
response = authenticated_client.post(
reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- HTTP_IN_PROGRESS="True",
+ data=invalid_xml_data,
)
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+ assert b"Functional metadata checks failure" in response.content
+
+
+@pytest.mark.parametrize(
+ "swhid,target_type",
+ [
+ (
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ (
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.CONTENT,
+ ),
+ (
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ MetadataTargetType.DIRECTORY,
+ ),
+ (
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.REVISION,
+ ),
+ (
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.RELEASE,
+ ),
+ (
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ MetadataTargetType.SNAPSHOT,
+ ),
+ ],
+)
+def test_deposit_metadata_swhid(
+ swhid,
+ target_type,
+ authenticated_client,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """Posting a swhid reference is stored on raw extrinsic metadata storage
- # then
- assert response.status_code == status.HTTP_201_CREATED
+ """
+ swhid_reference = parse_swhid(swhid)
+ swhid_core = attr.evolve(swhid_reference, metadata={})
+
+ xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+ deposit_client = authenticated_client.deposit_client
+
+ response = authenticated_client.post(
+ reverse(COL_IRI, args=[deposit_collection.name]),
+ content_type="application/atom+xml;type=entry",
+ data=xml_data,
+ )
+ assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
- deposit_id = int(response_content["swh:deposit_id"])
+ # Ensure the deposit is finalized
+ deposit_id = int(response_content["swh:deposit_id"])
deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.collection == deposit_collection
- assert deposit.origin_url is None # not provided yet
- assert deposit.status == "partial"
+ assert isinstance(swhid_core, SWHID)
+ assert deposit.swhid == str(swhid_core)
+ assert deposit.swhid_context == str(swhid_reference)
+ assert deposit.complete_date == deposit.reception_date
+ assert deposit.complete_date is not None
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+
+ # Ensure metadata stored in the metadata storage is consistent
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit_client.provider_url,
+ metadata={"name": deposit_client.last_name},
+ )
- # one associated request to a deposit
- deposit_requests = DepositRequest.objects.filter(deposit=deposit)
- assert len(deposit_requests) == 1
+ actual_authority = swh_storage.metadata_authority_get(
+ MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
+ )
+ assert actual_authority == metadata_authority
+
+ config = APIConfig()
+ metadata_fetcher = MetadataFetcher(
+ name=config.tool["name"],
+ version=config.tool["version"],
+ metadata=config.tool["configuration"],
+ )
+
+ actual_fetcher = swh_storage.metadata_fetcher_get(
+ config.tool["name"], config.tool["version"]
+ )
+ assert actual_fetcher == metadata_fetcher
+
+ page_results = swh_storage.raw_extrinsic_metadata_get(
+ target_type, swhid_core, metadata_authority
+ )
+ discovery_date = page_results.results[0].discovery_date
+
+ assert len(page_results.results) == 1
+ assert page_results.next_page_token is None
+
+ object_type, metadata_context = compute_metadata_context(swhid_reference)
+ assert page_results == PagedResult(
+ results=[
+ RawExtrinsicMetadata(
+ type=object_type,
+ target=swhid_core,
+ discovery_date=discovery_date,
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="sword-v2-atom-codemeta",
+ metadata=xml_data.encode(),
+ **metadata_context,
+ )
+ ],
+ next_page_token=None,
+ )
+ assert deposit.complete_date == discovery_date
- atom_entry_data = atom_dataset["entry-only-create-origin"] % (origin_url)
- for link in response_content["atom:link"]:
- if link["@rel"] == "http://purl.org/net/sword/terms/add":
- se_iri = link["@href"]
- break
- else:
- assert False, f"missing SE-IRI from {response_content['link']}"
+@pytest.mark.parametrize(
+ "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",]
+)
+def test_deposit_metadata_origin(
+ url, authenticated_client, deposit_collection, atom_dataset, swh_storage,
+):
+ """Posting an origin reference is stored on raw extrinsic metadata storage
- # when updating the first deposit post
+ """
+ xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
+ deposit_client = authenticated_client.deposit_client
response = authenticated_client.post(
- se_iri,
+ reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
- data=atom_entry_data,
- HTTP_IN_PROGRESS="False",
+ data=xml_data,
)
- # then
- assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
-
+ assert response.status_code == status.HTTP_201_CREATED
response_content = parse_xml(BytesIO(response.content))
+ # Ensure the deposit is finalized
deposit_id = int(response_content["swh:deposit_id"])
-
deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.collection == deposit_collection
- assert deposit.origin_url == origin_url
- assert deposit.status == DEPOSIT_STATUS_DEPOSITED
+ # we got no swhid as input so we cannot have those
+ assert deposit.swhid is None
+ assert deposit.swhid_context is None
+ assert deposit.complete_date == deposit.reception_date
+ assert deposit.complete_date is not None
+ assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+
+ # Ensure metadata stored in the metadata storage is consistent
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=deposit_client.provider_url,
+ metadata={"name": deposit_client.last_name},
+ )
- assert len(Deposit.objects.all()) == 1
+ actual_authority = swh_storage.metadata_authority_get(
+ MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
+ )
+ assert actual_authority == metadata_authority
- # now 2 associated requests to a same deposit
- deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id")
- assert len(deposit_requests) == 2
+ config = APIConfig()
+ metadata_fetcher = MetadataFetcher(
+ name=config.tool["name"],
+ version=config.tool["version"],
+ metadata=config.tool["configuration"],
+ )
- atom_entry_data1 = atom_dataset["entry-data1"]
- expected_meta = [
- {"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1},
- {"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data},
- ]
+ actual_fetcher = swh_storage.metadata_fetcher_get(
+ config.tool["name"], config.tool["version"]
+ )
+ assert actual_fetcher == metadata_fetcher
- for i, deposit_request in enumerate(deposit_requests):
- actual_metadata = deposit_request.metadata
- assert actual_metadata == expected_meta[i]["metadata"]
- assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"]
- assert bool(deposit_request.archive) is False
+ page_results = swh_storage.raw_extrinsic_metadata_get(
+ MetadataTargetType.ORIGIN, url, metadata_authority
+ )
+ discovery_date = page_results.results[0].discovery_date
+
+ assert len(page_results.results) == 1
+ assert page_results.next_page_token is None
+
+ assert page_results == PagedResult(
+ results=[
+ RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ target=url,
+ discovery_date=discovery_date,
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="sword-v2-atom-codemeta",
+ metadata=xml_data.encode(),
+ )
+ ],
+ next_page_token=None,
+ )
+ assert deposit.complete_date == discovery_date
diff --git a/swh/deposit/tests/api/test_collection_post_binary.py b/swh/deposit/tests/api/test_collection_post_binary.py
--- a/swh/deposit/tests/api/test_collection_post_binary.py
+++ b/swh/deposit/tests/api/test_collection_post_binary.py
@@ -8,12 +8,11 @@
from io import BytesIO
import uuid
-from django.core.files.uploadedfile import InMemoryUploadedFile
from django.urls import reverse
import pytest
from rest_framework import status
-from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, EM_IRI
+from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED
from swh.deposit.models import Deposit, DepositRequest
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import check_archive, create_arborescence_archive
@@ -381,251 +380,3 @@
deposits = Deposit.objects.all().order_by("id")
assert len(deposits) == 2
assert list(deposits), [deposit == deposit2]
-
-
-def test_post_deposit_binary_and_post_to_add_another_archive(
- authenticated_client, deposit_collection, sample_archive, tmp_path
-):
- """Updating a deposit should return a 201 with receipt
-
- """
- tmp_path = str(tmp_path)
- url = reverse(COL_IRI, args=[deposit_collection.name])
-
- external_id = "some-external-id-1"
-
- # when
- response = authenticated_client.post(
- url,
- content_type="application/zip", # as zip
- data=sample_archive["data"],
- # + headers
- CONTENT_LENGTH=sample_archive["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=sample_archive["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="true",
- HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (sample_archive["name"],),
- )
-
- # then
- assert response.status_code == status.HTTP_201_CREATED
-
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.status == "partial"
- assert deposit.external_id == external_id
- assert deposit.collection == deposit_collection
- assert deposit.swhid is None
-
- deposit_request = DepositRequest.objects.get(deposit=deposit)
- assert deposit_request.deposit == deposit
- assert deposit_request.type == "archive"
- check_archive(sample_archive["name"], deposit_request.archive.name)
-
- # 2nd archive to upload
- archive2 = create_arborescence_archive(
- tmp_path, "archive2", "file2", b"some other content in file"
- )
-
- # uri to update the content
- update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit_id])
-
- # adding another archive for the deposit and finalizing it
- response = authenticated_client.post(
- update_uri,
- content_type="application/zip", # as zip
- data=archive2["data"],
- # + headers
- CONTENT_LENGTH=archive2["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=archive2["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"]),
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
-
- deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.status == DEPOSIT_STATUS_DEPOSITED
- assert deposit.external_id == external_id
- assert deposit.collection == deposit_collection
- assert deposit.swhid is None
-
- deposit_requests = list(
- DepositRequest.objects.filter(deposit=deposit).order_by("id")
- )
-
- # 2 deposit requests for the same deposit
- assert len(deposit_requests) == 2
- assert deposit_requests[0].deposit == deposit
- assert deposit_requests[0].type == "archive"
- check_archive(sample_archive["name"], deposit_requests[0].archive.name)
-
- assert deposit_requests[1].deposit == deposit
- assert deposit_requests[1].type == "archive"
- check_archive(archive2["name"], deposit_requests[1].archive.name)
-
- # only 1 deposit in db
- deposits = Deposit.objects.all()
- assert len(deposits) == 1
-
-
-def test_post_deposit_then_update_refused(
- authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path
-):
- """Updating a deposit with status 'ready' should return a 400
-
- """
- tmp_path = str(tmp_path)
- url = reverse(COL_IRI, args=[deposit_collection.name])
-
- external_id = "some-external-id-1"
-
- # when
- response = authenticated_client.post(
- url,
- content_type="application/zip", # as zip
- data=sample_archive["data"],
- # + headers
- CONTENT_LENGTH=sample_archive["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=sample_archive["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="false",
- HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
- )
-
- # then
- assert response.status_code == status.HTTP_201_CREATED
-
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.status == DEPOSIT_STATUS_DEPOSITED
- assert deposit.external_id == external_id
- assert deposit.collection == deposit_collection
- assert deposit.swhid is None
-
- deposit_request = DepositRequest.objects.get(deposit=deposit)
- assert deposit_request.deposit == deposit
- check_archive("filename0", deposit_request.archive.name)
-
- # updating/adding is forbidden
-
- # uri to update the content
- edit_iri = reverse("edit_iri", args=[deposit_collection.name, deposit_id])
- se_iri = reverse("se_iri", args=[deposit_collection.name, deposit_id])
- em_iri = reverse("em_iri", args=[deposit_collection.name, deposit_id])
-
- # Testing all update/add endpoint should fail
- # since the status is ready
-
- archive2 = create_arborescence_archive(
- tmp_path, "archive2", "file2", b"some content in file 2"
- )
-
- # replacing file is no longer possible since the deposit's
- # status is ready
- r = authenticated_client.put(
- em_iri,
- content_type="application/zip",
- data=archive2["data"],
- CONTENT_LENGTH=archive2["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=archive2["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="false",
- HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
- )
-
- assert r.status_code == status.HTTP_400_BAD_REQUEST
- assert b"You can only act on deposit with status &#39;partial&#39;" in r.content
-
- # adding file is no longer possible since the deposit's status
- # is ready
- r = authenticated_client.post(
- em_iri,
- content_type="application/zip",
- data=archive2["data"],
- CONTENT_LENGTH=archive2["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=archive2["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="false",
- HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
- )
-
- assert r.status_code == status.HTTP_400_BAD_REQUEST
- assert b"You can only act on deposit with status &#39;partial&#39;" in r.content
-
- # replacing metadata is no longer possible since the deposit's
- # status is ready
- r = authenticated_client.put(
- edit_iri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-deposit-binary"],
- CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]),
- HTTP_SLUG=external_id,
- )
-
- assert r.status_code == status.HTTP_400_BAD_REQUEST
- assert b"You can only act on deposit with status &#39;partial&#39;" in r.content
-
- # adding new metadata is no longer possible since the
- # deposit's status is ready
- r = authenticated_client.post(
- se_iri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-deposit-binary"],
- CONTENT_LENGTH=len(atom_dataset["entry-data-deposit-binary"]),
- HTTP_SLUG=external_id,
- )
-
- assert r.status_code == status.HTTP_400_BAD_REQUEST
- assert b"You can only act on deposit with status &#39;partial&#39;" in r.content
-
- archive_content = b"some content representing archive"
- archive = InMemoryUploadedFile(
- BytesIO(archive_content),
- field_name="archive0",
- name="archive0",
- content_type="application/zip",
- size=len(archive_content),
- charset=None,
- )
-
- atom_entry = InMemoryUploadedFile(
- BytesIO(atom_dataset["entry-data-deposit-binary"].encode("utf-8")),
- field_name="atom0",
- name="atom0",
- content_type='application/atom+xml; charset="utf-8"',
- size=len(atom_dataset["entry-data-deposit-binary"]),
- charset="utf-8",
- )
-
- # replacing multipart metadata is no longer possible since the
- # deposit's status is ready
- r = authenticated_client.put(
- edit_iri,
- format="multipart",
- data={"archive": archive, "atom_entry": atom_entry,},
- )
-
- assert r.status_code == status.HTTP_400_BAD_REQUEST
- assert b"You can only act on deposit with status &#39;partial&#39;" in r.content
-
- # adding new metadata is no longer possible since the
- # deposit's status is ready
- r = authenticated_client.post(
- se_iri,
- format="multipart",
- data={"archive": archive, "atom_entry": atom_entry,},
- )
-
- assert r.status_code == status.HTTP_400_BAD_REQUEST
- assert b"You can only act on deposit with status &#39;partial&#39;" in r.content
diff --git a/swh/deposit/tests/api/test_collection_post_metadata.py b/swh/deposit/tests/api/test_collection_post_metadata.py
deleted file mode 100644
--- a/swh/deposit/tests/api/test_collection_post_metadata.py
+++ /dev/null
@@ -1,275 +0,0 @@
-# Copyright (C) 2020 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-"""Tests metadata is loaded when sent via a POST Col-IRI"""
-
-from io import BytesIO
-
-import attr
-from django.urls import reverse
-import pytest
-from rest_framework import status
-
-from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_LOAD_SUCCESS, APIConfig
-from swh.deposit.models import Deposit
-from swh.deposit.parsers import parse_xml
-from swh.deposit.utils import compute_metadata_context
-from swh.model.identifiers import SWHID, parse_swhid
-from swh.model.model import (
- MetadataAuthority,
- MetadataAuthorityType,
- MetadataFetcher,
- MetadataTargetType,
- RawExtrinsicMetadata,
-)
-from swh.storage.interface import PagedResult
-
-
-def test_deposit_metadata_invalid(
- authenticated_client, deposit_collection, atom_dataset
-):
- """Posting invalid swhid reference is bad request returned to client
-
- """
- invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
- xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
-
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=xml_data,
- )
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Invalid SWHID reference" in response.content
-
-
-def test_deposit_metadata_fails_functional_checks(
- authenticated_client, deposit_collection, atom_dataset
-):
- """Posting functionally invalid metadata swhid is bad request returned to client
-
- """
- swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
- invalid_xml_data = atom_dataset[
- "entry-data-with-swhid-fail-metadata-functional-checks"
- ].format(swhid=swhid)
-
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=invalid_xml_data,
- )
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Functional metadata checks failure" in response.content
-
-
-@pytest.mark.parametrize(
- "swhid,target_type",
- [
- (
- "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.CONTENT,
- ),
- (
- "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.DIRECTORY,
- ),
- (
- "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.REVISION,
- ),
- (
- "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.RELEASE,
- ),
- (
- "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.SNAPSHOT,
- ),
- (
- "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.CONTENT,
- ),
- (
- "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
- MetadataTargetType.DIRECTORY,
- ),
- (
- "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.REVISION,
- ),
- (
- "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.RELEASE,
- ),
- (
- "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.SNAPSHOT,
- ),
- ],
-)
-def test_deposit_metadata_swhid(
- swhid,
- target_type,
- authenticated_client,
- deposit_collection,
- atom_dataset,
- swh_storage,
-):
- """Posting a swhid reference is stored on raw extrinsic metadata storage
-
- """
- swhid_reference = parse_swhid(swhid)
- swhid_core = attr.evolve(swhid_reference, metadata={})
-
- xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
- deposit_client = authenticated_client.deposit_client
-
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=xml_data,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
-
- # Ensure the deposit is finalized
- deposit_id = int(response_content["swh:deposit_id"])
- deposit = Deposit.objects.get(pk=deposit_id)
- assert isinstance(swhid_core, SWHID)
- assert deposit.swhid == str(swhid_core)
- assert deposit.swhid_context == str(swhid_reference)
- assert deposit.complete_date == deposit.reception_date
- assert deposit.complete_date is not None
- assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
-
- # Ensure metadata stored in the metadata storage is consistent
- metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=deposit_client.provider_url,
- metadata={"name": deposit_client.last_name},
- )
-
- actual_authority = swh_storage.metadata_authority_get(
- MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
- )
- assert actual_authority == metadata_authority
-
- config = APIConfig()
- metadata_fetcher = MetadataFetcher(
- name=config.tool["name"],
- version=config.tool["version"],
- metadata=config.tool["configuration"],
- )
-
- actual_fetcher = swh_storage.metadata_fetcher_get(
- config.tool["name"], config.tool["version"]
- )
- assert actual_fetcher == metadata_fetcher
-
- page_results = swh_storage.raw_extrinsic_metadata_get(
- target_type, swhid_core, metadata_authority
- )
- discovery_date = page_results.results[0].discovery_date
-
- assert len(page_results.results) == 1
- assert page_results.next_page_token is None
-
- object_type, metadata_context = compute_metadata_context(swhid_reference)
- assert page_results == PagedResult(
- results=[
- RawExtrinsicMetadata(
- type=object_type,
- target=swhid_core,
- discovery_date=discovery_date,
- authority=attr.evolve(metadata_authority, metadata=None),
- fetcher=attr.evolve(metadata_fetcher, metadata=None),
- format="sword-v2-atom-codemeta",
- metadata=xml_data.encode(),
- **metadata_context,
- )
- ],
- next_page_token=None,
- )
- assert deposit.complete_date == discovery_date
-
-
-@pytest.mark.parametrize(
- "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",]
-)
-def test_deposit_metadata_origin(
- url, authenticated_client, deposit_collection, atom_dataset, swh_storage,
-):
- """Posting a swhid reference is stored on raw extrinsic metadata storage
-
- """
- xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
- deposit_client = authenticated_client.deposit_client
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=xml_data,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- # Ensure the deposit is finalized
- deposit_id = int(response_content["swh:deposit_id"])
- deposit = Deposit.objects.get(pk=deposit_id)
- # we got not swhid as input so we cannot have those
- assert deposit.swhid is None
- assert deposit.swhid_context is None
- assert deposit.complete_date == deposit.reception_date
- assert deposit.complete_date is not None
- assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
-
- # Ensure metadata stored in the metadata storage is consistent
- metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=deposit_client.provider_url,
- metadata={"name": deposit_client.last_name},
- )
-
- actual_authority = swh_storage.metadata_authority_get(
- MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url
- )
- assert actual_authority == metadata_authority
-
- config = APIConfig()
- metadata_fetcher = MetadataFetcher(
- name=config.tool["name"],
- version=config.tool["version"],
- metadata=config.tool["configuration"],
- )
-
- actual_fetcher = swh_storage.metadata_fetcher_get(
- config.tool["name"], config.tool["version"]
- )
- assert actual_fetcher == metadata_fetcher
-
- page_results = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, url, metadata_authority
- )
- discovery_date = page_results.results[0].discovery_date
-
- assert len(page_results.results) == 1
- assert page_results.next_page_token is None
-
- assert page_results == PagedResult(
- results=[
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=url,
- discovery_date=discovery_date,
- authority=attr.evolve(metadata_authority, metadata=None),
- fetcher=attr.evolve(metadata_fetcher, metadata=None),
- format="sword-v2-atom-codemeta",
- metadata=xml_data.encode(),
- )
- ],
- next_page_token=None,
- )
- assert deposit.complete_date == discovery_date
diff --git a/swh/deposit/tests/api/test_collection.py b/swh/deposit/tests/api/test_collection_reuse_slug.py
copy from swh/deposit/tests/api/test_collection.py
copy to swh/deposit/tests/api/test_collection_reuse_slug.py
--- a/swh/deposit/tests/api/test_collection.py
+++ b/swh/deposit/tests/api/test_collection_reuse_slug.py
@@ -1,9 +1,8 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
+# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import hashlib
from io import BytesIO
from django.urls import reverse
@@ -14,7 +13,6 @@
DEPOSIT_STATUS_LOAD_FAILURE,
DEPOSIT_STATUS_LOAD_SUCCESS,
DEPOSIT_STATUS_PARTIAL,
- DEPOSIT_STATUS_REJECTED,
SE_IRI,
)
from swh.deposit.models import Deposit
@@ -23,68 +21,6 @@
from ..conftest import create_deposit
-def test_deposit_post_will_fail_with_401(client):
- """Without authentication, endpoint refuses access with 401 response
-
- """
- url = reverse(COL_IRI, args=["hal"])
- response = client.post(url)
- assert response.status_code == status.HTTP_401_UNAUTHORIZED
-
-
-def test_access_to_another_user_collection_is_forbidden(
- authenticated_client, deposit_another_collection, deposit_user
-):
- """Access to another user collection should return a 403
-
- """
- coll2 = deposit_another_collection
- url = reverse(COL_IRI, args=[coll2.name])
- response = authenticated_client.post(url)
- assert response.status_code == status.HTTP_403_FORBIDDEN
- msg = "Client %s cannot access collection %s" % (deposit_user.username, coll2.name,)
- assert msg in response.content.decode("utf-8")
-
-
-def test_delete_on_col_iri_not_supported(authenticated_client, deposit_collection):
- """Delete on col iri should return a 405 response
-
- """
- url = reverse(COL_IRI, args=[deposit_collection.name])
- response = authenticated_client.delete(url)
- assert response.status_code == status.HTTP_405_METHOD_NOT_ALLOWED
- assert "DELETE method is not supported on this endpoint" in response.content.decode(
- "utf-8"
- )
-
-
-def create_deposit_with_rejection_status(authenticated_client, deposit_collection):
- url = reverse(COL_IRI, args=[deposit_collection.name])
-
- data = b"some data which is clearly not a zip file"
- md5sum = hashlib.md5(data).hexdigest()
- external_id = "some-external-id-1"
-
- # when
- response = authenticated_client.post(
- url,
- content_type="application/zip", # as zip
- data=data,
- # + headers
- CONTENT_LENGTH=len(data),
- # other headers needs HTTP_ prefix to be taken into account
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=md5sum,
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- actual_state = response_content["deposit_status"]
- assert actual_state == DEPOSIT_STATUS_REJECTED
-
-
def test_act_on_deposit_rejected_is_not_permitted(
authenticated_client, deposit_collection, rejected_deposit, atom_dataset
):
@@ -207,43 +143,6 @@
assert new_deposit.parent == deposit
-def test_add_deposit_with_add_to_origin(
- authenticated_client,
- deposit_collection,
- completed_deposit,
- atom_dataset,
- deposit_user,
-):
- """Posting deposit with <swh:add_to_origin> creates a new deposit with parent
-
- """
- # given multiple deposit already loaded
- deposit = completed_deposit
- assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
- origin_url = deposit_user.provider_url + deposit.external_id
-
- # adding a new deposit with the same external id as a completed deposit
- # creates the parenting chain
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-with-add-to-origin"] % origin_url,
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = response_content["swh:deposit_id"]
-
- assert deposit_id != deposit.id
-
- new_deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit.collection == new_deposit.collection
- assert deposit.origin_url == origin_url
-
- assert new_deposit != deposit
- assert new_deposit.parent == deposit
-
-
def test_add_deposit_external_id_conflict_no_parent(
authenticated_client,
another_authenticated_client,
@@ -340,60 +239,3 @@
assert new_deposit != deposit
assert new_deposit.parent == deposit
-
-
-def test_add_deposit_add_to_origin_conflict(
- authenticated_client,
- another_authenticated_client,
- deposit_collection,
- deposit_another_collection,
- atom_dataset,
- sample_archive,
- deposit_user,
- deposit_another_user,
-):
- """Posting a deposit with an <swh:add_to_origin> referencing an origin
- owned by a different client raises an error
-
- """
- external_id = "foobar"
- origin_url = deposit_another_user.provider_url + external_id
-
- # create a deposit for that other user, with the same slug
- create_deposit(
- another_authenticated_client,
- deposit_another_collection.name,
- sample_archive,
- external_id,
- DEPOSIT_STATUS_LOAD_SUCCESS,
- )
-
- # adding a new deposit with the same external id as a completed deposit
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- )
-
- assert response.status_code == status.HTTP_403_FORBIDDEN
- assert b"must start with" in response.content
-
-
-def test_add_deposit_add_to_wrong_origin(
- authenticated_client, deposit_collection, atom_dataset, sample_archive,
-):
- """Posting a deposit with an <swh:add_to_origin> referencing an origin
- not starting with the provider_url raises an error
-
- """
- origin_url = "http://example.org/foo"
-
- # adding a new deposit with the same external id as a completed deposit
- response = authenticated_client.post(
- reverse(COL_IRI, args=[deposit_collection.name]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- )
-
- assert response.status_code == status.HTTP_403_FORBIDDEN
- assert b"must start with" in response.content
diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py
--- a/swh/deposit/tests/api/test_deposit_update.py
+++ b/swh/deposit/tests/api/test_deposit_update.py
@@ -3,253 +3,21 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+"""Tests updates on SE-IRI."""
+
from io import BytesIO
-import attr
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.urls import reverse
from rest_framework import status
-from swh.deposit.api.common import ACCEPT_ARCHIVE_CONTENT_TYPES
from swh.deposit.config import (
- COL_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_PARTIAL,
EDIT_IRI,
- EM_IRI,
SE_IRI,
- APIConfig,
-)
-from swh.deposit.models import Deposit, DepositCollection, DepositRequest
-from swh.deposit.parsers import parse_xml
-from swh.deposit.tests.common import check_archive, create_arborescence_archive
-from swh.model.hashutil import hash_to_bytes
-from swh.model.identifiers import parse_swhid, swhid
-from swh.model.model import (
- MetadataAuthority,
- MetadataAuthorityType,
- MetadataFetcher,
- MetadataTargetType,
- RawExtrinsicMetadata,
)
-from swh.storage.interface import PagedResult
-
-
-def test_replace_archive_to_deposit_is_possible(
- tmp_path,
- partial_deposit,
- deposit_collection,
- authenticated_client,
- sample_archive,
- atom_dataset,
-):
- """Replace all archive with another one should return a 204 response
-
- """
- tmp_path = str(tmp_path)
- # given
- deposit = partial_deposit
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
-
- assert len(list(requests)) == 1
- check_archive(sample_archive["name"], requests[0].archive.name)
-
- # we have no metadata for that deposit
- requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
- assert len(requests) == 0
-
- response = authenticated_client.post(
- reverse(SE_IRI, args=[deposit_collection.name, deposit.id]),
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- HTTP_SLUG=deposit.external_id,
- HTTP_IN_PROGRESS=True,
- )
-
- requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
- assert len(requests) == 1
-
- update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
- external_id = "some-external-id-1"
- archive2 = create_arborescence_archive(
- tmp_path, "archive2", "file2", b"some other content in file"
- )
-
- response = authenticated_client.put(
- update_uri,
- content_type="application/zip", # as zip
- data=archive2["data"],
- # + headers
- CONTENT_LENGTH=archive2["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=archive2["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="false",
- HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
- )
-
- assert response.status_code == status.HTTP_204_NO_CONTENT
-
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
-
- assert len(list(requests)) == 1
- check_archive(archive2["name"], requests[0].archive.name)
-
- # check we did not touch the other parts
- requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
- assert len(requests) == 1
-
-
-def test_replace_metadata_to_deposit_is_possible(
- tmp_path,
- authenticated_client,
- partial_deposit_with_metadata,
- deposit_collection,
- atom_dataset,
- deposit_user,
-):
- """Replace all metadata with another one should return a 204 response
-
- """
- # given
- deposit = partial_deposit_with_metadata
- origin_url = deposit_user.provider_url + deposit.external_id
- raw_metadata0 = atom_dataset["entry-data0"] % origin_url
-
- requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests_meta) == 1
- request_meta0 = requests_meta[0]
- assert request_meta0.raw_metadata == raw_metadata0
-
- requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive0) == 1
-
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id])
-
- response = authenticated_client.put(
- update_uri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- )
-
- assert response.status_code == status.HTTP_204_NO_CONTENT
-
- requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
-
- assert len(requests_meta) == 1
- request_meta1 = requests_meta[0]
- raw_metadata1 = request_meta1.raw_metadata
- assert raw_metadata1 == atom_dataset["entry-data1"]
- assert raw_metadata0 != raw_metadata1
- assert request_meta0 != request_meta1
-
- # check we did not touch the other parts
- requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive1) == 1
- assert set(requests_archive0) == set(requests_archive1)
-
-
-def test_add_archive_to_deposit_is_possible(
- tmp_path,
- authenticated_client,
- deposit_collection,
- partial_deposit_with_metadata,
- sample_archive,
-):
- """Add another archive to a deposit return a 201 response
-
- """
- tmp_path = str(tmp_path)
- deposit = partial_deposit_with_metadata
-
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
-
- assert len(requests) == 1
- check_archive(sample_archive["name"], requests[0].archive.name)
-
- requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests_meta0) == 1
-
- update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
-
- external_id = "some-external-id-1"
- archive2 = create_arborescence_archive(
- tmp_path, "archive2", "file2", b"some other content in file"
- )
-
- response = authenticated_client.post(
- update_uri,
- content_type="application/zip", # as zip
- data=archive2["data"],
- # + headers
- CONTENT_LENGTH=archive2["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=archive2["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="false",
- HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
- )
-
- assert response.status_code == status.HTTP_201_CREATED
-
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by(
- "id"
- )
-
- assert len(requests) == 2
- # first archive still exists
- check_archive(sample_archive["name"], requests[0].archive.name)
- # a new one was added
- check_archive(archive2["name"], requests[1].archive.name)
-
- # check we did not touch the other parts
- requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests_meta1) == 1
- assert set(requests_meta0) == set(requests_meta1)
-
-
-def test_add_metadata_to_deposit_is_possible(
- authenticated_client,
- deposit_collection,
- partial_deposit_with_metadata,
- atom_dataset,
- deposit_user,
-):
- """Add metadata with another one should return a 204 response
-
- """
- deposit = partial_deposit_with_metadata
- origin_url = deposit_user.provider_url + deposit.external_id
- requests = DepositRequest.objects.filter(deposit=deposit, type="metadata")
-
- assert len(requests) == 1
-
- requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive0) == 1
-
- update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id])
-
- atom_entry = atom_dataset["entry-data1"]
- response = authenticated_client.post(
- update_uri, content_type="application/atom+xml;type=entry", data=atom_entry
- )
-
- assert response.status_code == status.HTTP_201_CREATED
-
- requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by(
- "id"
- )
-
- assert len(requests) == 2
- expected_raw_meta0 = atom_dataset["entry-data0"] % origin_url
- # a new one was added
- assert requests[0].raw_metadata == expected_raw_meta0
- assert requests[1].raw_metadata == atom_entry
-
- # check we did not touch the other parts
- requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive1) == 1
- assert set(requests_archive0) == set(requests_archive1)
+from swh.deposit.models import Deposit, DepositRequest
def test_add_both_archive_and_metadata_to_deposit(
@@ -345,169 +113,6 @@
assert deposit.status == DEPOSIT_STATUS_DEPOSITED
-def test_add_metadata_to_unknown_deposit(
- deposit_collection, authenticated_client, atom_dataset
-):
- """Replacing metadata to unknown deposit should return a 404 response
-
- """
- unknown_deposit_id = 1000
- try:
- Deposit.objects.get(pk=unknown_deposit_id)
- except Deposit.DoesNotExist:
- assert True
-
- url = reverse(SE_IRI, args=[deposit_collection, unknown_deposit_id])
- response = authenticated_client.post(
- url,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- )
- assert response.status_code == status.HTTP_404_NOT_FOUND
- response_content = parse_xml(response.content)
- assert (
- "Deposit 1000 does not exist" in response_content["sword:error"]["atom:summary"]
- )
-
-
-def test_add_metadata_to_unknown_collection(
- partial_deposit, authenticated_client, atom_dataset
-):
- """Replacing metadata to unknown deposit should return a 404 response
-
- """
- deposit = partial_deposit
- unknown_collection_name = "unknown-collection"
- try:
- DepositCollection.objects.get(name=unknown_collection_name)
- except DepositCollection.DoesNotExist:
- assert True
-
- url = reverse(SE_IRI, args=[unknown_collection_name, deposit.id])
- response = authenticated_client.post(
- url,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- )
- assert response.status_code == status.HTTP_404_NOT_FOUND
- response_content = parse_xml(response.content)
- assert "Unknown collection name" in response_content["sword:error"]["atom:summary"]
-
-
-def test_replace_metadata_to_unknown_deposit(
- authenticated_client, deposit_collection, atom_dataset
-):
- """Adding metadata to unknown deposit should return a 404 response
-
- """
- unknown_deposit_id = 998
- try:
- Deposit.objects.get(pk=unknown_deposit_id)
- except Deposit.DoesNotExist:
- assert True
- url = reverse(EDIT_IRI, args=[deposit_collection.name, unknown_deposit_id])
- response = authenticated_client.put(
- url,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- )
- assert response.status_code == status.HTTP_404_NOT_FOUND
- response_content = parse_xml(response.content)
- assert (
- "Deposit %s does not exist" % unknown_deposit_id
- == response_content["sword:error"]["atom:summary"]
- )
-
-
-def test_add_archive_to_unknown_deposit(
- authenticated_client, deposit_collection, atom_dataset
-):
- """Adding metadata to unknown deposit should return a 404 response
-
- """
- unknown_deposit_id = 997
- try:
- Deposit.objects.get(pk=unknown_deposit_id)
- except Deposit.DoesNotExist:
- assert True
-
- url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
- response = authenticated_client.post(
- url, content_type="application/zip", data=atom_dataset["entry-data1"]
- )
- assert response.status_code == status.HTTP_404_NOT_FOUND
- response_content = parse_xml(response.content)
- assert (
- "Deposit %s does not exist" % unknown_deposit_id
- == response_content["sword:error"]["atom:summary"]
- )
-
-
-def test_replace_archive_to_unknown_deposit(
- authenticated_client, deposit_collection, atom_dataset
-):
- """Replacing archive to unknown deposit should return a 404 response
-
- """
- unknown_deposit_id = 996
- try:
- Deposit.objects.get(pk=unknown_deposit_id)
- except Deposit.DoesNotExist:
- assert True
-
- url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
- response = authenticated_client.put(
- url, content_type="application/zip", data=atom_dataset["entry-data1"]
- )
- assert response.status_code == status.HTTP_404_NOT_FOUND
- response_content = parse_xml(response.content)
- assert (
- "Deposit %s does not exist" % unknown_deposit_id
- == response_content["sword:error"]["atom:summary"]
- )
-
-
-def test_post_metadata_to_em_iri_failure(
- authenticated_client, deposit_collection, partial_deposit, atom_dataset
-):
- """Update (POST) archive with wrong content type should return 400
-
- """
- deposit = partial_deposit
- update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
- response = authenticated_client.post(
- update_uri,
- content_type="application/x-gtar-compressed",
- data=atom_dataset["entry-data1"],
- )
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Packaging format supported is restricted" in response.content
- for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES:
- assert supported_format.encode() in response.content
-
-
-def test_put_metadata_to_em_iri_failure(
- authenticated_client, deposit_collection, partial_deposit, atom_dataset
-):
- """Update (PUT) archive with wrong content type should return 400
-
- """
- # given
- deposit = partial_deposit
- # when
- update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
- response = authenticated_client.put(
- update_uri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- )
- # then
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Packaging format supported is restricted" in response.content
- for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES:
- assert supported_format.encode() in response.content
-
-
def test_put_update_metadata_and_archive_deposit_partial_nominal(
tmp_path,
authenticated_client,
@@ -576,310 +181,3 @@
requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
assert len(requests_archive1) == 1
assert set(requests_archive0) != set(requests_archive1)
-
-
-def test_put_update_metadata_done_deposit_nominal(
- tmp_path,
- authenticated_client,
- complete_deposit,
- deposit_collection,
- atom_dataset,
- sample_data,
- swh_storage,
-):
- """Nominal scenario, client send an update of metadata on a deposit with status "done"
- with an existing swhid. Such swhid has its metadata updated accordingly both in
- the deposit backend and in the metadata storage.
-
- Response: 204
-
- """
- deposit_swhid = parse_swhid(complete_deposit.swhid)
- assert deposit_swhid.object_type == "directory"
- directory_id = hash_to_bytes(deposit_swhid.object_id)
-
- # directory targeted by the complete_deposit does not exist in the storage
- assert list(swh_storage.directory_missing([directory_id])) == [directory_id]
-
- # so let's create a directory reference in the storage (current deposit targets an
- # unknown swhid)
- existing_directory = sample_data.directory
- swh_storage.directory_add([existing_directory])
- assert list(swh_storage.directory_missing([existing_directory.id])) == []
-
- # and patch one complete deposit swhid so it targets said reference
- complete_deposit.swhid = swhid("directory", existing_directory.id)
- complete_deposit.save()
-
- actual_existing_requests_archive = DepositRequest.objects.filter(
- deposit=complete_deposit, type="archive"
- )
- nb_archives = len(actual_existing_requests_archive)
- actual_existing_requests_metadata = DepositRequest.objects.filter(
- deposit=complete_deposit, type="metadata"
- )
- nb_metadata = len(actual_existing_requests_metadata)
-
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id])
- response = authenticated_client.put(
- update_uri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- HTTP_X_CHECK_SWHID=complete_deposit.swhid,
- )
-
- assert response.status_code == status.HTTP_204_NO_CONTENT
-
- new_requests_meta = DepositRequest.objects.filter(
- deposit=complete_deposit, type="metadata"
- )
- assert len(new_requests_meta) == nb_metadata + 1
- request_meta1 = new_requests_meta[0]
- raw_metadata1 = request_meta1.raw_metadata
- assert raw_metadata1 == atom_dataset["entry-data1"]
-
- # check we did not touch the other parts
- requests_archive1 = DepositRequest.objects.filter(
- deposit=complete_deposit, type="archive"
- )
- assert len(requests_archive1) == nb_archives
- assert set(actual_existing_requests_archive) == set(requests_archive1)
-
- # Ensure metadata stored in the metadata storage is consistent
- metadata_authority = MetadataAuthority(
- type=MetadataAuthorityType.DEPOSIT_CLIENT,
- url=complete_deposit.client.provider_url,
- metadata={"name": complete_deposit.client.last_name},
- )
-
- actual_authority = swh_storage.metadata_authority_get(
- MetadataAuthorityType.DEPOSIT_CLIENT, url=complete_deposit.client.provider_url
- )
- assert actual_authority == metadata_authority
-
- config = APIConfig()
- metadata_fetcher = MetadataFetcher(
- name=config.tool["name"],
- version=config.tool["version"],
- metadata=config.tool["configuration"],
- )
-
- actual_fetcher = swh_storage.metadata_fetcher_get(
- config.tool["name"], config.tool["version"]
- )
- assert actual_fetcher == metadata_fetcher
-
- directory_swhid = parse_swhid(complete_deposit.swhid)
- page_results = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, directory_swhid, metadata_authority
- )
- assert page_results == PagedResult(
- results=[
- RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
- target=directory_swhid,
- discovery_date=request_meta1.date,
- authority=attr.evolve(metadata_authority, metadata=None),
- fetcher=attr.evolve(metadata_fetcher, metadata=None),
- format="sword-v2-atom-codemeta",
- metadata=raw_metadata1.encode(),
- origin=complete_deposit.origin_url,
- )
- ],
- next_page_token=None,
- )
-
-
-def test_put_update_metadata_done_deposit_failure_mismatched_swhid(
- tmp_path,
- authenticated_client,
- complete_deposit,
- deposit_collection,
- atom_dataset,
- swh_storage,
-):
- """failure: client updates metadata on deposit with SWHID not matching the deposit's.
-
- Response: 400
-
- """
- incorrect_swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
- assert complete_deposit.swhid != incorrect_swhid
-
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id])
- response = authenticated_client.put(
- update_uri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data1"],
- HTTP_X_CHECK_SWHID=incorrect_swhid,
- )
-
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Mismatched provided SWHID" in response.content
-
-
-def test_put_update_metadata_done_deposit_failure_malformed_xml(
- tmp_path,
- authenticated_client,
- complete_deposit,
- deposit_collection,
- atom_dataset,
- swh_storage,
-):
- """failure: client updates metadata on deposit done with a malformed xml
-
- Response: 400
-
- """
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id])
- response = authenticated_client.put(
- update_uri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-ko"],
- HTTP_X_CHECK_SWHID=complete_deposit.swhid,
- )
-
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Malformed xml metadata" in response.content
-
-
-def test_put_update_metadata_done_deposit_failure_empty_xml(
- tmp_path,
- authenticated_client,
- complete_deposit,
- deposit_collection,
- atom_dataset,
- swh_storage,
-):
- """failure: client updates metadata on deposit done with an empty xml.
-
- Response: 400
-
- """
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id])
-
- atom_content = atom_dataset["entry-data-empty-body"]
- response = authenticated_client.put(
- update_uri,
- content_type="application/atom+xml;type=entry",
- data=atom_content,
- HTTP_X_CHECK_SWHID=complete_deposit.swhid,
- )
-
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Empty body request is not supported" in response.content
-
-
-def test_put_update_metadata_done_deposit_failure_functional_checks(
- tmp_path,
- authenticated_client,
- complete_deposit,
- deposit_collection,
- atom_dataset,
- swh_storage,
-):
- """failure: client updates metadata on deposit done without required incomplete metadata
-
- Response: 400
-
- """
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, complete_deposit.id])
-
- response = authenticated_client.put(
- update_uri,
- content_type="application/atom+xml;type=entry",
- # no title, nor author, nor name fields
- data=atom_dataset["entry-data-fail-metadata-functional-checks"],
- HTTP_X_CHECK_SWHID=complete_deposit.swhid,
- )
-
- assert response.status_code == status.HTTP_400_BAD_REQUEST
- assert b"Functional metadata checks failure" in response.content
- # detail on the errors
- msg = (
- b"- Mandatory fields are missing ("
- b"atom:name or atom:title or codemeta:name, "
- b"atom:author or codemeta:author)"
- )
- assert msg in response.content
-
-
-def test_put_atom_with_create_origin_and_external_identifier(
- authenticated_client, deposit_collection, atom_dataset, deposit_user
-):
- """<atom:external_identifier> was deprecated before <swh:create_origin>
- was introduced, clients should get an error when trying to use both
-
- """
- external_id = "foobar"
- origin_url = deposit_user.provider_url + external_id
- url = reverse(COL_IRI, args=[deposit_collection.name])
-
- response = authenticated_client.post(
- url,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- HTTP_IN_PROGRESS="true",
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
-
- for link in response_content["atom:link"]:
- if link["@rel"] == "edit":
- edit_iri = link["@href"]
- break
- else:
- assert False, response_content
-
- # when
- response = authenticated_client.put(
- edit_iri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["error-with-external-identifier"] % external_id,
- # + headers
- HTTP_IN_PROGRESS="false",
- )
-
- assert b"&lt;external_identifier&gt; is deprecated" in response.content
- assert response.status_code == status.HTTP_400_BAD_REQUEST
-
-
-def test_put_atom_with_create_origin_and_reference(
- authenticated_client, deposit_collection, atom_dataset, deposit_user
-):
- """<swh:reference> and <swh:create_origin> are mutually exclusive
-
- """
- external_id = "foobar"
- origin_url = deposit_user.provider_url + external_id
- url = reverse(COL_IRI, args=[deposit_collection.name])
-
- response = authenticated_client.post(
- url,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data0"] % origin_url,
- HTTP_IN_PROGRESS="true",
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
-
- for link in response_content["atom:link"]:
- if link["@rel"] == "edit":
- edit_iri = link["@href"]
- break
- else:
- assert False, response_content
-
- # when
- response = authenticated_client.put(
- edit_iri,
- content_type="application/atom+xml;type=entry",
- data=atom_dataset["entry-data-with-origin-reference"].format(url=origin_url),
- # + headers
- HTTP_IN_PROGRESS="false",
- )
-
- assert b"only one may be used on a given deposit" in response.content
- assert response.status_code == status.HTTP_400_BAD_REQUEST
diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update_atom.py
copy from swh/deposit/tests/api/test_deposit_update.py
copy to swh/deposit/tests/api/test_deposit_update_atom.py
--- a/swh/deposit/tests/api/test_deposit_update.py
+++ b/swh/deposit/tests/api/test_deposit_update_atom.py
@@ -6,15 +6,14 @@
from io import BytesIO
import attr
-from django.core.files.uploadedfile import InMemoryUploadedFile
from django.urls import reverse
+import pytest
from rest_framework import status
from swh.deposit.api.common import ACCEPT_ARCHIVE_CONTENT_TYPES
from swh.deposit.config import (
COL_IRI,
DEPOSIT_STATUS_DEPOSITED,
- DEPOSIT_STATUS_PARTIAL,
EDIT_IRI,
EM_IRI,
SE_IRI,
@@ -22,7 +21,6 @@
)
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
-from swh.deposit.tests.common import check_archive, create_arborescence_archive
from swh.model.hashutil import hash_to_bytes
from swh.model.identifiers import parse_swhid, swhid
from swh.model.model import (
@@ -35,69 +33,86 @@
from swh.storage.interface import PagedResult
-def test_replace_archive_to_deposit_is_possible(
- tmp_path,
- partial_deposit,
- deposit_collection,
- authenticated_client,
- sample_archive,
- atom_dataset,
+def test_post_deposit_atom_entry_multiple_steps(
+ authenticated_client, deposit_collection, atom_dataset, deposit_user
):
- """Replace all archive with another one should return a 204 response
+ """After initial deposit, updating a deposit should return a 201
"""
- tmp_path = str(tmp_path)
# given
- deposit = partial_deposit
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
-
- assert len(list(requests)) == 1
- check_archive(sample_archive["name"], requests[0].archive.name)
+ origin_url = deposit_user.provider_url + "2225c695-cfb8-4ebb-aaaa-80da344efa6a"
- # we have no metadata for that deposit
- requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
- assert len(requests) == 0
+ with pytest.raises(Deposit.DoesNotExist):
+ deposit = Deposit.objects.get(origin_url=origin_url)
+ # when
response = authenticated_client.post(
- reverse(SE_IRI, args=[deposit_collection.name, deposit.id]),
+ reverse(COL_IRI, args=[deposit_collection.name]),
content_type="application/atom+xml;type=entry",
data=atom_dataset["entry-data1"],
- HTTP_SLUG=deposit.external_id,
- HTTP_IN_PROGRESS=True,
+ HTTP_IN_PROGRESS="True",
)
- requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
- assert len(requests) == 1
+ # then
+ assert response.status_code == status.HTTP_201_CREATED
- update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
- external_id = "some-external-id-1"
- archive2 = create_arborescence_archive(
- tmp_path, "archive2", "file2", b"some other content in file"
- )
+ response_content = parse_xml(BytesIO(response.content))
+ deposit_id = int(response_content["swh:deposit_id"])
- response = authenticated_client.put(
- update_uri,
- content_type="application/zip", # as zip
- data=archive2["data"],
- # + headers
- CONTENT_LENGTH=archive2["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=archive2["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="false",
- HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
+ deposit = Deposit.objects.get(pk=deposit_id)
+ assert deposit.collection == deposit_collection
+ assert deposit.origin_url is None # not provided yet
+ assert deposit.status == "partial"
+
+ # one associated request to a deposit
+ deposit_requests = DepositRequest.objects.filter(deposit=deposit)
+ assert len(deposit_requests) == 1
+
+ atom_entry_data = atom_dataset["entry-only-create-origin"] % (origin_url)
+
+ for link in response_content["atom:link"]:
+ if link["@rel"] == "http://purl.org/net/sword/terms/add":
+ se_iri = link["@href"]
+ break
+ else:
+ assert False, f"missing SE-IRI from {response_content['link']}"
+
+ # when updating the first deposit post
+ response = authenticated_client.post(
+ se_iri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_entry_data,
+ HTTP_IN_PROGRESS="False",
)
- assert response.status_code == status.HTTP_204_NO_CONTENT
+ # then
+ assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
+ response_content = parse_xml(BytesIO(response.content))
+ deposit_id = int(response_content["swh:deposit_id"])
- assert len(list(requests)) == 1
- check_archive(archive2["name"], requests[0].archive.name)
+ deposit = Deposit.objects.get(pk=deposit_id)
+ assert deposit.collection == deposit_collection
+ assert deposit.origin_url == origin_url
+ assert deposit.status == DEPOSIT_STATUS_DEPOSITED
- # check we did not touch the other parts
- requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
- assert len(requests) == 1
+ assert len(Deposit.objects.all()) == 1
+
+ # now 2 associated requests to a same deposit
+ deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id")
+ assert len(deposit_requests) == 2
+
+ atom_entry_data1 = atom_dataset["entry-data1"]
+ expected_meta = [
+ {"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1},
+ {"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data},
+ ]
+
+ for i, deposit_request in enumerate(deposit_requests):
+ actual_metadata = deposit_request.metadata
+ assert actual_metadata == expected_meta[i]["metadata"]
+ assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"]
+ assert bool(deposit_request.archive) is False
def test_replace_metadata_to_deposit_is_possible(
@@ -149,65 +164,6 @@
assert set(requests_archive0) == set(requests_archive1)
-def test_add_archive_to_deposit_is_possible(
- tmp_path,
- authenticated_client,
- deposit_collection,
- partial_deposit_with_metadata,
- sample_archive,
-):
- """Add another archive to a deposit return a 201 response
-
- """
- tmp_path = str(tmp_path)
- deposit = partial_deposit_with_metadata
-
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
-
- assert len(requests) == 1
- check_archive(sample_archive["name"], requests[0].archive.name)
-
- requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests_meta0) == 1
-
- update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
-
- external_id = "some-external-id-1"
- archive2 = create_arborescence_archive(
- tmp_path, "archive2", "file2", b"some other content in file"
- )
-
- response = authenticated_client.post(
- update_uri,
- content_type="application/zip", # as zip
- data=archive2["data"],
- # + headers
- CONTENT_LENGTH=archive2["length"],
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=archive2["md5sum"],
- HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
- HTTP_IN_PROGRESS="false",
- HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
- )
-
- assert response.status_code == status.HTTP_201_CREATED
-
- requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by(
- "id"
- )
-
- assert len(requests) == 2
- # first archive still exists
- check_archive(sample_archive["name"], requests[0].archive.name)
- # a new one was added
- check_archive(archive2["name"], requests[1].archive.name)
-
- # check we did not touch the other parts
- requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests_meta1) == 1
- assert set(requests_meta0) == set(requests_meta1)
-
-
def test_add_metadata_to_deposit_is_possible(
authenticated_client,
deposit_collection,
@@ -252,99 +208,6 @@
assert set(requests_archive0) == set(requests_archive1)
-def test_add_both_archive_and_metadata_to_deposit(
- authenticated_client,
- deposit_collection,
- partial_deposit_with_metadata,
- atom_dataset,
- sample_archive,
- deposit_user,
-):
- """Scenario: Add both a new archive and new metadata to a partial deposit is ok
-
- Response: 201
-
- """
- deposit = partial_deposit_with_metadata
- origin_url = deposit_user.provider_url + deposit.external_id
- requests = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests) == 1
-
- requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive0) == 1
-
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id])
- archive = InMemoryUploadedFile(
- BytesIO(sample_archive["data"]),
- field_name=sample_archive["name"],
- name=sample_archive["name"],
- content_type="application/x-tar",
- size=sample_archive["length"],
- charset=None,
- )
-
- data_atom_entry = atom_dataset["entry-data1"]
- atom_entry = InMemoryUploadedFile(
- BytesIO(data_atom_entry.encode("utf-8")),
- field_name="atom0",
- name="atom0",
- content_type='application/atom+xml; charset="utf-8"',
- size=len(data_atom_entry),
- charset="utf-8",
- )
-
- update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id])
- response = authenticated_client.post(
- update_uri,
- format="multipart",
- data={"archive": archive, "atom_entry": atom_entry,},
- )
-
- assert response.status_code == status.HTTP_201_CREATED
- requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by(
- "id"
- )
-
- assert len(requests) == 1 + 1, "New deposit request archive got added"
- expected_raw_meta0 = atom_dataset["entry-data0"] % origin_url
- # a new one was added
- assert requests[0].raw_metadata == expected_raw_meta0
- assert requests[1].raw_metadata == data_atom_entry
-
- # check we did not touch the other parts
- requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive1) == 1 + 1, "New deposit request metadata got added"
-
-
-def test_post_metadata_empty_post_finalize_deposit_ok(
- authenticated_client,
- deposit_collection,
- partial_deposit_with_metadata,
- atom_dataset,
-):
- """Empty atom post entry with header in-progress to false transitions deposit to
- 'deposited' status
-
- Response: 200
-
- """
- deposit = partial_deposit_with_metadata
- assert deposit.status == DEPOSIT_STATUS_PARTIAL
-
- update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id])
- response = authenticated_client.post(
- update_uri,
- content_type="application/atom+xml;type=entry",
- data="",
- size=0,
- HTTP_IN_PROGRESS=False,
- )
-
- assert response.status_code == status.HTTP_200_OK
- deposit = Deposit.objects.get(pk=deposit.id)
- assert deposit.status == DEPOSIT_STATUS_DEPOSITED
-
-
def test_add_metadata_to_unknown_deposit(
deposit_collection, authenticated_client, atom_dataset
):
@@ -419,54 +282,6 @@
)
-def test_add_archive_to_unknown_deposit(
- authenticated_client, deposit_collection, atom_dataset
-):
- """Adding metadata to unknown deposit should return a 404 response
-
- """
- unknown_deposit_id = 997
- try:
- Deposit.objects.get(pk=unknown_deposit_id)
- except Deposit.DoesNotExist:
- assert True
-
- url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
- response = authenticated_client.post(
- url, content_type="application/zip", data=atom_dataset["entry-data1"]
- )
- assert response.status_code == status.HTTP_404_NOT_FOUND
- response_content = parse_xml(response.content)
- assert (
- "Deposit %s does not exist" % unknown_deposit_id
- == response_content["sword:error"]["atom:summary"]
- )
-
-
-def test_replace_archive_to_unknown_deposit(
- authenticated_client, deposit_collection, atom_dataset
-):
- """Replacing archive to unknown deposit should return a 404 response
-
- """
- unknown_deposit_id = 996
- try:
- Deposit.objects.get(pk=unknown_deposit_id)
- except Deposit.DoesNotExist:
- assert True
-
- url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
- response = authenticated_client.put(
- url, content_type="application/zip", data=atom_dataset["entry-data1"]
- )
- assert response.status_code == status.HTTP_404_NOT_FOUND
- response_content = parse_xml(response.content)
- assert (
- "Deposit %s does not exist" % unknown_deposit_id
- == response_content["sword:error"]["atom:summary"]
- )
-
-
def test_post_metadata_to_em_iri_failure(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
):
@@ -508,76 +323,6 @@
assert supported_format.encode() in response.content
-def test_put_update_metadata_and_archive_deposit_partial_nominal(
- tmp_path,
- authenticated_client,
- partial_deposit_with_metadata,
- deposit_collection,
- atom_dataset,
- sample_archive,
- deposit_user,
-):
- """Scenario: Replace metadata and archive(s) with new ones should be ok
-
- Response: 204
-
- """
- # given
- deposit = partial_deposit_with_metadata
- origin_url = deposit_user.provider_url + deposit.external_id
- raw_metadata0 = atom_dataset["entry-data0"] % origin_url
-
- requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests_meta) == 1
- request_meta0 = requests_meta[0]
- assert request_meta0.raw_metadata == raw_metadata0
-
- requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive0) == 1
-
- archive = InMemoryUploadedFile(
- BytesIO(sample_archive["data"]),
- field_name=sample_archive["name"],
- name=sample_archive["name"],
- content_type="application/x-tar",
- size=sample_archive["length"],
- charset=None,
- )
-
- data_atom_entry = atom_dataset["entry-data1"]
- atom_entry = InMemoryUploadedFile(
- BytesIO(data_atom_entry.encode("utf-8")),
- field_name="atom0",
- name="atom0",
- content_type='application/atom+xml; charset="utf-8"',
- size=len(data_atom_entry),
- charset="utf-8",
- )
-
- update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id])
- response = authenticated_client.put(
- update_uri,
- format="multipart",
- data={"archive": archive, "atom_entry": atom_entry,},
- )
-
- assert response.status_code == status.HTTP_204_NO_CONTENT
-
- # check we updated the metadata part
- requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
- assert len(requests_meta) == 1
- request_meta1 = requests_meta[0]
- raw_metadata1 = request_meta1.raw_metadata
- assert raw_metadata1 == data_atom_entry
- assert raw_metadata0 != raw_metadata1
- assert request_meta0 != request_meta1
-
- # and the archive part
- requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
- assert len(requests_archive1) == 1
- assert set(requests_archive0) != set(requests_archive1)
-
-
def test_put_update_metadata_done_deposit_nominal(
tmp_path,
authenticated_client,
diff --git a/swh/deposit/tests/api/test_deposit_update_binary.py b/swh/deposit/tests/api/test_deposit_update_binary.py
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/api/test_deposit_update_binary.py
@@ -0,0 +1,437 @@
+# Copyright (C) 2017-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Tests updates on EM-IRI"""
+
+from io import BytesIO
+
+from django.core.files.uploadedfile import InMemoryUploadedFile
+from django.urls import reverse
+from rest_framework import status
+
+from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED, EM_IRI, SE_IRI
+from swh.deposit.models import Deposit, DepositRequest
+from swh.deposit.parsers import parse_xml
+from swh.deposit.tests.common import check_archive, create_arborescence_archive
+
+
+def test_post_deposit_binary_and_post_to_add_another_archive(
+    authenticated_client, deposit_collection, sample_archive, tmp_path
+):
+    """Posting a second archive on the EM-IRI of a partial deposit gives a 201
+
+    The first POST (In-Progress: true) creates a "partial" deposit; the second
+    one, sent without that header, must leave it DEPOSIT_STATUS_DEPOSITED.
+
+    """
+    workdir = str(tmp_path)
+    slug = "some-external-id-1"
+    collection_name = deposit_collection.name
+    first_name = sample_archive["name"]
+
+    # when: a first archive is posted on the collection, deposit left open
+    response = authenticated_client.post(
+        reverse(COL_IRI, args=[collection_name]),
+        content_type="application/zip",
+        data=sample_archive["data"],
+        # + headers
+        CONTENT_LENGTH=sample_archive["length"],
+        HTTP_SLUG=slug,
+        HTTP_CONTENT_MD5=sample_archive["md5sum"],
+        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
+        HTTP_IN_PROGRESS="true",
+        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (first_name,),
+    )
+
+    # then: the receipt gives the id of a partial deposit holding that archive
+    assert response.status_code == status.HTTP_201_CREATED
+
+    receipt = parse_xml(BytesIO(response.content))
+    deposit_id = receipt["swh:deposit_id"]
+
+    deposit = Deposit.objects.get(pk=deposit_id)
+    assert deposit.status == "partial"
+    assert deposit.external_id == slug
+    assert deposit.collection == deposit_collection
+    assert deposit.swhid is None
+
+    only_request = DepositRequest.objects.get(deposit=deposit)
+    assert only_request.deposit == deposit
+    assert only_request.type == "archive"
+    check_archive(first_name, only_request.archive.name)
+
+    # when: a second archive is posted on the deposit's EM-IRI
+    second = create_arborescence_archive(
+        workdir, "archive2", "file2", b"some other content in file"
+    )
+    second_name = second["name"]
+    em_iri = reverse(EM_IRI, args=[collection_name, deposit_id])
+
+    response = authenticated_client.post(
+        em_iri,
+        content_type="application/zip",
+        data=second["data"],
+        # + headers
+        CONTENT_LENGTH=second["length"],
+        HTTP_SLUG=slug,
+        HTTP_CONTENT_MD5=second["md5sum"],
+        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
+        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (second_name,),
+    )
+
+    # then: same deposit, no longer partial, with both archives attached
+    assert response.status_code == status.HTTP_201_CREATED
+    parse_xml(BytesIO(response.content))  # parsed only, content not checked
+
+    deposit = Deposit.objects.get(pk=deposit_id)
+    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
+    assert deposit.external_id == slug
+    assert deposit.collection == deposit_collection
+    assert deposit.swhid is None
+
+    archive_requests = list(
+        DepositRequest.objects.filter(deposit=deposit).order_by("id")
+    )
+
+    # one "archive" request per upload, in request id order
+    assert len(archive_requests) == 2
+    expected_names = (first_name, second_name)
+    for archive_request, archive_name in zip(archive_requests, expected_names):
+        assert archive_request.deposit == deposit
+        assert archive_request.type == "archive"
+        check_archive(archive_name, archive_request.archive.name)
+
+    # and no other deposit got created along the way
+    deposits = Deposit.objects.all()
+    assert len(deposits) == 1
+
+
+def test_replace_archive_to_deposit_is_possible(
+    tmp_path,
+    partial_deposit,
+    deposit_collection,
+    authenticated_client,
+    sample_archive,
+    atom_dataset,
+):
+    """Replacing the archive of a deposit (PUT on EM-IRI) should return a 204
+
+    """
+    tmp_path = str(tmp_path)
+    # given: a partial deposit holding a single archive...
+    deposit = partial_deposit
+    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
+    assert len(list(requests)) == 1
+    check_archive(sample_archive["name"], requests[0].archive.name)
+
+    # ...and no metadata yet
+    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
+    assert len(requests) == 0
+
+    # add some metadata first; that POST itself must succeed (201)
+    response = authenticated_client.post(
+        reverse(SE_IRI, args=[deposit_collection.name, deposit.id]),
+        content_type="application/atom+xml;type=entry",
+        data=atom_dataset["entry-data1"],
+        HTTP_SLUG=deposit.external_id,
+        HTTP_IN_PROGRESS=True,
+    )
+    assert response.status_code == status.HTTP_201_CREATED
+
+    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
+    assert len(requests) == 1
+
+    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
+    external_id = "some-external-id-1"
+    archive2 = create_arborescence_archive(
+        tmp_path, "archive2", "file2", b"some other content in file"
+    )
+
+    response = authenticated_client.put(
+        update_uri,
+        content_type="application/zip",  # as zip
+        data=archive2["data"],
+        # + headers
+        CONTENT_LENGTH=archive2["length"],
+        HTTP_SLUG=external_id,
+        HTTP_CONTENT_MD5=archive2["md5sum"],
+        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
+        HTTP_IN_PROGRESS="false",
+        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],),
+    )
+
+    assert response.status_code == status.HTTP_204_NO_CONTENT
+
+    requests = DepositRequest.objects.filter(deposit=deposit, type="archive")
+    assert len(list(requests)) == 1
+    check_archive(archive2["name"], requests[0].archive.name)
+
+    # check we did not touch the other parts
+    requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata"))
+    assert len(requests) == 1
+
+
+def test_add_archive_to_unknown_deposit(
+    authenticated_client, deposit_collection, atom_dataset
+):
+    """Adding an archive to an unknown deposit should return a 404 response
+
+    """
+    unknown_deposit_id = 997
+    # precondition: no deposit has that id, else the 404 asserted below would
+    # not exercise the "unknown deposit" path
+    deposits = Deposit.objects.filter(pk=unknown_deposit_id)
+    assert not deposits.exists()
+
+    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
+    response = authenticated_client.post(
+        url, content_type="application/zip", data=atom_dataset["entry-data1"]
+    )
+    assert response.status_code == status.HTTP_404_NOT_FOUND
+    response_content = parse_xml(response.content)
+    assert (
+        "Deposit %s does not exist" % unknown_deposit_id
+        == response_content["sword:error"]["atom:summary"]
+    )
+
+
+def test_replace_archive_to_unknown_deposit(
+    authenticated_client, deposit_collection, atom_dataset
+):
+    """Replacing the archive of an unknown deposit should return a 404 response
+
+    """
+    unknown_deposit_id = 996
+    # precondition: no deposit has that id, else the 404 asserted below would
+    # not exercise the "unknown deposit" path
+    deposits = Deposit.objects.filter(pk=unknown_deposit_id)
+    assert not deposits.exists()
+
+    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
+    response = authenticated_client.put(
+        url, content_type="application/zip", data=atom_dataset["entry-data1"]
+    )
+    assert response.status_code == status.HTTP_404_NOT_FOUND
+    response_content = parse_xml(response.content)
+    assert (
+        "Deposit %s does not exist" % unknown_deposit_id
+        == response_content["sword:error"]["atom:summary"]
+    )
+
+
+def test_add_archive_to_deposit_is_possible(
+    tmp_path,
+    authenticated_client,
+    deposit_collection,
+    partial_deposit_with_metadata,
+    sample_archive,
+):
+    """Posting another archive on a deposit's EM-IRI should return a 201
+
+    The archive is added next to the existing one and the metadata requests
+    of the deposit are left as they were.
+
+    """
+    deposit = partial_deposit_with_metadata
+    first_name = sample_archive["name"]
+
+    # given: a deposit with exactly one archive and one metadata request
+    archives_before = list(
+        DepositRequest.objects.filter(deposit=deposit, type="archive")
+    )
+    assert len(archives_before) == 1
+    check_archive(first_name, archives_before[0].archive.name)
+
+    metadata_before = list(
+        DepositRequest.objects.filter(deposit=deposit, type="metadata")
+    )
+    assert len(metadata_before) == 1
+
+    # when: a second archive is posted on the EM-IRI of that deposit
+    new_archive = create_arborescence_archive(
+        str(tmp_path), "archive2", "file2", b"some other content in file"
+    )
+    response = authenticated_client.post(
+        reverse(EM_IRI, args=[deposit_collection.name, deposit.id]),
+        content_type="application/zip",
+        data=new_archive["data"],
+        CONTENT_LENGTH=new_archive["length"],
+        HTTP_SLUG="some-external-id-1",
+        HTTP_CONTENT_MD5=new_archive["md5sum"],
+        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
+        HTTP_IN_PROGRESS="false",
+        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (new_archive["name"],),
+    )
+
+    # then: created, and both archives are attached (in request id order)
+    assert response.status_code == status.HTTP_201_CREATED
+    archives_after = list(
+        DepositRequest.objects.filter(deposit=deposit, type="archive").order_by("id")
+    )
+    assert len(archives_after) == 2
+    check_archive(first_name, archives_after[0].archive.name)
+    check_archive(new_archive["name"], archives_after[1].archive.name)
+
+    # and the metadata part was left untouched
+    metadata_after = DepositRequest.objects.filter(deposit=deposit, type="metadata")
+    assert len(metadata_after) == 1
+    assert set(metadata_before) == set(metadata_after)
+
+
+def test_post_deposit_then_update_refused(
+    authenticated_client, deposit_collection, sample_archive, atom_dataset, tmp_path
+):
+    """Updating a deposit whose status is no longer 'partial' should return a 400
+
+    The deposit is created with In-Progress: false and is expected to end up
+    with the DEPOSIT_STATUS_DEPOSITED status. Every add/replace request sent
+    afterwards on its EM-IRI, Edit-IRI or SE-IRI must be refused.
+
+    """
+    slug = "some-external-id-1"
+    collection_name = deposit_collection.name
+
+    # message expected in the body of every refused request
+    refusal = b"You can only act on deposit with status &#39;partial&#39;"
+
+    # when: a complete (not in progress) binary deposit is posted
+    response = authenticated_client.post(
+        reverse(COL_IRI, args=[collection_name]),
+        content_type="application/zip",
+        data=sample_archive["data"],
+        # + headers
+        CONTENT_LENGTH=sample_archive["length"],
+        HTTP_SLUG=slug,
+        HTTP_CONTENT_MD5=sample_archive["md5sum"],
+        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
+        HTTP_IN_PROGRESS="false",
+        HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
+    )
+
+    # then: it got created and is not partial
+    assert response.status_code == status.HTTP_201_CREATED
+
+    receipt = parse_xml(BytesIO(response.content))
+    deposit_id = receipt["swh:deposit_id"]
+
+    deposit = Deposit.objects.get(pk=deposit_id)
+    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
+    assert deposit.external_id == slug
+    assert deposit.collection == deposit_collection
+    assert deposit.swhid is None
+
+    only_request = DepositRequest.objects.get(deposit=deposit)
+    assert only_request.deposit == deposit
+    check_archive("filename0", only_request.archive.name)
+
+    # From now on updating/adding is forbidden: every request below targets
+    # one of the update endpoints of the deposit and must be refused.
+    iri_args = [collection_name, deposit_id]
+    edit_iri = reverse("edit_iri", args=iri_args)
+    se_iri = reverse("se_iri", args=iri_args)
+    em_iri = reverse("em_iri", args=iri_args)
+
+    other_archive = create_arborescence_archive(
+        str(tmp_path), "archive2", "file2", b"some content in file 2"
+    )
+
+    # replacing the archive (PUT on the EM-IRI) is no longer possible
+    answer = authenticated_client.put(
+        em_iri,
+        content_type="application/zip",
+        data=other_archive["data"],
+        CONTENT_LENGTH=other_archive["length"],
+        HTTP_SLUG=slug,
+        HTTP_CONTENT_MD5=other_archive["md5sum"],
+        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
+        HTTP_IN_PROGRESS="false",
+        HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
+    )
+
+    assert answer.status_code == status.HTTP_400_BAD_REQUEST
+    assert refusal in answer.content
+
+    # adding an archive (POST on the EM-IRI) is no longer possible
+    answer = authenticated_client.post(
+        em_iri,
+        content_type="application/zip",
+        data=other_archive["data"],
+        CONTENT_LENGTH=other_archive["length"],
+        HTTP_SLUG=slug,
+        HTTP_CONTENT_MD5=other_archive["md5sum"],
+        HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip",
+        HTTP_IN_PROGRESS="false",
+        HTTP_CONTENT_DISPOSITION="attachment; filename=filename0",
+    )
+
+    assert answer.status_code == status.HTTP_400_BAD_REQUEST
+    assert refusal in answer.content
+
+    atom_entry_data = atom_dataset["entry-data-deposit-binary"]
+
+    # replacing the metadata (PUT on the Edit-IRI) is no longer possible
+    answer = authenticated_client.put(
+        edit_iri,
+        content_type="application/atom+xml;type=entry",
+        data=atom_entry_data,
+        CONTENT_LENGTH=len(atom_entry_data),
+        HTTP_SLUG=slug,
+    )
+
+    assert answer.status_code == status.HTTP_400_BAD_REQUEST
+    assert refusal in answer.content
+
+    # adding new metadata (POST on the SE-IRI) is no longer possible
+    answer = authenticated_client.post(
+        se_iri,
+        content_type="application/atom+xml;type=entry",
+        data=atom_entry_data,
+        CONTENT_LENGTH=len(atom_entry_data),
+        HTTP_SLUG=slug,
+    )
+
+    assert answer.status_code == status.HTTP_400_BAD_REQUEST
+    assert refusal in answer.content
+
+    # multipart payload (an archive part and an atom entry part) used by the
+    # last two requests
+    archive_content = b"some content representing archive"
+    archive_part = InMemoryUploadedFile(
+        BytesIO(archive_content),
+        field_name="archive0",
+        name="archive0",
+        content_type="application/zip",
+        size=len(archive_content),
+        charset=None,
+    )
+
+    atom_part = InMemoryUploadedFile(
+        BytesIO(atom_entry_data.encode("utf-8")),
+        field_name="atom0",
+        name="atom0",
+        content_type='application/atom+xml; charset="utf-8"',
+        size=len(atom_entry_data),
+        charset="utf-8",
+    )
+
+    # replacing through a multipart PUT on the Edit-IRI is no longer possible
+    answer = authenticated_client.put(
+        edit_iri,
+        format="multipart",
+        data={"archive": archive_part, "atom_entry": atom_part},
+    )
+
+    assert answer.status_code == status.HTTP_400_BAD_REQUEST
+    assert refusal in answer.content
+
+    # adding through a multipart POST on the SE-IRI is no longer possible
+    answer = authenticated_client.post(
+        se_iri,
+        format="multipart",
+        data={"archive": archive_part, "atom_entry": atom_part},
+    )
+
+    assert answer.status_code == status.HTTP_400_BAD_REQUEST
+    assert refusal in answer.content

File Metadata

Mime Type
text/plain
Expires
Jun 3 2025, 7:29 PM (9 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226851

Event Timeline