Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/tests/api/test_deposit_update_atom.py
- This file was copied from swh/deposit/tests/api/test_deposit_update.py.
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from io import BytesIO | from io import BytesIO | ||||
import attr | import attr | ||||
from django.core.files.uploadedfile import InMemoryUploadedFile | |||||
from django.urls import reverse | from django.urls import reverse | ||||
import pytest | |||||
from rest_framework import status | from rest_framework import status | ||||
from swh.deposit.api.common import ACCEPT_ARCHIVE_CONTENT_TYPES | from swh.deposit.api.common import ACCEPT_ARCHIVE_CONTENT_TYPES | ||||
from swh.deposit.config import ( | from swh.deposit.config import ( | ||||
COL_IRI, | COL_IRI, | ||||
DEPOSIT_STATUS_DEPOSITED, | DEPOSIT_STATUS_DEPOSITED, | ||||
DEPOSIT_STATUS_PARTIAL, | |||||
EDIT_IRI, | EDIT_IRI, | ||||
EM_IRI, | EM_IRI, | ||||
SE_IRI, | SE_IRI, | ||||
APIConfig, | APIConfig, | ||||
) | ) | ||||
from swh.deposit.models import Deposit, DepositCollection, DepositRequest | from swh.deposit.models import Deposit, DepositCollection, DepositRequest | ||||
from swh.deposit.parsers import parse_xml | from swh.deposit.parsers import parse_xml | ||||
from swh.deposit.tests.common import check_archive, create_arborescence_archive | |||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.identifiers import parse_swhid, swhid | from swh.model.identifiers import parse_swhid, swhid | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
) | ) | ||||
from swh.storage.interface import PagedResult | from swh.storage.interface import PagedResult | ||||
def test_replace_archive_to_deposit_is_possible( | def test_post_deposit_atom_entry_multiple_steps( | ||||
tmp_path, | authenticated_client, deposit_collection, atom_dataset, deposit_user | ||||
partial_deposit, | |||||
deposit_collection, | |||||
authenticated_client, | |||||
sample_archive, | |||||
atom_dataset, | |||||
): | ): | ||||
"""Replace all archive with another one should return a 204 response | """After initial deposit, updating a deposit should return a 201 | ||||
""" | """ | ||||
tmp_path = str(tmp_path) | |||||
# given | # given | ||||
deposit = partial_deposit | origin_url = deposit_user.provider_url + "2225c695-cfb8-4ebb-aaaa-80da344efa6a" | ||||
requests = DepositRequest.objects.filter(deposit=deposit, type="archive") | |||||
assert len(list(requests)) == 1 | |||||
check_archive(sample_archive["name"], requests[0].archive.name) | |||||
# we have no metadata for that deposit | with pytest.raises(Deposit.DoesNotExist): | ||||
requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) | deposit = Deposit.objects.get(origin_url=origin_url) | ||||
assert len(requests) == 0 | |||||
# when | |||||
response = authenticated_client.post( | response = authenticated_client.post( | ||||
reverse(SE_IRI, args=[deposit_collection.name, deposit.id]), | reverse(COL_IRI, args=[deposit_collection.name]), | ||||
content_type="application/atom+xml;type=entry", | content_type="application/atom+xml;type=entry", | ||||
data=atom_dataset["entry-data1"], | data=atom_dataset["entry-data1"], | ||||
HTTP_SLUG=deposit.external_id, | HTTP_IN_PROGRESS="True", | ||||
HTTP_IN_PROGRESS=True, | |||||
) | ) | ||||
requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) | # then | ||||
assert len(requests) == 1 | assert response.status_code == status.HTTP_201_CREATED | ||||
update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) | response_content = parse_xml(BytesIO(response.content)) | ||||
external_id = "some-external-id-1" | deposit_id = int(response_content["swh:deposit_id"]) | ||||
archive2 = create_arborescence_archive( | |||||
tmp_path, "archive2", "file2", b"some other content in file" | |||||
) | |||||
response = authenticated_client.put( | deposit = Deposit.objects.get(pk=deposit_id) | ||||
update_uri, | assert deposit.collection == deposit_collection | ||||
content_type="application/zip", # as zip | assert deposit.origin_url is None # not provided yet | ||||
data=archive2["data"], | assert deposit.status == "partial" | ||||
# + headers | |||||
CONTENT_LENGTH=archive2["length"], | # one associated request to a deposit | ||||
HTTP_SLUG=external_id, | deposit_requests = DepositRequest.objects.filter(deposit=deposit) | ||||
HTTP_CONTENT_MD5=archive2["md5sum"], | assert len(deposit_requests) == 1 | ||||
HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", | |||||
HTTP_IN_PROGRESS="false", | atom_entry_data = atom_dataset["entry-only-create-origin"] % (origin_url) | ||||
HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), | |||||
for link in response_content["atom:link"]: | |||||
if link["@rel"] == "http://purl.org/net/sword/terms/add": | |||||
se_iri = link["@href"] | |||||
break | |||||
else: | |||||
assert False, f"missing SE-IRI from {response_content['link']}" | |||||
# when updating the first deposit post | |||||
response = authenticated_client.post( | |||||
se_iri, | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_entry_data, | |||||
HTTP_IN_PROGRESS="False", | |||||
) | ) | ||||
assert response.status_code == status.HTTP_204_NO_CONTENT | # then | ||||
assert response.status_code == status.HTTP_201_CREATED, response.content.decode() | |||||
requests = DepositRequest.objects.filter(deposit=deposit, type="archive") | response_content = parse_xml(BytesIO(response.content)) | ||||
deposit_id = int(response_content["swh:deposit_id"]) | |||||
assert len(list(requests)) == 1 | deposit = Deposit.objects.get(pk=deposit_id) | ||||
check_archive(archive2["name"], requests[0].archive.name) | assert deposit.collection == deposit_collection | ||||
assert deposit.origin_url == origin_url | |||||
assert deposit.status == DEPOSIT_STATUS_DEPOSITED | |||||
# check we did not touch the other parts | assert len(Deposit.objects.all()) == 1 | ||||
requests = list(DepositRequest.objects.filter(deposit=deposit, type="metadata")) | |||||
assert len(requests) == 1 | # now 2 associated requests to a same deposit | ||||
deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id") | |||||
assert len(deposit_requests) == 2 | |||||
atom_entry_data1 = atom_dataset["entry-data1"] | |||||
expected_meta = [ | |||||
{"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1}, | |||||
{"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data}, | |||||
] | |||||
for i, deposit_request in enumerate(deposit_requests): | |||||
actual_metadata = deposit_request.metadata | |||||
assert actual_metadata == expected_meta[i]["metadata"] | |||||
assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"] | |||||
assert bool(deposit_request.archive) is False | |||||
def test_replace_metadata_to_deposit_is_possible( | def test_replace_metadata_to_deposit_is_possible( | ||||
tmp_path, | tmp_path, | ||||
authenticated_client, | authenticated_client, | ||||
partial_deposit_with_metadata, | partial_deposit_with_metadata, | ||||
deposit_collection, | deposit_collection, | ||||
atom_dataset, | atom_dataset, | ||||
Show All 35 Lines | ): | ||||
assert request_meta0 != request_meta1 | assert request_meta0 != request_meta1 | ||||
# check we did not touch the other parts | # check we did not touch the other parts | ||||
requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | ||||
assert len(requests_archive1) == 1 | assert len(requests_archive1) == 1 | ||||
assert set(requests_archive0) == set(requests_archive1) | assert set(requests_archive0) == set(requests_archive1) | ||||
def test_add_archive_to_deposit_is_possible(
    tmp_path,
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    sample_archive,
):
    """POSTing a second archive to the EM-IRI of a partial deposit yields a 201

    The archive already attached to the deposit is kept, the new archive is
    recorded after it, and the metadata request is left untouched.

    """
    tmp_path = str(tmp_path)
    deposit = partial_deposit_with_metadata

    def requests_of(request_type):
        # Fresh (lazy) queryset of this deposit's requests of the given type
        return DepositRequest.objects.filter(deposit=deposit, type=request_type)

    # given: exactly one archive (the sample one) and one metadata request
    initial_archives = requests_of("archive")
    assert len(initial_archives) == 1
    check_archive(sample_archive["name"], initial_archives[0].archive.name)

    requests_meta0 = requests_of("metadata")
    assert len(requests_meta0) == 1

    update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id])
    external_id = "some-external-id-1"
    archive2 = create_arborescence_archive(
        tmp_path, "archive2", "file2", b"some other content in file"
    )

    # when: the second archive is posted as a zip
    upload_headers = {
        "CONTENT_LENGTH": archive2["length"],
        "HTTP_SLUG": external_id,
        "HTTP_CONTENT_MD5": archive2["md5sum"],
        "HTTP_PACKAGING": "http://purl.org/net/sword/package/SimpleZip",
        "HTTP_IN_PROGRESS": "false",
        "HTTP_CONTENT_DISPOSITION": "attachment; filename=%s" % (archive2["name"],),
    }
    response = authenticated_client.post(
        update_uri,
        content_type="application/zip",  # as zip
        data=archive2["data"],
        **upload_headers,
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED

    final_archives = requests_of("archive").order_by("id")
    assert len(final_archives) == 2
    first_archive, second_archive = final_archives[0], final_archives[1]
    # first archive still exists
    check_archive(sample_archive["name"], first_archive.archive.name)
    # a new one was added
    check_archive(archive2["name"], second_archive.archive.name)

    # check we did not touch the other parts
    requests_meta1 = requests_of("metadata")
    assert len(requests_meta1) == 1
    assert set(requests_meta0) == set(requests_meta1)
def test_add_metadata_to_deposit_is_possible( | def test_add_metadata_to_deposit_is_possible( | ||||
authenticated_client, | authenticated_client, | ||||
deposit_collection, | deposit_collection, | ||||
partial_deposit_with_metadata, | partial_deposit_with_metadata, | ||||
atom_dataset, | atom_dataset, | ||||
deposit_user, | deposit_user, | ||||
): | ): | ||||
"""Add metadata with another one should return a 204 response | """Add metadata with another one should return a 204 response | ||||
Show All 28 Lines | ): | ||||
assert requests[1].raw_metadata == atom_entry | assert requests[1].raw_metadata == atom_entry | ||||
# check we did not touch the other parts | # check we did not touch the other parts | ||||
requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | ||||
assert len(requests_archive1) == 1 | assert len(requests_archive1) == 1 | ||||
assert set(requests_archive0) == set(requests_archive1) | assert set(requests_archive0) == set(requests_archive1) | ||||
def test_add_both_archive_and_metadata_to_deposit(
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    atom_dataset,
    sample_archive,
    deposit_user,
):
    """Scenario: Add both a new archive and new metadata to a partial deposit is ok

    A multipart POST on the SE-IRI carrying one archive and one atom entry
    must add one archive request and one metadata request to the deposit,
    next to the ones it already holds.

    Response: 201

    """
    deposit = partial_deposit_with_metadata
    origin_url = deposit_user.provider_url + deposit.external_id

    # given: one metadata request and one archive request
    requests = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests) == 1

    requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive0) == 1

    archive = InMemoryUploadedFile(
        BytesIO(sample_archive["data"]),
        field_name=sample_archive["name"],
        name=sample_archive["name"],
        content_type="application/x-tar",
        size=sample_archive["length"],
        charset=None,
    )

    data_atom_entry = atom_dataset["entry-data1"]
    encoded_atom_entry = data_atom_entry.encode("utf-8")
    atom_entry = InMemoryUploadedFile(
        BytesIO(encoded_atom_entry),
        field_name="atom0",
        name="atom0",
        content_type='application/atom+xml; charset="utf-8"',
        # size is a byte count: measure the encoded payload, not the str
        size=len(encoded_atom_entry),
        charset="utf-8",
    )

    # when
    update_uri = reverse(SE_IRI, args=[deposit_collection.name, deposit.id])
    response = authenticated_client.post(
        update_uri,
        format="multipart",
        data={"archive": archive, "atom_entry": atom_entry},
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED

    requests = DepositRequest.objects.filter(deposit=deposit, type="metadata").order_by(
        "id"
    )
    assert len(requests) == 1 + 1, "Expected one new metadata deposit request"

    expected_raw_meta0 = atom_dataset["entry-data0"] % origin_url
    # the initial metadata is still there and a new one was added
    assert requests[0].raw_metadata == expected_raw_meta0
    assert requests[1].raw_metadata == data_atom_entry

    # the archive part gained one request as well
    requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive1) == 1 + 1, "Expected one new archive deposit request"
def test_post_metadata_empty_post_finalize_deposit_ok(
    authenticated_client,
    deposit_collection,
    partial_deposit_with_metadata,
    atom_dataset,
):
    """An empty atom POST on the SE-IRI with In-Progress false finalizes the deposit

    The deposit starts in the 'partial' status and is expected to be in the
    'deposited' status once the request has been answered.

    Response: 200

    """
    partial = partial_deposit_with_metadata
    assert partial.status == DEPOSIT_STATUS_PARTIAL

    se_iri = reverse(SE_IRI, args=[deposit_collection.name, partial.id])
    empty_entry_request = {
        "content_type": "application/atom+xml;type=entry",
        "data": "",
        "size": 0,
        "HTTP_IN_PROGRESS": False,
    }
    response = authenticated_client.post(se_iri, **empty_entry_request)

    assert response.status_code == status.HTTP_200_OK

    # read the deposit back from the database to observe its new status
    finalized = Deposit.objects.get(pk=partial.id)
    assert finalized.status == DEPOSIT_STATUS_DEPOSITED
def test_add_metadata_to_unknown_deposit( | def test_add_metadata_to_unknown_deposit( | ||||
deposit_collection, authenticated_client, atom_dataset | deposit_collection, authenticated_client, atom_dataset | ||||
): | ): | ||||
"""Replacing metadata to unknown deposit should return a 404 response | """Replacing metadata to unknown deposit should return a 404 response | ||||
""" | """ | ||||
unknown_deposit_id = 1000 | unknown_deposit_id = 1000 | ||||
try: | try: | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | ): | ||||
assert response.status_code == status.HTTP_404_NOT_FOUND | assert response.status_code == status.HTTP_404_NOT_FOUND | ||||
response_content = parse_xml(response.content) | response_content = parse_xml(response.content) | ||||
assert ( | assert ( | ||||
"Deposit %s does not exist" % unknown_deposit_id | "Deposit %s does not exist" % unknown_deposit_id | ||||
== response_content["sword:error"]["atom:summary"] | == response_content["sword:error"]["atom:summary"] | ||||
) | ) | ||||
def test_add_archive_to_unknown_deposit(
    authenticated_client, deposit_collection, atom_dataset
):
    """Adding an archive to an unknown deposit should return a 404 response

    """
    unknown_deposit_id = 997

    # Precondition: the deposit must really be absent, otherwise the 404
    # checked below would not be about an unknown deposit.
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(pk=unknown_deposit_id)

    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = authenticated_client.post(
        url, content_type="application/zip", data=atom_dataset["entry-data1"]
    )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert (
        "Deposit %s does not exist" % unknown_deposit_id
        == response_content["sword:error"]["atom:summary"]
    )
def test_replace_archive_to_unknown_deposit(
    authenticated_client, deposit_collection, atom_dataset
):
    """Replacing the archive of an unknown deposit should return a 404 response

    """
    unknown_deposit_id = 996

    # Precondition: the deposit must really be absent, otherwise the 404
    # checked below would not be about an unknown deposit.
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(pk=unknown_deposit_id)

    url = reverse(EM_IRI, args=[deposit_collection.name, unknown_deposit_id])
    response = authenticated_client.put(
        url, content_type="application/zip", data=atom_dataset["entry-data1"]
    )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    response_content = parse_xml(response.content)
    assert (
        "Deposit %s does not exist" % unknown_deposit_id
        == response_content["sword:error"]["atom:summary"]
    )
def test_post_metadata_to_em_iri_failure( | def test_post_metadata_to_em_iri_failure( | ||||
authenticated_client, deposit_collection, partial_deposit, atom_dataset | authenticated_client, deposit_collection, partial_deposit, atom_dataset | ||||
): | ): | ||||
"""Update (POST) archive with wrong content type should return 400 | """Update (POST) archive with wrong content type should return 400 | ||||
""" | """ | ||||
deposit = partial_deposit | deposit = partial_deposit | ||||
update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) | update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) | ||||
Show All 25 Lines | ): | ||||
) | ) | ||||
# then | # then | ||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | assert response.status_code == status.HTTP_400_BAD_REQUEST | ||||
assert b"Packaging format supported is restricted" in response.content | assert b"Packaging format supported is restricted" in response.content | ||||
for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES: | for supported_format in ACCEPT_ARCHIVE_CONTENT_TYPES: | ||||
assert supported_format.encode() in response.content | assert supported_format.encode() in response.content | ||||
def test_put_update_metadata_and_archive_deposit_partial_nominal(
    tmp_path,
    authenticated_client,
    partial_deposit_with_metadata,
    deposit_collection,
    atom_dataset,
    sample_archive,
    deposit_user,
):
    """Scenario: Replace metadata and archive(s) with new ones should be ok

    A multipart PUT on the EDIT-IRI carrying one archive and one atom entry
    must leave the deposit with exactly one metadata request and one archive
    request, both different from the ones it held before.

    Response: 204

    """
    # given
    deposit = partial_deposit_with_metadata
    origin_url = deposit_user.provider_url + deposit.external_id
    raw_metadata0 = atom_dataset["entry-data0"] % origin_url

    requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta) == 1
    request_meta0 = requests_meta[0]
    assert request_meta0.raw_metadata == raw_metadata0

    requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive0) == 1

    archive = InMemoryUploadedFile(
        BytesIO(sample_archive["data"]),
        field_name=sample_archive["name"],
        name=sample_archive["name"],
        content_type="application/x-tar",
        size=sample_archive["length"],
        charset=None,
    )

    data_atom_entry = atom_dataset["entry-data1"]
    encoded_atom_entry = data_atom_entry.encode("utf-8")
    atom_entry = InMemoryUploadedFile(
        BytesIO(encoded_atom_entry),
        field_name="atom0",
        name="atom0",
        content_type='application/atom+xml; charset="utf-8"',
        # size is a byte count: measure the encoded payload, not the str
        size=len(encoded_atom_entry),
        charset="utf-8",
    )

    # when
    update_uri = reverse(EDIT_IRI, args=[deposit_collection.name, deposit.id])
    response = authenticated_client.put(
        update_uri,
        format="multipart",
        data={"archive": archive, "atom_entry": atom_entry},
    )

    # then
    assert response.status_code == status.HTTP_204_NO_CONTENT

    # check we updated the metadata part
    requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
    assert len(requests_meta) == 1
    request_meta1 = requests_meta[0]
    raw_metadata1 = request_meta1.raw_metadata
    assert raw_metadata1 == data_atom_entry
    assert raw_metadata0 != raw_metadata1
    assert request_meta0 != request_meta1

    # and the archive part
    requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
    assert len(requests_archive1) == 1
    assert set(requests_archive0) != set(requests_archive1)
def test_put_update_metadata_done_deposit_nominal( | def test_put_update_metadata_done_deposit_nominal( | ||||
tmp_path, | tmp_path, | ||||
authenticated_client, | authenticated_client, | ||||
complete_deposit, | complete_deposit, | ||||
deposit_collection, | deposit_collection, | ||||
atom_dataset, | atom_dataset, | ||||
sample_data, | sample_data, | ||||
swh_storage, | swh_storage, | ||||
▲ Show 20 Lines • Show All 297 Lines • Show Last 20 Lines |