Changeset View
Standalone View
swh/deposit/tests/api/test_deposit_update.py
# Copyright (C) 2017-2019 The Software Heritage developers | # Copyright (C) 2017-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from io import BytesIO | from io import BytesIO | ||||
from django.core.files.uploadedfile import InMemoryUploadedFile | from django.core.files.uploadedfile import InMemoryUploadedFile | ||||
from django.urls import reverse | from django.urls import reverse | ||||
from rest_framework import status | from rest_framework import status | ||||
from swh.deposit.config import ( | from swh.deposit.config import ( | ||||
DEPOSIT_STATUS_DEPOSITED, | DEPOSIT_STATUS_DEPOSITED, | ||||
DEPOSIT_STATUS_PARTIAL, | DEPOSIT_STATUS_PARTIAL, | ||||
EDIT_SE_IRI, | EDIT_SE_IRI, | ||||
EM_IRI, | EM_IRI, | ||||
) | ) | ||||
from swh.deposit.models import Deposit, DepositCollection, DepositRequest | from swh.deposit.models import Deposit, DepositCollection, DepositRequest | ||||
from swh.deposit.parsers import parse_xml | from swh.deposit.parsers import parse_xml | ||||
from swh.deposit.tests.common import check_archive, create_arborescence_archive | from swh.deposit.tests.common import check_archive, create_arborescence_archive | ||||
from swh.model.hashutil import hash_to_bytes | |||||
from swh.model.identifiers import parse_swhid, swhid | |||||
def test_replace_archive_to_deposit_is_possible( | def test_replace_archive_to_deposit_is_possible( | ||||
tmp_path, | tmp_path, | ||||
partial_deposit, | partial_deposit, | ||||
deposit_collection, | deposit_collection, | ||||
authenticated_client, | authenticated_client, | ||||
sample_archive, | sample_archive, | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | ): | ||||
assert raw_metadata0 != raw_metadata1 | assert raw_metadata0 != raw_metadata1 | ||||
assert request_meta0 != request_meta1 | assert request_meta0 != request_meta1 | ||||
# check we did not touch the other parts | # check we did not touch the other parts | ||||
requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | ||||
assert len(requests_archive1) == 1 | assert len(requests_archive1) == 1 | ||||
assert set(requests_archive0) == set(requests_archive1) | assert set(requests_archive0) == set(requests_archive1) | ||||
moranegg: modify `1` into `nominal`.
Best to have a meaningful test name. | |||||
def test_add_archive_to_deposit_is_possible( | def test_add_archive_to_deposit_is_possible( | ||||
tmp_path, | tmp_path, | ||||
authenticated_client, | authenticated_client, | ||||
deposit_collection, | deposit_collection, | ||||
partial_deposit_with_metadata, | partial_deposit_with_metadata, | ||||
sample_archive, | sample_archive, | ||||
): | ): | ||||
"""Add another archive to a deposit return a 201 response | """Add another archive to a deposit return a 201 response | ||||
""" | """ | ||||
tmp_path = str(tmp_path) | tmp_path = str(tmp_path) | ||||
Not Done Inline Actionsit's not updated, it's added in both vlorentz: it's not updated, it's added in both | |||||
Done Inline Actionsyes, i used the wrong endpoint, should be in the other iri that allows adding new metadata to be added. Thanks! ardumont: yes, i used the wrong endpoint, should be in the other iri that allows adding new metadata to… | |||||
deposit = partial_deposit_with_metadata | deposit = partial_deposit_with_metadata | ||||
Not Done Inline Actionscomplete deposit is an already done deposit which deposited content, for which we have a deposit_id and a swhid, right? moranegg: complete deposit is an already done deposit which deposited content, for which we have a… | |||||
Done Inline Actionsyes, it is. ok for improving the docstrings. ardumont: yes, it is.
It's a pytest fixture with already predefined data (to avoid cluttering the tests… | |||||
requests = DepositRequest.objects.filter(deposit=deposit, type="archive") | requests = DepositRequest.objects.filter(deposit=deposit, type="archive") | ||||
Done Inline Actionswhat's that? is that the hash in the deposit table or the hash given by the deposit request? moranegg: what's that? is that the hash in the deposit table or the hash given by the deposit request? | |||||
Done Inline Actionsfrom the deposit table, the request is only created later (line 163) vlorentz: from the deposit table, the request is only created later (line 163) | |||||
Done Inline Actionsit's the "done" deposit's swhid we parsed into a directory id. ardumont: it's the "done" deposit's swhid we parsed into a directory id.
I'm using that to demonstrate… | |||||
Not Done Inline ActionsI would rename that to something like moranegg: I would rename that to something like
`existing_directory_id`
or commenting that it's the… | |||||
assert len(requests) == 1 | assert len(requests) == 1 | ||||
check_archive(sample_archive["name"], requests[0].archive.name) | check_archive(sample_archive["name"], requests[0].archive.name) | ||||
Not Done Inline Actionsyou should check deposit.swh_id is a dir swhid, just to be sure there's no inconsistency vlorentz: you should check `deposit.swh_id` is a dir swhid, just to be sure there's no inconsistency | |||||
requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata") | requests_meta0 = DepositRequest.objects.filter(deposit=deposit, type="metadata") | ||||
assert len(requests_meta0) == 1 | assert len(requests_meta0) == 1 | ||||
update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) | update_uri = reverse(EM_IRI, args=[deposit_collection.name, deposit.id]) | ||||
external_id = "some-external-id-1" | external_id = "some-external-id-1" | ||||
archive2 = create_arborescence_archive( | archive2 = create_arborescence_archive( | ||||
tmp_path, "archive2", "file2", b"some other content in file" | tmp_path, "archive2", "file2", b"some other content in file" | ||||
Not Done Inline ActionsRemove this (see comment above) vlorentz: Remove this (see comment above) | |||||
) | ) | ||||
response = authenticated_client.post( | response = authenticated_client.post( | ||||
update_uri, | update_uri, | ||||
content_type="application/zip", # as zip | content_type="application/zip", # as zip | ||||
data=archive2["data"], | data=archive2["data"], | ||||
# + headers | # + headers | ||||
CONTENT_LENGTH=archive2["length"], | CONTENT_LENGTH=archive2["length"], | ||||
Not Done Inline Actionsmove this at the top of the test (and also check there's a deposit request with the metadata) vlorentz: move this at the top of the test (and also check there's a deposit request with the metadata) | |||||
HTTP_SLUG=external_id, | HTTP_SLUG=external_id, | ||||
HTTP_CONTENT_MD5=archive2["md5sum"], | HTTP_CONTENT_MD5=archive2["md5sum"], | ||||
HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", | HTTP_PACKAGING="http://purl.org/net/sword/package/SimpleZip", | ||||
HTTP_IN_PROGRESS="false", | HTTP_IN_PROGRESS="false", | ||||
HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), | HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (archive2["name"],), | ||||
) | ) | ||||
assert response.status_code == status.HTTP_201_CREATED | assert response.status_code == status.HTTP_201_CREATED | ||||
requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by( | requests = DepositRequest.objects.filter(deposit=deposit, type="archive").order_by( | ||||
"id" | "id" | ||||
) | ) | ||||
assert len(requests) == 2 | assert len(requests) == 2 | ||||
# first archive still exists | # first archive still exists | ||||
Not Done Inline Actions2 (see comment above) vlorentz: 2 (see comment above) | |||||
check_archive(sample_archive["name"], requests[0].archive.name) | check_archive(sample_archive["name"], requests[0].archive.name) | ||||
# a new one was added | # a new one was added | ||||
check_archive(archive2["name"], requests[1].archive.name) | check_archive(archive2["name"], requests[1].archive.name) | ||||
# check we did not touch the other parts | # check we did not touch the other parts | ||||
requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata") | requests_meta1 = DepositRequest.objects.filter(deposit=deposit, type="metadata") | ||||
assert len(requests_meta1) == 1 | assert len(requests_meta1) == 1 | ||||
assert set(requests_meta0) == set(requests_meta1) | assert set(requests_meta0) == set(requests_meta1) | ||||
def test_add_metadata_to_deposit_is_possible( | def test_add_metadata_to_deposit_is_possible( | ||||
authenticated_client, | authenticated_client, | ||||
deposit_collection, | deposit_collection, | ||||
partial_deposit_with_metadata, | partial_deposit_with_metadata, | ||||
atom_dataset, | atom_dataset, | ||||
Done Inline Actionschange done_deposit_2 into done_deposit_swhid_in_archive_failure moranegg: change `done_deposit_2` into `done_deposit_swhid_in_archive_failure` | |||||
): | ): | ||||
"""Add metadata with another one should return a 204 response | """Add metadata with another one should return a 204 response | ||||
""" | """ | ||||
deposit = partial_deposit_with_metadata | deposit = partial_deposit_with_metadata | ||||
requests = DepositRequest.objects.filter(deposit=deposit, type="metadata") | requests = DepositRequest.objects.filter(deposit=deposit, type="metadata") | ||||
assert len(requests) == 1 | assert len(requests) == 1 | ||||
Show All 23 Lines | ): | ||||
assert requests[1].raw_metadata == atom_entry | assert requests[1].raw_metadata == atom_entry | ||||
# check we did not touch the other parts | # check we did not touch the other parts | ||||
requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | ||||
assert len(requests_archive1) == 1 | assert len(requests_archive1) == 1 | ||||
assert set(requests_archive0) == set(requests_archive1) | assert set(requests_archive0) == set(requests_archive1) | ||||
def test_add_both_archive_and_metadata_to_deposit( | def test_add_both_archive_and_metadata_to_deposit( | ||||
Done Inline Actionsdone_deposit_3 change into done_deposit_swhid_match_failure moranegg: `done_deposit_3` change into `done_deposit_swhid_match_failure` | |||||
Done Inline Actionsi'd like to stop naming the test which tries to repeat poorly the docstring... ardumont: i'd like to stop naming the test which tries to repeat poorly the docstring... | |||||
authenticated_client, | authenticated_client, | ||||
deposit_collection, | deposit_collection, | ||||
partial_deposit_with_metadata, | partial_deposit_with_metadata, | ||||
atom_dataset, | atom_dataset, | ||||
sample_archive, | sample_archive, | ||||
): | ): | ||||
"""Scenario: Add both a new archive and new metadata to a partial deposit is ok | """Scenario: Add both a new archive and new metadata to a partial deposit is ok | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | ): | ||||
Response: 200 | Response: 200 | ||||
""" | """ | ||||
deposit = partial_deposit_with_metadata | deposit = partial_deposit_with_metadata | ||||
assert deposit.status == DEPOSIT_STATUS_PARTIAL | assert deposit.status == DEPOSIT_STATUS_PARTIAL | ||||
update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) | update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id]) | ||||
response = authenticated_client.post( | response = authenticated_client.post( | ||||
Done Inline Actionsardumont: @moranegg empty body use case i kept talking about in D4047 ;) | |||||
update_uri, | update_uri, | ||||
content_type="application/atom+xml;type=entry", | content_type="application/atom+xml;type=entry", | ||||
data="", | data="", | ||||
size=0, | size=0, | ||||
HTTP_IN_PROGRESS=False, | HTTP_IN_PROGRESS=False, | ||||
) | ) | ||||
assert response.status_code == status.HTTP_200_OK | assert response.status_code == status.HTTP_200_OK | ||||
▲ Show 20 Lines • Show All 229 Lines • ▼ Show 20 Lines | ): | ||||
assert raw_metadata1 == data_atom_entry | assert raw_metadata1 == data_atom_entry | ||||
assert raw_metadata0 != raw_metadata1 | assert raw_metadata0 != raw_metadata1 | ||||
assert request_meta0 != request_meta1 | assert request_meta0 != request_meta1 | ||||
# and the archive part | # and the archive part | ||||
requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive") | ||||
assert len(requests_archive1) == 1 | assert len(requests_archive1) == 1 | ||||
assert set(requests_archive0) != set(requests_archive1) | assert set(requests_archive0) != set(requests_archive1) | ||||
def test_put_update_metadata_done_deposit_nominal( | |||||
tmp_path, | |||||
authenticated_client, | |||||
complete_deposit, | |||||
deposit_collection, | |||||
atom_dataset, | |||||
sample_data, | |||||
swh_storage, | |||||
): | |||||
"""Nominal scenario, client send an update of metadata on a deposit with status "done" | |||||
with an existing swhid. Such swhid has its metadata updated accordingly both in | |||||
the deposit backend and in the metadata storage. | |||||
Response: 204 | |||||
""" | |||||
deposit_swhid = parse_swhid(complete_deposit.swh_id) | |||||
assert deposit_swhid.object_type == "directory" | |||||
directory_id = hash_to_bytes(deposit_swhid.object_id) | |||||
# directory targeted by the complete_deposit does not exist in the storage | |||||
assert list(swh_storage.directory_missing([directory_id])) == [directory_id] | |||||
# so let's create a directory reference in the storage (current deposit targets an | |||||
# unknown swhid) | |||||
existing_directory = sample_data.directory | |||||
swh_storage.directory_add([existing_directory]) | |||||
assert list(swh_storage.directory_missing([existing_directory.id])) == [] | |||||
# and patch one complete deposit swh_id so it targets said reference | |||||
complete_deposit.swh_id = swhid("directory", existing_directory.id) | |||||
complete_deposit.save() | |||||
actual_existing_requests_archive = DepositRequest.objects.filter( | |||||
deposit=complete_deposit, type="archive" | |||||
) | |||||
nb_archives = len(actual_existing_requests_archive) | |||||
actual_existing_requests_metadata = DepositRequest.objects.filter( | |||||
deposit=complete_deposit, type="metadata" | |||||
) | |||||
nb_metadata = len(actual_existing_requests_metadata) | |||||
update_uri = reverse( | |||||
EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] | |||||
) | |||||
response = authenticated_client.put( | |||||
update_uri, | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_dataset["entry-data1"], | |||||
HTTP_X_CHECK_SWHID=complete_deposit.swh_id, | |||||
) | |||||
assert response.status_code == status.HTTP_204_NO_CONTENT | |||||
new_requests_meta = DepositRequest.objects.filter( | |||||
deposit=complete_deposit, type="metadata" | |||||
) | |||||
assert len(new_requests_meta) == nb_metadata + 1 | |||||
request_meta1 = new_requests_meta[0] | |||||
raw_metadata1 = request_meta1.raw_metadata | |||||
assert raw_metadata1 == atom_dataset["entry-data1"] | |||||
# check we did not touch the other parts | |||||
requests_archive1 = DepositRequest.objects.filter( | |||||
deposit=complete_deposit, type="archive" | |||||
) | |||||
assert len(requests_archive1) == nb_archives | |||||
assert set(actual_existing_requests_archive) == set(requests_archive1) | |||||
# FIXME: Check the metadata storage information created is consistent | |||||
pass | |||||
def test_put_update_metadata_done_deposit_failure_swhid_unknown( | |||||
tmp_path, | |||||
authenticated_client, | |||||
complete_deposit, | |||||
deposit_collection, | |||||
atom_dataset, | |||||
swh_storage, | |||||
): | |||||
"""Failure: client updates metadata with a SWHID matching the deposit's. Said SWHID does | |||||
not exist in the archive somehow. | |||||
This should not happen though, it is still technically possible so it's | |||||
covered... | |||||
Response: 400 | |||||
""" | |||||
# directory targeted by the complete_deposit does not exist in the storage | |||||
missing_directory_id = hash_to_bytes(parse_swhid(complete_deposit.swh_id).object_id) | |||||
assert list(swh_storage.directory_missing([missing_directory_id])) == [ | |||||
missing_directory_id | |||||
] | |||||
update_uri = reverse( | |||||
EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] | |||||
) | |||||
response = authenticated_client.put( | |||||
update_uri, | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_dataset["entry-data1"], | |||||
HTTP_X_CHECK_SWHID=complete_deposit.swh_id, | |||||
) | |||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | |||||
assert b"Unknown directory SWHID" in response.content | |||||
def test_put_update_metadata_done_deposit_failure_mismatched_swhid( | |||||
tmp_path, | |||||
authenticated_client, | |||||
complete_deposit, | |||||
deposit_collection, | |||||
atom_dataset, | |||||
swh_storage, | |||||
): | |||||
"""failure: client updates metadata on deposit with SWHID not matching the deposit's. | |||||
Response: 400 | |||||
""" | |||||
incorrect_swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" | |||||
assert complete_deposit.swh_id != incorrect_swhid | |||||
update_uri = reverse( | |||||
EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] | |||||
) | |||||
response = authenticated_client.put( | |||||
update_uri, | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_dataset["entry-data1"], | |||||
HTTP_X_CHECK_SWHID=incorrect_swhid, | |||||
) | |||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | |||||
assert b"Mismatched provided SWHID" in response.content | |||||
def test_put_update_metadata_done_deposit_failure_malformed_xml( | |||||
tmp_path, | |||||
authenticated_client, | |||||
complete_deposit, | |||||
deposit_collection, | |||||
atom_dataset, | |||||
swh_storage, | |||||
): | |||||
"""failure: client updates metadata on deposit done with a malformed xml | |||||
Response: 400 | |||||
""" | |||||
update_uri = reverse( | |||||
EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] | |||||
) | |||||
response = authenticated_client.put( | |||||
update_uri, | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_dataset["entry-data-ko"], | |||||
HTTP_X_CHECK_SWHID=complete_deposit.swh_id, | |||||
) | |||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | |||||
assert b"Malformed xml metadata" in response.content | |||||
def test_put_update_metadata_done_deposit_failure_empty_xml( | |||||
tmp_path, | |||||
authenticated_client, | |||||
complete_deposit, | |||||
deposit_collection, | |||||
atom_dataset, | |||||
swh_storage, | |||||
): | |||||
"""failure: client updates metadata on deposit done with an empty xml. | |||||
Response: 400 | |||||
""" | |||||
update_uri = reverse( | |||||
EDIT_SE_IRI, args=[deposit_collection.name, complete_deposit.id] | |||||
) | |||||
response = authenticated_client.put( | |||||
update_uri, | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_dataset["entry-data-empty-body"], | |||||
HTTP_X_CHECK_SWHID=complete_deposit.swh_id, | |||||
) | |||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | |||||
assert b"Empty body request is not supported" in response.content |
modify 1 into nominal.
Best to have a meaningful test name.