Changeset View
Changeset View
Standalone View
Standalone View
swh/deposit/tests/api/test_collection_post_atom.py
# Copyright (C) 2017-2019 The Software Heritage developers | # Copyright (C) 2017-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""Tests the handling of the Atom content when doing a POST Col-IRI.""" | """Tests the handling of the Atom content when doing a POST Col-IRI.""" | ||||
from io import BytesIO | from io import BytesIO | ||||
import uuid | import uuid | ||||
import attr | |||||
from django.urls import reverse | from django.urls import reverse | ||||
import pytest | import pytest | ||||
from rest_framework import status | from rest_framework import status | ||||
from swh.deposit.config import COL_IRI, DEPOSIT_STATUS_DEPOSITED | from swh.deposit.config import ( | ||||
COL_IRI, | |||||
DEPOSIT_STATUS_DEPOSITED, | |||||
DEPOSIT_STATUS_LOAD_SUCCESS, | |||||
APIConfig, | |||||
) | |||||
from swh.deposit.models import Deposit, DepositCollection, DepositRequest | from swh.deposit.models import Deposit, DepositCollection, DepositRequest | ||||
from swh.deposit.parsers import parse_xml | from swh.deposit.parsers import parse_xml | ||||
from swh.deposit.utils import compute_metadata_context | |||||
from swh.model.identifiers import SWHID, parse_swhid | |||||
from swh.model.model import ( | |||||
MetadataAuthority, | |||||
MetadataAuthorityType, | |||||
MetadataFetcher, | |||||
MetadataTargetType, | |||||
RawExtrinsicMetadata, | |||||
) | |||||
from swh.storage.interface import PagedResult | |||||
def test_post_deposit_atom_201_even_with_decimal( | def test_post_deposit_atom_201_even_with_decimal( | ||||
authenticated_client, deposit_collection, atom_dataset | authenticated_client, deposit_collection, atom_dataset | ||||
): | ): | ||||
"""Posting an initial atom entry should return 201 with deposit receipt | """Posting an initial atom entry should return 201 with deposit receipt | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | ): | ||||
) | ) | ||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | assert response.status_code == status.HTTP_400_BAD_REQUEST | ||||
assert ( | assert ( | ||||
b"<swh:create_origin> and <swh:add_to_origin> " | b"<swh:create_origin> and <swh:add_to_origin> " | ||||
b"are mutually exclusive" | b"are mutually exclusive" | ||||
) in response.content | ) in response.content | ||||
def test_add_deposit_with_add_to_origin_and_external_identifier( | |||||
authenticated_client, | |||||
deposit_collection, | |||||
completed_deposit, | |||||
atom_dataset, | |||||
deposit_user, | |||||
): | |||||
"""Posting deposit with <swh:add_to_origin> creates a new deposit with parent | |||||
""" | |||||
# given multiple deposits already loaded | |||||
origin_url = deposit_user.provider_url + completed_deposit.external_id | |||||
# adding a new deposit with the same external id as a completed deposit | |||||
# creates the parenting chain | |||||
response = authenticated_client.post( | |||||
reverse(COL_IRI, args=[deposit_collection.name]), | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_dataset["entry-data-with-both-add-to-origin-and-external-id"] | |||||
% origin_url, | |||||
) | |||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | |||||
assert b"<external_identifier> is deprecated." in response.content | |||||
def test_post_deposit_atom_403_create_wrong_origin_url_prefix( | def test_post_deposit_atom_403_create_wrong_origin_url_prefix( | ||||
authenticated_client, deposit_collection, atom_dataset, deposit_user | authenticated_client, deposit_collection, atom_dataset, deposit_user | ||||
): | ): | ||||
"""Creating an origin for a prefix not owned by the client is forbidden | """Creating an origin for a prefix not owned by the client is forbidden | ||||
""" | """ | ||||
origin_url = "http://example.org/foo" | origin_url = "http://example.org/foo" | ||||
response = authenticated_client.post( | response = authenticated_client.post( | ||||
reverse(COL_IRI, args=[deposit_collection.name]), | reverse(COL_IRI, args=[deposit_collection.name]), | ||||
content_type="application/atom+xml;type=entry", | content_type="application/atom+xml;type=entry", | ||||
data=atom_dataset["entry-data0"] % origin_url, | data=atom_dataset["entry-data0"] % origin_url, | ||||
HTTP_IN_PROGRESS="true", | HTTP_IN_PROGRESS="true", | ||||
) | ) | ||||
assert response.status_code == status.HTTP_403_FORBIDDEN | assert response.status_code == status.HTTP_403_FORBIDDEN | ||||
expected_msg = ( | expected_msg = ( | ||||
f"Cannot create origin {origin_url}, " | f"Cannot create origin {origin_url}, " | ||||
f"it must start with {deposit_user.provider_url}" | f"it must start with {deposit_user.provider_url}" | ||||
) | ) | ||||
assert expected_msg in response.content.decode() | assert expected_msg in response.content.decode() | ||||
def test_post_deposit_atom_403_add_to_wrong_origin_url_prefix( | |||||
authenticated_client, deposit_collection, atom_dataset, deposit_user | |||||
): | |||||
"""Adding to an origin with a prefix not owned by the client is forbidden | |||||
""" | |||||
origin_url = "http://example.org/foo" | |||||
response = authenticated_client.post( | |||||
reverse(COL_IRI, args=[deposit_collection.name]), | |||||
content_type="application/atom+xml;type=entry", | |||||
data=atom_dataset["entry-data-with-add-to-origin"] % origin_url, | |||||
HTTP_IN_PROGRESS="true", | |||||
) | |||||
assert response.status_code == status.HTTP_403_FORBIDDEN | |||||
expected_msg = ( | |||||
f"Cannot create origin {origin_url}, " | |||||
f"it must start with {deposit_user.provider_url}" | |||||
) | |||||
assert expected_msg in response.content.decode() | |||||
def test_post_deposit_atom_use_slug_header( | def test_post_deposit_atom_use_slug_header( | ||||
authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker | authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker | ||||
): | ): | ||||
"""Posting an atom entry with a slug header but no origin url generates | """Posting an atom entry with a slug header but no origin url generates | ||||
an origin url from the slug | an origin url from the slug | ||||
""" | """ | ||||
url = reverse(COL_IRI, args=[deposit_collection.name]) | url = reverse(COL_IRI, args=[deposit_collection.name]) | ||||
▲ Show 20 Lines • Show All 221 Lines • ▼ Show 20 Lines | ): | ||||
# one associated request to a deposit | # one associated request to a deposit | ||||
deposit_request = DepositRequest.objects.get(deposit=deposit) | deposit_request = DepositRequest.objects.get(deposit=deposit) | ||||
assert deposit_request.metadata is not None | assert deposit_request.metadata is not None | ||||
assert deposit_request.raw_metadata == atom_entry_data | assert deposit_request.raw_metadata == atom_entry_data | ||||
assert bool(deposit_request.archive) is False | assert bool(deposit_request.archive) is False | ||||
def test_post_deposit_atom_entry_multiple_steps( | def test_deposit_metadata_invalid( | ||||
authenticated_client, deposit_collection, atom_dataset, deposit_user | authenticated_client, deposit_collection, atom_dataset | ||||
): | ): | ||||
"""After initial deposit, updating a deposit should return a 201 | """Posting invalid swhid reference is bad request returned to client | ||||
""" | """ | ||||
# given | invalid_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49" | ||||
origin_url = deposit_user.provider_url + "2225c695-cfb8-4ebb-aaaa-80da344efa6a" | xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid) | ||||
with pytest.raises(Deposit.DoesNotExist): | response = authenticated_client.post( | ||||
deposit = Deposit.objects.get(origin_url=origin_url) | reverse(COL_IRI, args=[deposit_collection.name]), | ||||
content_type="application/atom+xml;type=entry", | |||||
data=xml_data, | |||||
) | |||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | |||||
assert b"Invalid SWHID reference" in response.content | |||||
def test_deposit_metadata_fails_functional_checks( | |||||
authenticated_client, deposit_collection, atom_dataset | |||||
): | |||||
"""Posting metadata that fails functional checks returns a bad request to the client | |||||
""" | |||||
swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49" | |||||
invalid_xml_data = atom_dataset[ | |||||
"entry-data-with-swhid-fail-metadata-functional-checks" | |||||
].format(swhid=swhid) | |||||
# when | |||||
response = authenticated_client.post( | response = authenticated_client.post( | ||||
reverse(COL_IRI, args=[deposit_collection.name]), | reverse(COL_IRI, args=[deposit_collection.name]), | ||||
content_type="application/atom+xml;type=entry", | content_type="application/atom+xml;type=entry", | ||||
data=atom_dataset["entry-data1"], | data=invalid_xml_data, | ||||
HTTP_IN_PROGRESS="True", | |||||
) | ) | ||||
assert response.status_code == status.HTTP_400_BAD_REQUEST | |||||
assert b"Functional metadata checks failure" in response.content | |||||
# then | |||||
assert response.status_code == status.HTTP_201_CREATED | |||||
@pytest.mark.parametrize( | |||||
"swhid,target_type", | |||||
[ | |||||
( | |||||
"swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49", | |||||
MetadataTargetType.CONTENT, | |||||
), | |||||
( | |||||
"swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49", | |||||
MetadataTargetType.DIRECTORY, | |||||
), | |||||
( | |||||
"swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49", | |||||
MetadataTargetType.REVISION, | |||||
), | |||||
( | |||||
"swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49", | |||||
MetadataTargetType.RELEASE, | |||||
), | |||||
( | |||||
"swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", | |||||
MetadataTargetType.SNAPSHOT, | |||||
), | |||||
( | |||||
"swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", | |||||
MetadataTargetType.CONTENT, | |||||
), | |||||
( | |||||
"swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa | |||||
MetadataTargetType.DIRECTORY, | |||||
), | |||||
( | |||||
"swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", | |||||
MetadataTargetType.REVISION, | |||||
), | |||||
( | |||||
"swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", | |||||
MetadataTargetType.RELEASE, | |||||
), | |||||
( | |||||
"swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo", | |||||
MetadataTargetType.SNAPSHOT, | |||||
), | |||||
], | |||||
) | |||||
def test_deposit_metadata_swhid( | |||||
swhid, | |||||
target_type, | |||||
authenticated_client, | |||||
deposit_collection, | |||||
atom_dataset, | |||||
swh_storage, | |||||
): | |||||
"""Posting a swhid reference is stored on raw extrinsic metadata storage | |||||
""" | |||||
swhid_reference = parse_swhid(swhid) | |||||
swhid_core = attr.evolve(swhid_reference, metadata={}) | |||||
xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid) | |||||
deposit_client = authenticated_client.deposit_client | |||||
response = authenticated_client.post( | |||||
reverse(COL_IRI, args=[deposit_collection.name]), | |||||
content_type="application/atom+xml;type=entry", | |||||
data=xml_data, | |||||
) | |||||
assert response.status_code == status.HTTP_201_CREATED | |||||
response_content = parse_xml(BytesIO(response.content)) | response_content = parse_xml(BytesIO(response.content)) | ||||
deposit_id = int(response_content["swh:deposit_id"]) | |||||
# Ensure the deposit is finalized | |||||
deposit_id = int(response_content["swh:deposit_id"]) | |||||
deposit = Deposit.objects.get(pk=deposit_id) | deposit = Deposit.objects.get(pk=deposit_id) | ||||
assert deposit.collection == deposit_collection | assert isinstance(swhid_core, SWHID) | ||||
assert deposit.origin_url is None # not provided yet | assert deposit.swhid == str(swhid_core) | ||||
assert deposit.status == "partial" | assert deposit.swhid_context == str(swhid_reference) | ||||
assert deposit.complete_date == deposit.reception_date | |||||
assert deposit.complete_date is not None | |||||
assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS | |||||
# one associated request to a deposit | # Ensure metadata stored in the metadata storage is consistent | ||||
deposit_requests = DepositRequest.objects.filter(deposit=deposit) | metadata_authority = MetadataAuthority( | ||||
assert len(deposit_requests) == 1 | type=MetadataAuthorityType.DEPOSIT_CLIENT, | ||||
url=deposit_client.provider_url, | |||||
metadata={"name": deposit_client.last_name}, | |||||
) | |||||
actual_authority = swh_storage.metadata_authority_get( | |||||
MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url | |||||
) | |||||
assert actual_authority == metadata_authority | |||||
config = APIConfig() | |||||
metadata_fetcher = MetadataFetcher( | |||||
name=config.tool["name"], | |||||
version=config.tool["version"], | |||||
metadata=config.tool["configuration"], | |||||
) | |||||
actual_fetcher = swh_storage.metadata_fetcher_get( | |||||
config.tool["name"], config.tool["version"] | |||||
) | |||||
assert actual_fetcher == metadata_fetcher | |||||
page_results = swh_storage.raw_extrinsic_metadata_get( | |||||
target_type, swhid_core, metadata_authority | |||||
) | |||||
discovery_date = page_results.results[0].discovery_date | |||||
assert len(page_results.results) == 1 | |||||
assert page_results.next_page_token is None | |||||
object_type, metadata_context = compute_metadata_context(swhid_reference) | |||||
assert page_results == PagedResult( | |||||
results=[ | |||||
RawExtrinsicMetadata( | |||||
type=object_type, | |||||
target=swhid_core, | |||||
discovery_date=discovery_date, | |||||
authority=attr.evolve(metadata_authority, metadata=None), | |||||
fetcher=attr.evolve(metadata_fetcher, metadata=None), | |||||
format="sword-v2-atom-codemeta", | |||||
metadata=xml_data.encode(), | |||||
**metadata_context, | |||||
) | |||||
], | |||||
next_page_token=None, | |||||
) | |||||
assert deposit.complete_date == discovery_date | |||||
atom_entry_data = atom_dataset["entry-only-create-origin"] % (origin_url) | |||||
for link in response_content["atom:link"]: | @pytest.mark.parametrize( | ||||
if link["@rel"] == "http://purl.org/net/sword/terms/add": | "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo",] | ||||
se_iri = link["@href"] | ) | ||||
break | def test_deposit_metadata_origin( | ||||
else: | url, authenticated_client, deposit_collection, atom_dataset, swh_storage, | ||||
assert False, f"missing SE-IRI from {response_content['link']}" | ): | ||||
"""Posting an origin reference is stored on raw extrinsic metadata storage | |||||
# when updating the first deposit post | """ | ||||
xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url) | |||||
deposit_client = authenticated_client.deposit_client | |||||
response = authenticated_client.post( | response = authenticated_client.post( | ||||
se_iri, | reverse(COL_IRI, args=[deposit_collection.name]), | ||||
content_type="application/atom+xml;type=entry", | content_type="application/atom+xml;type=entry", | ||||
data=atom_entry_data, | data=xml_data, | ||||
HTTP_IN_PROGRESS="False", | |||||
) | ) | ||||
# then | assert response.status_code == status.HTTP_201_CREATED | ||||
assert response.status_code == status.HTTP_201_CREATED, response.content.decode() | |||||
response_content = parse_xml(BytesIO(response.content)) | response_content = parse_xml(BytesIO(response.content)) | ||||
# Ensure the deposit is finalized | |||||
deposit_id = int(response_content["swh:deposit_id"]) | deposit_id = int(response_content["swh:deposit_id"]) | ||||
deposit = Deposit.objects.get(pk=deposit_id) | deposit = Deposit.objects.get(pk=deposit_id) | ||||
assert deposit.collection == deposit_collection | # we got no swhid as input so we cannot have those | ||||
assert deposit.origin_url == origin_url | assert deposit.swhid is None | ||||
assert deposit.status == DEPOSIT_STATUS_DEPOSITED | assert deposit.swhid_context is None | ||||
assert deposit.complete_date == deposit.reception_date | |||||
assert deposit.complete_date is not None | |||||
assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS | |||||
assert len(Deposit.objects.all()) == 1 | # Ensure metadata stored in the metadata storage is consistent | ||||
metadata_authority = MetadataAuthority( | |||||
type=MetadataAuthorityType.DEPOSIT_CLIENT, | |||||
url=deposit_client.provider_url, | |||||
metadata={"name": deposit_client.last_name}, | |||||
) | |||||
# now 2 associated requests to a same deposit | actual_authority = swh_storage.metadata_authority_get( | ||||
deposit_requests = DepositRequest.objects.filter(deposit=deposit).order_by("id") | MetadataAuthorityType.DEPOSIT_CLIENT, url=deposit_client.provider_url | ||||
assert len(deposit_requests) == 2 | ) | ||||
assert actual_authority == metadata_authority | |||||
atom_entry_data1 = atom_dataset["entry-data1"] | |||||
expected_meta = [ | config = APIConfig() | ||||
{"metadata": parse_xml(atom_entry_data1), "raw_metadata": atom_entry_data1}, | metadata_fetcher = MetadataFetcher( | ||||
{"metadata": parse_xml(atom_entry_data), "raw_metadata": atom_entry_data}, | name=config.tool["name"], | ||||
] | version=config.tool["version"], | ||||
metadata=config.tool["configuration"], | |||||
for i, deposit_request in enumerate(deposit_requests): | ) | ||||
actual_metadata = deposit_request.metadata | |||||
assert actual_metadata == expected_meta[i]["metadata"] | actual_fetcher = swh_storage.metadata_fetcher_get( | ||||
assert deposit_request.raw_metadata == expected_meta[i]["raw_metadata"] | config.tool["name"], config.tool["version"] | ||||
assert bool(deposit_request.archive) is False | ) | ||||
assert actual_fetcher == metadata_fetcher | |||||
page_results = swh_storage.raw_extrinsic_metadata_get( | |||||
MetadataTargetType.ORIGIN, url, metadata_authority | |||||
) | |||||
discovery_date = page_results.results[0].discovery_date | |||||
assert len(page_results.results) == 1 | |||||
assert page_results.next_page_token is None | |||||
assert page_results == PagedResult( | |||||
results=[ | |||||
RawExtrinsicMetadata( | |||||
type=MetadataTargetType.ORIGIN, | |||||
target=url, | |||||
discovery_date=discovery_date, | |||||
authority=attr.evolve(metadata_authority, metadata=None), | |||||
fetcher=attr.evolve(metadata_fetcher, metadata=None), | |||||
format="sword-v2-atom-codemeta", | |||||
metadata=xml_data.encode(), | |||||
) | |||||
], | |||||
next_page_token=None, | |||||
) | |||||
assert deposit.complete_date == discovery_date |