diff --git a/swh/deposit/api/content.py b/swh/deposit/api/content.py
index 0bbf5017..7def1602 100644
--- a/swh/deposit/api/content.py
+++ b/swh/deposit/api/content.py
@@ -1,44 +1,43 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.http import HttpResponse
from django.shortcuts import render
from rest_framework import status
from ..models import DEPOSIT_STATUS_DETAIL, DepositRequest
from .common import APIBase, get_deposit_by_id
class ContentAPI(APIBase):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'Cont-IRI' and 'File-IRI' in the sword specification.
HTTP verbs supported: GET
"""
def get( # type: ignore
self, req, collection_name: str, deposit_id: int
) -> HttpResponse:
"""Render the current state of one deposit as an XML document.

Args:
    req: the incoming request, forwarded to ``self.checks`` and ``render``
    collection_name: name of the collection used to look the deposit up
    deposit_id: identifier of the deposit to describe

Returns:
    The ``deposit/content.xml`` template rendered with content type
    ``application/xml`` and an HTTP 200 status.
"""
# Resolve the deposit first, then run the checks on it before rendering.
deposit = get_deposit_by_id(deposit_id, collection_name)
self.checks(req, collection_name, deposit)
# Every request attached to this deposit is handed to the template.
requests = DepositRequest.objects.filter(deposit=deposit)
# The whole deposit object is exposed to the template so that it can read
# several of its attributes (id, status, reception date).
context = {
- "deposit_id": deposit.id,
- "status": deposit.status,
+ "deposit": deposit,
"status_detail": DEPOSIT_STATUS_DETAIL[deposit.status],
"requests": requests,
}
return render(
req,
"deposit/content.xml",
context=context,
content_type="application/xml",
status=status.HTTP_200_OK,
)
diff --git a/swh/deposit/templates/deposit/content.xml b/swh/deposit/templates/deposit/content.xml
index 9140e255..d73fe55f 100644
--- a/swh/deposit/templates/deposit/content.xml
+++ b/swh/deposit/templates/deposit/content.xml
@@ -1,17 +1,16 @@
- {{ deposit_id }}
- {{ request.date }}
- {{ status }}
+ {{ deposit.id }}
+ {{ deposit.reception_date.isoformat }}
+ {{ deposit.status }}
{{ status_detail }}
- {{ deposit_id }}
- {{ request.date }}
- {{ status }}
+ {{ deposit.id }}
+ {{ deposit.status }}
{{ status_detail }}
diff --git a/swh/deposit/templates/deposit/deposit_receipt.xml b/swh/deposit/templates/deposit/deposit_receipt.xml
index 651ffb25..5a9e0142 100644
--- a/swh/deposit/templates/deposit/deposit_receipt.xml
+++ b/swh/deposit/templates/deposit/deposit_receipt.xml
@@ -1,28 +1,28 @@
{{ deposit_id }}
- {{ deposit_date }}
+ {{ deposit_date.isoformat }}
{{ archive }}
{{ status }}
{{ deposit_id }}
- {{ deposit_date }}
+ {{ deposit_date.isoformat }}
{{ archive }}
{{ status }}
{% for packaging in packagings %}{{ packaging }}{% endfor %}
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
index 1ad4ae78..ae2f7953 100644
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -1,825 +1,835 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Tests the handling of the Atom content when doing a POST Col-IRI."""
+import datetime
import textwrap
import uuid
import warnings
from xml.etree import ElementTree
import attr
from django.urls import reverse_lazy as reverse
import pytest
from rest_framework import status
from swh.deposit.config import (
COL_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_LOAD_SUCCESS,
APIConfig,
)
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.tests.common import post_atom
from swh.deposit.utils import (
NAMESPACES,
compute_metadata_context,
extended_swhid_from_qualified,
)
from swh.model.hypothesis_strategies import (
directories,
present_contents,
releases,
revisions,
snapshots,
)
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
Origin,
RawExtrinsicMetadata,
)
from swh.model.swhids import ObjectType, QualifiedSWHID
from swh.storage.interface import PagedResult
def _insert_object(swh_storage, swhid):
    """Store a generated example object in the archive under ``swhid``'s id.

    Contents are added through ``content_add`` with their ``sha1_git``
    replaced by ``swhid.object_id``; directories, revisions, releases and
    snapshots go through the matching ``<type>_add`` storage method with
    their ``id`` replaced instead.
    """
    if swhid.object_type != ObjectType.CONTENT:
        kind = swhid.object_type.name.lower()
        strategies = {
            "directory": directories,
            "revision": revisions,
            "release": releases,
            "snapshot": snapshots,
        }
        make_strategy = strategies[kind]
        add_objects = getattr(swh_storage, f"{kind}_add")
        with warnings.catch_warnings():
            # hypothesis warns about .example(); silence it on purpose here
            warnings.simplefilter("ignore")
            example = make_strategy().example()
        add_objects([attr.evolve(example, id=swhid.object_id)])
        return

    with warnings.catch_warnings():
        # hypothesis warns about .example(); silence it on purpose here
        warnings.simplefilter("ignore")
        example = present_contents().example()
    swh_storage.content_add([attr.evolve(example, sha1_git=swhid.object_id)])
def _assert_deposit_info_on_metadata(
swh_storage, metadata_swhid, deposit, metadata_fetcher
):
"""Check the "deposit info" metadata stored about a metadata object.

Fetches the raw extrinsic metadata targeting ``metadata_swhid`` under the
registry authority built below, and asserts that exactly one entry exists
with format ``xml-deposit-info``, the given ``metadata_fetcher`` and
``deposit.complete_date`` as discovery date.
"""
# Authority under which the deposit-info metadata is looked up.
swh_authority = MetadataAuthority(
MetadataAuthorityType.REGISTRY, "http://deposit.softwareheritage.example/",
)
page_results = swh_storage.raw_extrinsic_metadata_get(metadata_swhid, swh_authority)
# Exactly one result on a single page is expected.
assert len(page_results.results) == 1
assert page_results.next_page_token is None
# NOTE(review): the expected payload below contains no XML markup in this
# view although the format is "xml-deposit-info" -- confirm against the
# real file that the tags were not lost.
expected_xml_data = textwrap.dedent(
f"""\
{deposit.id}
https://hal-test.archives-ouvertes.fr/
test
"""
)
# Compare the whole page, not only the metadata bytes.
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
target=metadata_swhid,
discovery_date=deposit.complete_date,
authority=swh_authority,
fetcher=metadata_fetcher,
format="xml-deposit-info",
metadata=expected_xml_data.encode(),
)
],
next_page_token=None,
)
def test_post_deposit_atom_201_even_with_decimal(
    authenticated_client, deposit_collection, atom_dataset
):
    """An atom entry with a decimal software version is accepted with a 201,
    and the version reads back as "10.4" from the stored raw metadata.
    """
    atom_entry = atom_dataset["error-with-decimal"]
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(
        authenticated_client,
        col_iri,
        data=atom_entry,
        HTTP_SLUG="external-id",
        HTTP_IN_PROGRESS="false",
    )

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()

    receipt = ElementTree.fromstring(response.content)
    deposit_id = int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))
    deposit_request = DepositRequest.objects.get(
        deposit=Deposit.objects.get(pk=deposit_id)
    )
    assert deposit_request.raw_metadata is not None

    stored_metadata = ElementTree.fromstring(deposit_request.raw_metadata)
    software_version = stored_metadata.findtext(
        "codemeta:softwareVersion", namespaces=NAMESPACES
    )
    assert software_version == "10.4"
def test_post_deposit_atom_400_with_empty_body(
    authenticated_client, deposit_collection, atom_dataset
):
    """An atom entry with an empty body is rejected with a 400."""
    empty_entry = atom_dataset["entry-data-empty-body"]
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(
        authenticated_client, col_iri, data=empty_entry, HTTP_SLUG="external-id",
    )

    body = response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST, body.decode()
    assert b"Empty body request is not supported" in body
def test_post_deposit_atom_400_with_empty_request(
    authenticated_client, deposit_collection
):
    """A request with no data and a zero content length is rejected with a 400."""
    request_options = {
        "data": {},
        "HTTP_SLUG": "external-id",
        "CONTENT_LENGTH": 0,
    }
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(authenticated_client, col_iri, **request_options)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Empty body request is not supported" in response.content
def test_post_deposit_atom_400_badly_formatted_atom(
    authenticated_client, deposit_collection, atom_dataset
):
    """A badly formatted atom document is rejected with a 400."""
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    malformed_entry = atom_dataset["entry-data-badly-formatted"]

    response = post_atom(
        authenticated_client, col_iri, data=malformed_entry, HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Malformed xml metadata" in response.content
def test_post_deposit_atom_parsing_error(
    authenticated_client, deposit_collection, atom_dataset
):
    """An atom document that triggers a parsing error is rejected with a 400."""
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    error_prone_entry = atom_dataset["entry-data-parsing-error-prone"]

    response = post_atom(
        authenticated_client,
        col_iri,
        data=error_prone_entry,
        HTTP_SLUG="external-id",
    )

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Malformed xml metadata" in response.content
def test_post_deposit_atom_400_both_create_origin_and_add_to_origin(
    authenticated_client, deposit_collection, atom_dataset
):
    """An atom entry using both create_origin and add_to_origin is rejected
    with a 400 stating that they are mutually exclusive.
    """
    conflicting_entry = atom_dataset[
        "entry-data-with-both-create-origin-and-add-to-origin"
    ]
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(authenticated_client, col_iri, data=conflicting_entry)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    expected_message = (
        b"<swh:create_origin> and <swh:add_to_origin> "
        b"are mutually exclusive"
    )
    assert expected_message in response.content
def test_post_deposit_atom_403_create_wrong_origin_url_prefix(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Asking for an origin URL outside of the client's prefix yields a 403
    with a "URL mismatch" message.
    """
    foreign_origin_url = "http://example.org/foo"
    atom_entry = atom_dataset["entry-data0"] % foreign_origin_url
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])

    response = post_atom(
        authenticated_client, col_iri, data=atom_entry, HTTP_IN_PROGRESS="true",
    )

    assert response.status_code == status.HTTP_403_FORBIDDEN
    assert "URL mismatch" in response.content.decode()
def test_post_deposit_atom_use_slug_header(
    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
    """Without an origin url in the entry, the Slug header is appended to the
    client's provider url to build the deposit's origin url.
    """
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    random_slug = str(uuid.uuid4())

    response = post_atom(
        authenticated_client,
        col_iri,
        data=atom_dataset["entry-data-no-origin-url"],
        HTTP_IN_PROGRESS="false",
        HTTP_SLUG=random_slug,
    )

    assert response.status_code == status.HTTP_201_CREATED
    receipt = ElementTree.fromstring(response.content)
    deposit = Deposit.objects.get(
        pk=int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))
    )

    expected_origin_url = deposit_user.provider_url + random_slug
    assert deposit.collection == deposit_collection
    assert deposit.origin_url == expected_origin_url
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
def test_post_deposit_atom_no_origin_url_nor_slug_header(
    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
    """With neither an origin url nor a Slug header, the origin url is the
    provider url followed by the value returned by ``uuid.uuid4``.
    """
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    # Pin what uuid.uuid4 returns so that the generated url is predictable
    pinned_slug = str(uuid.uuid4())
    mocker.patch("uuid.uuid4", return_value=pinned_slug)

    response = post_atom(
        authenticated_client,
        col_iri,
        data=atom_dataset["entry-data-no-origin-url"],
        HTTP_IN_PROGRESS="false",
    )

    assert response.status_code == status.HTTP_201_CREATED
    receipt = ElementTree.fromstring(response.content)
    deposit = Deposit.objects.get(
        pk=int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))
    )

    expected_origin_url = deposit_user.provider_url + pinned_slug
    assert deposit.collection == deposit_collection
    assert deposit.origin_url == expected_origin_url
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
def test_post_deposit_atom_with_slug_and_external_identifier(
    authenticated_client, deposit_collection, deposit_user, atom_dataset, mocker
):
    """An external identifier in the entry is still accepted (201) when it
    has the same value as the Slug header.
    """
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    shared_slug = str(uuid.uuid4())
    atom_entry = atom_dataset["error-with-external-identifier"] % shared_slug

    response = post_atom(
        authenticated_client,
        col_iri,
        data=atom_entry,
        HTTP_IN_PROGRESS="false",
        HTTP_SLUG=shared_slug,
    )

    assert response.status_code == status.HTTP_201_CREATED
    receipt = ElementTree.fromstring(response.content)
    deposit = Deposit.objects.get(
        pk=int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))
    )

    expected_origin_url = deposit_user.provider_url + shared_slug
    assert deposit.collection == deposit_collection
    assert deposit.origin_url == expected_origin_url
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
def test_post_deposit_atom_with_mismatched_slug_and_external_identifier(
    authenticated_client, deposit_collection, atom_dataset
):
    """A Slug header that differs from the entry's external identifier is
    rejected with a 400.
    """
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    atom_entry = atom_dataset["error-with-external-identifier"] % "foobar"

    response = post_atom(
        authenticated_client,
        col_iri,
        data=atom_entry,
        HTTP_IN_PROGRESS="false",
        HTTP_SLUG="something",
    )

    deprecation_notice = (
        b"The <external_identifier> tag and Slug header are deprecated"
    )
    assert deprecation_notice in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_with_create_origin_and_external_identifier(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Sending an external identifier together with a create-origin url is
    rejected with a 400 reporting that the external identifier is deprecated.
    """
    identifier = "foobar"
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    template = atom_dataset["error-with-external-identifier-and-create-origin"]
    atom_entry = template.format(
        external_id=identifier, url=deposit_user.provider_url + identifier,
    )

    response = post_atom(
        authenticated_client, col_iri, data=atom_entry, HTTP_IN_PROGRESS="false",
    )

    assert b"<external_identifier> is deprecated" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_with_create_origin_and_reference(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Sending a reference together with a create-origin url is rejected with
    a 400: only one of them may be used on a given deposit.
    """
    identifier = "foobar"
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    template = atom_dataset["error-with-reference-and-create-origin"]
    atom_entry = template.format(
        external_id=identifier, url=deposit_user.provider_url + identifier,
    )

    response = post_atom(
        authenticated_client, col_iri, data=atom_entry, HTTP_IN_PROGRESS="false",
    )

    assert b"only one may be used on a given deposit" in response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST
def test_post_deposit_atom_unknown_collection(authenticated_client, atom_dataset):
    """Posting an atom entry to a collection that does not exist yields a 404."""
    missing_collection = "unknown-one"
    # Make sure the collection really is absent before posting to it
    with pytest.raises(DepositCollection.DoesNotExist):
        DepositCollection.objects.get(name=missing_collection)

    col_iri = reverse(COL_IRI, args=[missing_collection])
    response = post_atom(
        authenticated_client,
        col_iri,
        data=atom_dataset["entry-data0"],
        HTTP_SLUG="something",
    )

    assert response.status_code == status.HTTP_404_NOT_FOUND
    assert b"Unknown collection" in response.content
def test_post_deposit_atom_entry_initial(
authenticated_client, deposit_collection, atom_dataset, deposit_user
):
"""Posting an initial atom entry should return 201 with deposit receipt

The receipt's deposit date must fall between the instants captured right
before and right after the POST, and the stored deposit must hold the
posted metadata without any archive.
"""
# given
origin_url = deposit_user.provider_url + "1225c695-cfb8-4ebb-aaaa-80da344efa6a"
# no deposit exists for that origin url before the request
with pytest.raises(Deposit.DoesNotExist):
Deposit.objects.get(origin_url=origin_url)
atom_entry_data = atom_dataset["entry-data0"] % origin_url
# when
+ date_before = datetime.datetime.now(tz=datetime.timezone.utc)
response = post_atom(
authenticated_client,
reverse(COL_IRI, args=[deposit_collection.name]),
data=atom_entry_data,
HTTP_IN_PROGRESS="false",
)
+ date_after = datetime.datetime.now(tz=datetime.timezone.utc)
# then
assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
response_content = ElementTree.fromstring(response.content)
deposit_id = int(response_content.findtext("swh:deposit_id", namespaces=NAMESPACES))
+ assert (
+ date_before
+ <= datetime.datetime.fromisoformat(
+ response_content.findtext("swh:deposit_date", namespaces=NAMESPACES)
+ )
+ <= date_after
+ )
# the receipt's deposit id must point to a deposit matching what was posted
deposit = Deposit.objects.get(pk=deposit_id)
assert deposit.collection == deposit_collection
assert deposit.origin_url == origin_url
assert deposit.status == DEPOSIT_STATUS_DEPOSITED
# one associated request to a deposit
deposit_request = DepositRequest.objects.get(deposit=deposit)
assert deposit_request.raw_metadata == atom_entry_data
assert bool(deposit_request.archive) is False
def test_post_deposit_atom_entry_with_codemeta(
    authenticated_client, deposit_collection, atom_dataset, deposit_user
):
    """Posting the codemeta sample entry returns a 201 and creates a deposit
    whose single request holds the posted metadata and no archive.
    """
    # given: no deposit exists yet for the origin url
    expected_origin_url = (
        deposit_user.provider_url + "1225c695-cfb8-4ebb-aaaa-80da344efa6a"
    )
    with pytest.raises(Deposit.DoesNotExist):
        Deposit.objects.get(origin_url=expected_origin_url)
    codemeta_entry = atom_dataset["codemeta-sample"] % expected_origin_url

    # when
    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(
        authenticated_client, col_iri, data=codemeta_entry, HTTP_IN_PROGRESS="false",
    )

    # then
    assert response.status_code == status.HTTP_201_CREATED
    receipt = ElementTree.fromstring(response.content)
    deposit = Deposit.objects.get(
        pk=int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))
    )
    assert deposit.collection == deposit_collection
    assert deposit.origin_url == expected_origin_url
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED

    # exactly one request is attached to the deposit (``get`` enforces it)
    stored_request = DepositRequest.objects.get(deposit=deposit)
    assert stored_request.raw_metadata == codemeta_entry
    assert not stored_request.archive
def test_deposit_metadata_invalid(
    authenticated_client, deposit_collection, atom_dataset
):
    """A syntactically invalid SWHID reference is rejected with a 400."""
    # the stray space makes this SWHID malformed
    malformed_swhid = "swh:1:dir :31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    template = atom_dataset["entry-data-with-swhid-no-prov"]
    atom_entry = template.format(swhid=malformed_swhid)

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Invalid SWHID reference" in response.content
def test_deposit_metadata_invalid_metadata_provenance(
    authenticated_client, deposit_collection, atom_dataset
):
    """A metadata provenance url that does not match the client is rejected
    with a 403 "URL mismatch".
    """
    swhid = "swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    provenance_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh"
    atom_entry = atom_dataset["entry-data-with-swhid"].format(
        swhid=swhid, metadata_provenance_url=provenance_url,
    )

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    assert response.status_code == status.HTTP_403_FORBIDDEN
    assert b"URL mismatch" in response.content
def test_deposit_metadata_fails_functional_checks(
    authenticated_client, deposit_collection, atom_dataset
):
    """Metadata failing the functional checks is rejected with a 400."""
    template = atom_dataset["entry-data-with-swhid-fail-metadata-functional-checks"]
    atom_entry = template.format(
        swhid="swh:1:dir:31b5c8cc985d190b5a7ef4878128ebfdc2358f49"
    )

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert b"Functional metadata checks failure" in response.content
@pytest.mark.parametrize(
    "swhid",
    [
        "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
        "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/",  # noqa
        "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
        "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
        "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
    ],
)
def test_deposit_metadata_swhid(
    swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
    """A metadata deposit referencing a SWHID present in the archive is
    loaded right away and its metadata lands in the raw extrinsic metadata
    storage.
    """
    qualified_swhid = QualifiedSWHID.from_string(swhid)
    target_swhid = extended_swhid_from_qualified(qualified_swhid)
    atom_entry = atom_dataset["entry-data-with-swhid"].format(
        swhid=swhid,
        metadata_provenance_url="https://hal-test.archives-ouvertes.fr/hal-abcdefgh",
    )
    client = authenticated_client.deposit_client
    # the referenced object is inserted in the archive before posting
    _insert_object(swh_storage, qualified_swhid)

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    receipt = ElementTree.fromstring(response.content)

    # The deposit must already be finalized
    deposit = Deposit.objects.get(
        pk=int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))
    )
    assert deposit.swhid == str(target_swhid)
    assert deposit.swhid_context == str(qualified_swhid)
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # The authority and fetcher registered in the storage must be consistent
    expected_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT, url=client.provider_url,
    )
    stored_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=client.provider_url
    )
    assert stored_authority == expected_authority

    api_config = APIConfig()
    expected_fetcher = MetadataFetcher(
        name=api_config.tool["name"], version=api_config.tool["version"],
    )
    stored_fetcher = swh_storage.metadata_fetcher_get(
        api_config.tool["name"], api_config.tool["version"]
    )
    assert stored_fetcher == expected_fetcher

    # The deposited metadata object itself
    stored_page = swh_storage.raw_extrinsic_metadata_get(
        target_swhid, expected_authority
    )
    assert len(stored_page.results) == 1
    assert stored_page.next_page_token is None

    expected_metadata = RawExtrinsicMetadata(
        target=target_swhid,
        discovery_date=deposit.complete_date,
        authority=expected_authority,
        fetcher=expected_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=atom_entry.encode(),
        **compute_metadata_context(qualified_swhid),
    )
    assert stored_page == PagedResult(
        results=[expected_metadata], next_page_token=None
    )

    # The metadata recorded about the deposited metadata object
    _assert_deposit_info_on_metadata(
        swh_storage, expected_metadata.swhid(), deposit, expected_fetcher
    )
@pytest.mark.parametrize(
    "url", ["https://gitlab.org/user/repo", "https://whatever.else/repo"]
)
def test_deposit_metadata_origin(
    url, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
    """A metadata deposit referencing an origin present in the archive is
    loaded right away and its metadata lands in the raw extrinsic metadata
    storage, targeting the origin.
    """
    atom_entry = atom_dataset["entry-data-with-origin-reference"].format(url=url)
    target_swhid = Origin(url).swhid()
    client = authenticated_client.deposit_client
    # the referenced origin is added to the archive before posting
    swh_storage.origin_add([Origin(url)])

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    assert response.status_code == status.HTTP_201_CREATED, response.content.decode()
    receipt = ElementTree.fromstring(response.content)

    # The deposit must already be finalized
    deposit = Deposit.objects.get(
        pk=int(receipt.findtext("swh:deposit_id", namespaces=NAMESPACES))
    )
    # no swhid was given as input, so none can be recorded on the deposit
    assert deposit.swhid is None
    assert deposit.swhid_context is None
    assert deposit.complete_date == deposit.reception_date
    assert deposit.complete_date is not None
    assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS

    # The authority and fetcher registered in the storage must be consistent
    expected_authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT_CLIENT, url=client.provider_url,
    )
    stored_authority = swh_storage.metadata_authority_get(
        MetadataAuthorityType.DEPOSIT_CLIENT, url=client.provider_url
    )
    assert stored_authority == expected_authority

    api_config = APIConfig()
    expected_fetcher = MetadataFetcher(
        name=api_config.tool["name"], version=api_config.tool["version"],
    )
    stored_fetcher = swh_storage.metadata_fetcher_get(
        api_config.tool["name"], api_config.tool["version"]
    )
    assert stored_fetcher == expected_fetcher

    # The deposited metadata object itself
    stored_page = swh_storage.raw_extrinsic_metadata_get(
        target_swhid, expected_authority
    )
    assert len(stored_page.results) == 1
    assert stored_page.next_page_token is None

    expected_metadata = RawExtrinsicMetadata(
        target=target_swhid,
        discovery_date=deposit.complete_date,
        authority=expected_authority,
        fetcher=expected_fetcher,
        format="sword-v2-atom-codemeta",
        metadata=atom_entry.encode(),
    )
    assert stored_page == PagedResult(
        results=[expected_metadata], next_page_token=None
    )

    # The metadata recorded about the deposited metadata object
    _assert_deposit_info_on_metadata(
        swh_storage, expected_metadata.swhid(), deposit, expected_fetcher
    )
@pytest.mark.parametrize(
    "swhid",
    [
        "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
        "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/",  # noqa
        "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
        "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
        "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
    ],
)
def test_deposit_metadata_unknown_swhid(
    swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
    """A SWHID reference to an object missing from the archive is rejected
    with a 400 whose summary says that the object does not exist.
    """
    template = atom_dataset["entry-data-with-swhid-no-prov"]
    atom_entry = template.format(swhid=swhid)

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    body = response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST, body.decode()

    summary = ElementTree.fromstring(body).findtext(
        "atom:summary", namespaces=NAMESPACES
    )
    assert "object does not exist" in summary
@pytest.mark.parametrize(
    "swhid",
    [
        "swh:1:ori:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
        "swh:1:emd:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
    ],
)
def test_deposit_metadata_extended_swhid(
    swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
    """A SWHID reference using an extended object type (``ori``, ``emd``) is
    rejected with a 400 "Invalid SWHID reference".
    """
    template = atom_dataset["entry-data-with-swhid-no-prov"]
    atom_entry = template.format(swhid=swhid)

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    body = response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST, body.decode()

    summary = ElementTree.fromstring(body).findtext(
        "atom:summary", namespaces=NAMESPACES
    )
    assert "Invalid SWHID reference" in summary
def test_deposit_metadata_unknown_origin(
    authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
    """A reference to an origin that was not added to the archive is rejected
    with a 400.
    """
    origin_url = "https://gitlab.org/user/repo"
    template = atom_dataset["entry-data-with-origin-reference"]
    atom_entry = template.format(url=origin_url)

    col_iri = reverse(COL_IRI, args=[deposit_collection.name])
    response = post_atom(authenticated_client, col_iri, data=atom_entry)

    body = response.content
    assert response.status_code == status.HTTP_400_BAD_REQUEST, body.decode()

    summary = ElementTree.fromstring(body).findtext(
        "atom:summary", namespaces=NAMESPACES
    )
    assert "known to the archive" in summary
diff --git a/swh/deposit/tests/api/test_get_file.py b/swh/deposit/tests/api/test_get_file.py
index 4f6d19b6..dbaf64d3 100644
--- a/swh/deposit/tests/api/test_get_file.py
+++ b/swh/deposit/tests/api/test_get_file.py
@@ -1,57 +1,67 @@
# Copyright (C) 2020-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Tests 'GET File-IRI'."""
+import datetime
+
from django.urls import reverse_lazy as reverse
from rest_framework import status
from swh.deposit.config import CONT_FILE_IRI
from swh.deposit.models import DEPOSIT_STATUS_DETAIL
from swh.deposit.parsers import parse_xml
from swh.deposit.utils import NAMESPACES
def test_api_deposit_content_nominal(
authenticated_client, complete_deposit, partial_deposit_only_metadata
):
"""Retrieve information on deposit should return 200 response

For each of the two deposits, the returned document must carry the
deposit's id, status and status detail, and a deposit date lying within
the hour preceding the start of the test.
"""
+ now = datetime.datetime.now(tz=datetime.timezone.utc)
# the same checks are run on both deposit fixtures
for deposit in [complete_deposit, partial_deposit_only_metadata]:
url = reverse(CONT_FILE_IRI, args=[deposit.collection.name, deposit.id])
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
actual_deposit = parse_xml(response.content)
# findtext returns text, hence the comparison with str(deposit.id)
assert actual_deposit.findtext("swh:deposit_id", namespaces=NAMESPACES) == str(
deposit.id
)
assert (
actual_deposit.findtext("swh:deposit_status", namespaces=NAMESPACES)
== deposit.status
)
assert (
actual_deposit.findtext("swh:deposit_status_detail", namespaces=NAMESPACES)
== DEPOSIT_STATUS_DETAIL[deposit.status]
)
+ assert (
+ now - datetime.timedelta(hours=1)
+ <= datetime.datetime.fromisoformat(
+ actual_deposit.findtext("swh:deposit_date", namespaces=NAMESPACES)
+ )
+ <= now
+ )
def test_api_deposit_content_unknown(
    authenticated_client, complete_deposit, deposit_collection
):
    """Asking for an unknown deposit id or an unknown collection yields a 404."""
    missing_deposit_id = 999
    missing_collection = "unknown"
    unknown_targets = [
        (deposit_collection.name, missing_deposit_id),
        (missing_collection, complete_deposit.id),
        (complete_deposit.collection.name, complete_deposit.id + 10),
    ]

    for target in unknown_targets:
        url = reverse(CONT_FILE_IRI, args=list(target))
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_404_NOT_FOUND
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
index 569ffb2b..4974845b 100644
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -1,1176 +1,1176 @@
# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import ast
import contextlib
import json
import logging
import os
from typing import Optional
from unittest.mock import MagicMock
from xml.etree import ElementTree
import pytest
import yaml
from swh.deposit.api.checks import (
METADATA_PROVENANCE_KEY,
SUGGESTED_FIELDS_MISSING,
check_metadata,
)
from swh.deposit.cli import deposit as cli
from swh.deposit.cli.client import InputError, _collection, _url, generate_metadata
from swh.deposit.client import (
BaseDepositClient,
MaintenanceError,
PublicApiDepositClient,
ServiceDocumentDepositClient,
)
from swh.deposit.parsers import parse_xml
from swh.deposit.utils import NAMESPACES
from swh.model.exceptions import ValidationError
from ..conftest import TEST_USER
def generate_slug() -> str:
    """Return a random UUID4 as a string, for use as a sample slug."""
    from uuid import uuid4

    return str(uuid4())
@pytest.fixture
def datadir(request):
    """Point ``datadir`` to the ``../data`` folder next to the test module."""
    test_module_dir = os.path.dirname(str(request.fspath))
    return os.path.join(test_module_dir, "../data")
@pytest.fixture
def slug():
    """Provide a freshly generated slug."""
    fresh_slug = generate_slug()
    return fresh_slug
@pytest.fixture
def patched_tmp_path(tmp_path, mocker):
    """Make ``tempfile.TemporaryDirectory`` return a context manager yielding
    ``tmp_path`` (as a string), and return ``tmp_path``.
    """
    fake_tmpdir = contextlib.nullcontext(str(tmp_path))
    mocker.patch("tempfile.TemporaryDirectory", return_value=fake_tmpdir)
    return tmp_path
@pytest.fixture
def client_mock_api_down(mocker, slug):
    """Mock deposit client whose ``service_document`` call raises a
    ``MaintenanceError``.
    """
    maintenance_error = MaintenanceError(
        "Database backend maintenance: Temporarily unavailable, try again later."
    )
    api_down_client = MagicMock()
    api_down_client.service_document.side_effect = maintenance_error
    mocker.patch(
        "swh.deposit.client.PublicApiDepositClient", return_value=api_down_client
    )
    return api_down_client
def test_cli_url():
    """``_url`` yields a url ending in ``/1`` for both sample inputs."""
    expected_by_input = {
        "http://deposit": "http://deposit/1",
        "https://other/1": "https://other/1",
    }
    for raw_url, expected_url in expected_by_input.items():
        assert _url(raw_url) == expected_url
def test_cli_collection_error():
    """An "error" entry in the service document makes ``_collection`` raise."""
    failing_client = MagicMock()
    failing_client.service_document.return_value = {"error": "something went wrong"}

    with pytest.raises(InputError) as exc_info:
        _collection(failing_client)

    assert "Service document retrieval: something went wrong" == str(exc_info.value)
def test_cli_collection_ok(requests_mock_datadir):
    """``_collection`` returns the collection name served by the mocked api."""
    credentials = ("test", "test")
    deposit_client = PublicApiDepositClient(
        url="https://deposit.swh.test/1", auth=credentials
    )
    assert _collection(deposit_client) == "test"
def test_cli_collection_ko_because_downtime():
    """A MaintenanceError raised by the client propagates out of ``_collection``."""
    down_client = MagicMock()
    down_client.service_document.side_effect = MaintenanceError("downtime")

    with pytest.raises(MaintenanceError, match="downtime"):
        _collection(down_client)
def test_cli_upload_conflictual_flags(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Upload with --slug and --create-origin set to different values raises."""
    api_url_basename = "deposit.test.metadataonly"
    metadata = atom_dataset["entry-data-minimal"]

    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(metadata)

    # fmt: off
    upload_args = [
        "upload",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--slug", "some-slug",  # deprecated flag
        "--create-origin", "some-other-slug",  # conflictual value, so raise
        "--format", "json",
    ]
    # fmt: on

    # catch_exceptions=False lets the InputError reach pytest.raises
    with pytest.raises(InputError, match="both with different values"):
        cli_runner.invoke(cli, upload_args, catch_exceptions=False)
def test_cli_deposit_with_server_down_for_maintenance(
    sample_archive, caplog, client_mock_api_down, slug, patched_tmp_path, cli_runner
):
    """Deposit failure due to maintenance downtime must be reported explicitly.

    The cli exits with code 1, prints nothing and logs the maintenance message
    as an error.
    """
    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--archive", sample_archive["path"],
        "--author", "Jane Doe",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 1, result.output
    assert result.output == ""

    maintenance_message = (
        "Database backend maintenance: Temporarily unavailable, try again later."
    )
    expected_record = ("swh.deposit.cli.client", logging.ERROR, maintenance_message)
    assert expected_record in caplog.record_tuples

    client_mock_api_down.service_document.assert_called_once_with()
def test_cli_client_generate_metadata_ok(slug):
    """Metadata generated with every option is well formed and passes the checks.

    The server side ``check_metadata`` must accept it without any detail.
    """
    actual_metadata_xml = generate_metadata(
        "deposit-client",
        "project-name",
        authors=["some", "authors"],
        external_id="external-id",
        create_origin="origin-url",
        metadata_provenance_url="meta-prov-url",
    )
    root = parse_xml(actual_metadata_xml)

    def text_of(path):
        return root.findtext(path, namespaces=NAMESPACES)

    assert text_of("atom:author") == "deposit-client"
    assert text_of("atom:title") == "project-name"
    assert text_of("atom:updated") is not None
    assert text_of("codemeta:name") == "project-name"
    assert text_of("codemeta:identifier") == "external-id"

    author_names = [
        node.text
        for node in root.findall(
            "codemeta:author/codemeta:name", namespaces=NAMESPACES
        )
    ]
    assert author_names == ["some", "authors"]

    origin_node = root.find(
        "swh:deposit/swh:create_origin/swh:origin", namespaces=NAMESPACES
    )
    assert origin_node.attrib["url"] == "origin-url"
    assert (
        text_of("swh:deposit/swh:metadata-provenance/schema:url") == "meta-prov-url"
    )

    checks_ok, detail = check_metadata(ElementTree.fromstring(actual_metadata_xml))
    assert checks_ok is True
    assert detail is None
def test_cli_client_generate_metadata_ok2(slug):
    """Metadata generated from the minimal options is well formed.

    No identifier nor swh:deposit element is emitted, and ``check_metadata``
    accepts the document while reporting the missing metadata provenance.
    """
    actual_metadata_xml = generate_metadata(
        "deposit-client", "project-name", authors=["some", "authors"],
    )
    root = parse_xml(actual_metadata_xml)

    def text_of(path):
        return root.findtext(path, namespaces=NAMESPACES)

    assert text_of("atom:author") == "deposit-client"
    assert text_of("atom:title") == "project-name"
    assert text_of("atom:updated") is not None
    assert text_of("codemeta:name") == "project-name"

    author_names = [
        node.text
        for node in root.findall(
            "codemeta:author/codemeta:name", namespaces=NAMESPACES
        )
    ]
    assert author_names == ["some", "authors"]

    for absent_path in ("codemeta:identifier", "swh:deposit"):
        assert root.find(absent_path, namespaces=NAMESPACES) is None

    checks_ok, detail = check_metadata(ElementTree.fromstring(actual_metadata_xml))
    assert checks_ok is True
    expected_detail = {
        "metadata": [
            {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
        ]
    }
    assert detail == expected_detail
def test_cli_single_minimal_deposit_with_slug(
    sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog,
):
    """Single deposit upload through the cli with the deprecated --slug flag.

    The upload succeeds, the generated metadata file carries the provided
    values and exactly one warning is logged, cf.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--archive", sample_archive["path"],
        "--metadata-provenance-url", "meta-prov-url",
        "--author", "Jane Doe",
        "--slug", slug,
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    expected_receipt = {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }
    assert json.loads(result.output) == expected_receipt

    with open(metadata_path) as fd:
        generated = parse_xml(fd.read())

    def text_of(path):
        return generated.findtext(path, namespaces=NAMESPACES)

    assert text_of("atom:author") == TEST_USER["username"]
    assert text_of("codemeta:name") == "test-project"
    assert text_of("atom:title") == "test-project"
    assert text_of("atom:updated") is not None
    assert text_of("codemeta:identifier") == slug

    author_names = [
        node.text
        for node in generated.findall(
            "codemeta:author/codemeta:name", namespaces=NAMESPACES
        )
    ]
    assert author_names == ["Jane Doe"]

    count_warnings = sum(
        1 for (_, log_level, _) in caplog.record_tuples if log_level == logging.WARNING
    )
    assert (
        count_warnings == 1
    ), "We should have 1 warning as we are using slug instead of create_origin"
def test_cli_single_minimal_deposit_with_create_origin(
    sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog,
):
    """Single deposit upload through the cli with the --create-origin flag.

    The upload succeeds, the generated metadata file carries the provided
    values and no warning is logged, cf.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    origin = slug
    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--archive", sample_archive["path"],
        "--author", "Jane Doe",
        "--create-origin", origin,
        "--metadata-provenance-url", "meta-prov-url",
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    expected_receipt = {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }
    assert json.loads(result.output) == expected_receipt

    with open(metadata_path) as fd:
        generated = parse_xml(fd.read())

    def text_of(path):
        return generated.findtext(path, namespaces=NAMESPACES)

    assert text_of("atom:author") == TEST_USER["username"]
    assert text_of("codemeta:name") == "test-project"
    assert text_of("atom:title") == "test-project"
    assert text_of("atom:updated") is not None

    origin_node = generated.find(
        "swh:deposit/swh:create_origin/swh:origin", namespaces=NAMESPACES
    )
    assert origin_node.attrib["url"] == origin
    assert (
        text_of("swh:deposit/swh:metadata-provenance/schema:url") == "meta-prov-url"
    )

    author_names = [
        node.text
        for node in generated.findall(
            "codemeta:author/codemeta:name", namespaces=NAMESPACES
        )
    ]
    assert author_names == ["Jane Doe"]

    count_warnings = sum(
        1 for (_, log_level, _) in caplog.record_tuples if log_level == logging.WARNING
    )
    assert (
        count_warnings == 0
    ), "We should have no warning as we are using create_origin"
def test_cli_validation_metadata(
sample_archive, caplog, patched_tmp_path, cli_runner, slug
):
"""Missing or conflicting metadata flags make the upload command fail.

Each invocation below must exit with code 1, print nothing and log an
explanatory error through the "swh.deposit.cli.client" logger.
"""
metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
with open(metadata_path, "a"):
pass # creates the file
for flag_title_or_name, author_or_name in [
("--author", "no one"),
("--name", "test-project"),
]:
# Only one of --author / --name is passed (and no --metadata): first the
# name is missing, then the author
# fmt: off
result = cli_runner.invoke(
cli,
[
"upload",
"--url", "https://deposit.swh.test/1",
"--username", TEST_USER["username"],
"--password", TEST_USER["password"],
"--archive", sample_archive["path"],
"--slug", slug,
flag_title_or_name,
author_or_name,
],
)
# fmt: on
assert result.exit_code == 1, f"unexpected result: {result.output}"
assert result.output == ""
# NOTE(review): this expected message ends with a trailing space while the
# one expected for the next invocation does not -- confirm against the
# messages actually emitted by the cli
expected_error_log_record = (
"swh.deposit.cli.client",
logging.ERROR,
(
"Problem during parsing options: "
"For metadata deposit request, either a metadata file with "
"--metadata or both --author and --name must be provided. "
),
)
assert expected_error_log_record in caplog.record_tuples
# Drop the captured log records before the next invocation
caplog.clear()
# Update of an existing deposit (--deposit-id) with --name but neither
# --author nor --metadata: same "must be provided" error expected
# fmt: off
result = cli_runner.invoke(
cli,
[
"upload",
"--url", "https://deposit.swh.test/1",
"--username", TEST_USER["username"],
"--password", TEST_USER["password"],
"--name", "test-project",
"--deposit-id", 666,
"--archive", sample_archive["path"],
"--slug", slug,
],
)
# fmt: on
assert result.exit_code == 1, f"unexpected result: {result.output}"
assert result.output == ""
expected_error_log_record = (
"swh.deposit.cli.client",
logging.ERROR,
(
"Problem during parsing options: "
"For metadata deposit request, either a metadata file with "
"--metadata or both --author and --name must be provided."
),
)
assert expected_error_log_record in caplog.record_tuples
# Drop the captured log records before the next invocation
caplog.clear()
# Incompatible flags: --metadata is passed together with --author
# fmt: off
result = cli_runner.invoke(
cli,
[
"upload",
"--url", "https://deposit.swh.test/1",
"--username", TEST_USER["username"],
"--password", TEST_USER["password"],
"--archive", sample_archive["path"],
"--metadata", metadata_path,
"--author", "Jane Doe",
"--slug", slug,
],
)
# fmt: on
assert result.exit_code == 1, result.output
assert result.output == ""
expected_error_log_record = (
"swh.deposit.cli.client",
logging.ERROR,
(
"Problem during parsing options: "
"Using --metadata flag is incompatible with --author "
"and --name and --create-origin (those are used to generate "
"one metadata file)."
),
)
assert expected_error_log_record in caplog.record_tuples
caplog.clear()
def test_cli_validation_no_actionable_command(caplog, cli_runner):
    """Upload with neither archive nor metadata fails asking for an actionable command."""
    # no actionable command: only --partial is given besides the credentials
    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--partial",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 1, result.output
    assert result.output == ""

    expected_message = (
        "Problem during parsing options: "
        "Please provide an actionable command. See --help for more information"
    )
    expected_error_log_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        expected_message,
    )
    assert expected_error_log_record in caplog.record_tuples
def test_cli_validation_replace_with_no_deposit_id_fails(
    sample_archive, caplog, patched_tmp_path, requests_mock_datadir, datadir, cli_runner
):
    """Using --replace without --deposit-id fails with an explicit error log."""
    metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml")
    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--archive", sample_archive["path"],
        "--replace",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 1, result.output
    assert result.output == ""

    expected_message = (
        "Problem during parsing options: "
        "To update an existing deposit, you must provide its id"
    )
    expected_error_log_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        expected_message,
    )
    assert expected_error_log_record in caplog.record_tuples
def test_cli_single_deposit_slug_generation(
    sample_archive, patched_tmp_path, requests_mock_datadir, cli_runner
):
    """Single deposit scenario without providing the slug.

    No identifier must be generated: the metadata file written by the cli must
    not contain any codemeta:identifier element.
    """
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--archive", sample_archive["path"],
            "--author", "Jane Doe",
            "--format", "json",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, result.output
    assert json.loads(result.output) == {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }

    with open(metadata_path) as fd:
        metadata_xml = fd.read()

    actual_metadata = parse_xml(metadata_xml)
    # `"codemeta:identifier" not in actual_metadata` only compared the string
    # with the child elements of the parsed tree, so it could never fail; look
    # the element up explicitly instead.
    assert actual_metadata.find("codemeta:identifier", namespaces=NAMESPACES) is None
def test_cli_multisteps_deposit(
    sample_archive, datadir, slug, requests_mock_datadir, cli_runner
):
    """Deposit in several steps: partial archive deposits, then the metadata.

    First deposit a partial deposit (no metadata, only archive), then update the metadata part.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit
    """  # noqa
    api_url = "https://deposit.test.metadata/1"
    deposit_id = 666

    # fmt: off
    common_args = [
        "upload",
        "--url", api_url,
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
    ]
    # fmt: on

    # Create a partial deposit with only 1 archive
    # fmt: off
    result = cli_runner.invoke(
        cli,
        common_args + [
            "--archive", sample_archive["path"],
            "--slug", slug,
            "--format", "json",
            "--partial",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"
    created_deposit = json.loads(result.output)
    assert created_deposit == {
        "deposit_id": str(deposit_id),
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }

    # Update the partial deposit with only 1 archive
    # fmt: off
    result = cli_runner.invoke(
        cli,
        common_args + [
            "--archive", sample_archive["path"],
            "--deposit-id", deposit_id,
            "--slug", slug,
            "--format", "json",
            "--partial",  # in-progress: True, because remains the metadata to upload
        ],
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"
    assert result.output is not None
    updated_deposit = json.loads(result.output)
    # deposit update scenario actually returns a deposit status dict
    assert updated_deposit["deposit_id"] == str(deposit_id)
    assert updated_deposit["deposit_status"] == "partial"

    # Update the partial deposit with only some metadata (and then finalize it)
    # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit
    metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml")

    # Update deposit with metadata; this time it is no longer flagged as
    # partial, so the status changes to in-progress false
    # fmt: off
    result = cli_runner.invoke(
        cli,
        common_args + [
            "--metadata", metadata_path,
            "--deposit-id", deposit_id,
            "--slug", slug,
            "--format", "json",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"
    assert result.output is not None
    finalized_deposit = json.loads(result.output)
    # deposit update scenario actually returns a deposit status dict
    assert finalized_deposit["deposit_id"] == str(deposit_id)
    # FIXME: should be "deposited" but current limitation in the
    # requests_mock_datadir_visits use, cannot find a way to make it work right now
    assert finalized_deposit["deposit_status"] == "partial"
@pytest.mark.parametrize(
    "output_format,parser_fn",
    [
        ("json", json.loads),
        ("yaml", yaml.safe_load),
        (
            "logging",
            ast.literal_eval,
        ),  # not enough though, the caplog fixture is needed
    ],
)
def test_cli_deposit_status_with_output_format(
    output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner
):
    """Check the status command with every output format (json, yaml, logging)."""
    api_url_basename = "deposit.test.status"
    deposit_id = 1033
    expected_deposit_status = {
        "deposit_id": str(deposit_id),
        "deposit_status": "done",
        "deposit_status_detail": (
            "The deposit has been successfully loaded into the "
            "Software Heritage archive"
        ),
        "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
        "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655",
    }

    # fmt: off
    status_args = [
        "status",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--deposit-id", deposit_id,
        "--format", output_format,
    ]
    # fmt: on
    result = cli_runner.invoke(cli, status_args)
    assert result.exit_code == 0, f"unexpected output: {result.output}"

    if output_format != "logging":
        raw_output = result.output
    else:
        # with the logging format the status is read from the single captured
        # log record, whose last item is the message
        assert len(caplog.record_tuples) == 1
        raw_output = caplog.record_tuples[0][2]

    assert parser_fn(raw_output) == expected_deposit_status
def test_cli_update_metadata_with_swhid_on_completed_deposit(
    datadir, requests_mock_datadir, cli_runner
):
    """Updating the metadata of a completed deposit (status done) is ok."""
    api_url_basename = "deposit.test.updateswhid"
    deposit_id = 123
    expected_deposit_status = {
        "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655",
        "deposit_id": str(deposit_id),
        "deposit_status": "done",
        "deposit_status_detail": (
            "The deposit has been successfully loaded into the "
            "Software Heritage archive"
        ),
        "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
    }
    # sanity checks on the expectations themselves
    assert expected_deposit_status["deposit_status"] == "done"
    assert expected_deposit_status["deposit_swh_id"] is not None

    # fmt: off
    upload_args = [
        "upload",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--author", "John Doe",
        "--deposit-id", deposit_id,
        "--swhid", expected_deposit_status["deposit_swh_id"],
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    actual_deposit_status = json.loads(result.output)
    assert "error" not in actual_deposit_status
    assert actual_deposit_status == expected_deposit_status
def test_cli_update_metadata_with_swhid_on_other_status_deposit(
    datadir, requests_mock_datadir, cli_runner
):
    """Updating metadata with a swhid on a deposit not in status done is refused.

    The cli still exits with code 0 but its output describes the error.
    """
    api_url_basename = "deposit.test.updateswhid"
    deposit_id = "321"

    # fmt: off
    upload_args = [
        "upload",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--author", "John Doe",
        "--deposit-id", deposit_id,
        "--swhid", "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    actual_result = json.loads(result.output)
    assert "error" in actual_result

    expected_result = {
        "error": "You can only update metadata on deposit with status 'done'",
        "detail": f"The deposit {deposit_id} has status 'partial'",
        "deposit_status": "partial",
        "deposit_id": deposit_id,
    }
    assert actual_result == expected_result
@pytest.mark.parametrize(
    "metadata_entry_key", ["entry-data-with-swhid", "entry-data-with-swhid-no-prov"]
)
def test_cli_metadata_only_deposit_full_metadata_file(
    datadir,
    requests_mock_datadir,
    cli_runner,
    atom_dataset,
    tmp_path,
    metadata_entry_key,
    caplog,
):
    """Post metadata-only deposit through cli

    The metadata file posted by the client already contains the swhid. A single
    warning is expected when the entry has no metadata provenance, none otherwise.
    """
    api_url_basename = "deposit.test.metadataonly"
    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"

    atom_data = atom_dataset[metadata_entry_key]
    # only the "entry-data-with-swhid" template takes a provenance url
    format_kwargs = {"swhid": swhid}
    if metadata_entry_key == "entry-data-with-swhid":
        format_kwargs["metadata_provenance_url"] = (
            "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh"
        )
    metadata = atom_data.format(**format_kwargs)

    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(metadata)

    expected_deposit_status = {
        "deposit_id": "100",
        "deposit_status": "done",
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }
    assert expected_deposit_status["deposit_status"] == "done"

    # fmt: off
    metadata_only_args = [
        "metadata-only",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, metadata_only_args)

    assert result.exit_code == 0, result.output
    actual_deposit_status = json.loads(result.output)
    assert "error" not in actual_deposit_status
    assert actual_deposit_status == expected_deposit_status

    warning_messages = [
        msg
        for (_, log_level, msg) in caplog.record_tuples
        if log_level == logging.WARNING
    ]
    if "no-prov" in metadata_entry_key:
        assert len(warning_messages) == 1
        assert "metadata-provenance>' should be provided" in warning_messages[-1]
    else:
        assert len(warning_messages) == 0
def test_cli_metadata_only_deposit_invalid_swhid(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Posting a metadata-only deposit through cli with an invalid swhid raises."""
    api_url_basename = "deposit.test.metadataonly"
    invalid_swhid = "ssh:2:sth:xxx"
    metadata = atom_dataset["entry-data-with-swhid-no-prov"].format(swhid=invalid_swhid)

    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(metadata)

    # fmt: off
    metadata_only_args = [
        "metadata-only",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--format", "json",
    ]
    # fmt: on

    # catch_exceptions=False lets the ValidationError reach pytest.raises
    with pytest.raises(ValidationError, match="Invalid"):
        cli_runner.invoke(cli, metadata_only_args, catch_exceptions=False)
def test_cli_metadata_only_deposit_no_swhid(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Posting a metadata-only deposit through cli without any swhid raises."""
    api_url_basename = "deposit.test.metadataonly"
    metadata = atom_dataset["entry-data-minimal"]

    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(metadata)

    # fmt: off
    metadata_only_args = [
        "metadata-only",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--format", "json",
    ]
    # fmt: on

    # catch_exceptions=False lets the InputError reach pytest.raises
    with pytest.raises(InputError, match="SWHID must be provided"):
        cli_runner.invoke(cli, metadata_only_args, catch_exceptions=False)
@pytest.mark.parametrize(
    "metadata_entry_key", ["entry-data-with-add-to-origin", "entry-only-create-origin"]
)
def test_cli_deposit_warning_missing_origin(
    metadata_entry_key,
    tmp_path,
    atom_dataset,
    caplog,
    cli_runner,
    requests_mock_datadir,
):
    """Deposit cli logs no warning for the parametrized metadata entries.

    Every captured log record must be below the WARNING level.
    """
    # NOTE(review): judging by their names, the two entries carry an
    # add-to-origin and a create-origin tag respectively, which presumably is
    # why no "missing origin" warning is expected -- confirm against the dataset
    metadata_raw = atom_dataset[metadata_entry_key] % "some-url"
    metadata_path = os.path.join(tmp_path, "metadata-with-origin-tag-to-deposit.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(metadata_raw)

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
    ]
    # fmt: on
    cli_runner.invoke(cli, upload_args)

    for record in caplog.record_tuples:
        # all messages are info or below messages so everything is fine
        log_level = record[1]
        assert log_level < logging.WARNING
def test_cli_deposit_warning_missing_provenance_url(
    tmp_path, atom_dataset, caplog, cli_runner, requests_mock_datadir,
):
    """Deposit cli logs exactly one warning when no metadata provenance is provided."""
    atom_template = atom_dataset["entry-data-with-add-to-origin-no-prov"]
    metadata_raw = atom_template % "some-url"

    metadata_path = os.path.join(tmp_path, "metadata-with-missing-prov-url.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(metadata_raw)

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
    ]
    # fmt: on
    cli_runner.invoke(cli, upload_args)

    warning_levels = [
        log_level
        for (_, log_level, _) in caplog.record_tuples
        if log_level == logging.WARNING
    ]
    assert len(warning_levels) == 1
def test_cli_failure_should_be_parseable(atom_dataset, mocker):
    """An error document is parsed into its summary, detail and verbose description."""
    summary = "Cannot load metadata"
    verbose_description = (
        "Cannot load metadata on swh:1:dir:0eda267e7d3c2e37b3f6a78e542b16190ac4574e, "
        "this directory object does not exist in the archive (yet?)."
    )
    error_xml = atom_dataset["error-cli"].format(
        summary=summary, verboseDescription=verbose_description
    )

    deposit_client = BaseDepositClient(url="https://somewhere.org/")
    expected_error = {
        "summary": summary,
        "detail": "",
        "sword:verboseDescription": verbose_description,
    }
    assert deposit_client.parse_result_error(error_xml) == expected_error
def test_cli_service_document_failure(atom_dataset, mocker):
    """Service document failures are parsed into a dict with an "error" entry."""
    summary = "Invalid user credentials"
    error_xml = atom_dataset["error-cli"].format(summary=summary, verboseDescription="")

    service_document_client = ServiceDocumentDepositClient(url="https://somewhere.org/")
    parsed_error = service_document_client.parse_result_error(error_xml)

    assert parsed_error == {"error": summary}
@pytest.mark.parametrize(
    "output_format,parser_fn",
    [
        ("json", json.loads),
        ("yaml", yaml.safe_load),
        (
            "logging",
            ast.literal_eval,
        ),  # not enough though, the caplog fixture is needed
    ],
)
def test_cli_deposit_collection_list(
    output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner
):
    """Check the list command with every output format (json, yaml, logging)."""
    api_url_basename = "deposit.test.list"

    expected_deposits = {
        "count": "3",
        "deposits": [
            {
                "external_id": "check-deposit-2020-10-09T13:10:00.000000",
                "id": "1031",
                "status": "rejected",
                "status_detail": "Deposit without archive",
            },
            {
                "external_id": "check-deposit-2020-10-10T13:20:00.000000",
                "id": "1032",
                "status": "rejected",
                "status_detail": "Deposit without archive",
            },
            {
                "complete_date": "2020-10-08T13:52:34.509655",
                "external_id": "check-deposit-2020-10-08T13:52:34.509655",
                "id": "1033",
                "reception_date": "2020-10-08T13:50:30",
                "status": "done",
                "status_detail": "The deposit has been successfully loaded into "
                "the Software Heritage archive",
                "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
                "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
            },
        ],
    }

    # fmt: off
    list_args = [
        "list",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--page", 1,
        "--page-size", 10,
        "--format", output_format,
    ]
    # fmt: on
    result = cli_runner.invoke(cli, list_args)
    assert result.exit_code == 0, f"unexpected output: {result.output}"

    if output_format != "logging":
        raw_output = result.output
    else:
        # with the logging format the listing is read from the single captured
        # log record, whose last item is the message
        assert len(caplog.record_tuples) == 1
        raw_output = caplog.record_tuples[0][2]

    assert parser_fn(raw_output) == expected_deposits
diff --git a/swh/deposit/tests/data/https_deposit.swh.test/1_test b/swh/deposit/tests/data/https_deposit.swh.test/1_test
index 94dd08c3..ef8b597f 100644
--- a/swh/deposit/tests/data/https_deposit.swh.test/1_test
+++ b/swh/deposit/tests/data/https_deposit.swh.test/1_test
@@ -1,27 +1,27 @@
615
- Oct. 8, 2020, 4:57 p.m.
+ 2020-10-08T13:52:34.509655Z
None
partial
615
- Oct. 8, 2020, 4:57 p.m.
+ 2020-10-08T13:52:34.509655Z
None
partial
http://purl.org/net/sword/package/SimpleZip
diff --git a/swh/deposit/tests/data/https_deposit.test.metadata/1_test b/swh/deposit/tests/data/https_deposit.test.metadata/1_test
index 042ab318..55e716b7 100644
--- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test
+++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test
@@ -1,27 +1,27 @@
666
- Oct. 8, 2020, 4:57 p.m.
+ 2020-10-08T13:52:34.509655Z
hardcoded_sample_archive_path
partial
666
- Oct. 8, 2020, 4:57 p.m.
+ 2020-10-08T13:52:34.509655Z
hardcoded_sample_archive_path
partial
http://purl.org/net/sword/package/SimpleZip
diff --git a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata
index e15d53c9..7c2e79f3 100644
--- a/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata
+++ b/swh/deposit/tests/data/https_deposit.test.metadata/1_test_666_metadata
@@ -1,26 +1,26 @@
666
- Oct. 9, 2020, 8:44 p.m.
+ 2020-10-08T13:52:34.509655Z
something
deposited
666
- Oct. 9, 2020, 8:44 p.m.
+ 2020-10-08T13:52:34.509655Z
something
deposited
http://purl.org/net/sword/package/SimpleZip
diff --git a/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test
index 2ef2a26d..bc584a46 100644
--- a/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test
+++ b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test
@@ -1,12 +1,12 @@
100
- 2020-10-08T13:52:34.509655
+ 2020-10-08T13:52:34.509655Z
done
swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea
swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/