Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9124598
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
53 KB
Subscribers
None
View Options
diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
index ad7ba48d..bf310e35 100644
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -1,74 +1,92 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Functional Metadata checks:
Mandatory fields:
- 'author'
- 'name' or 'title'
+Suggested fields:
+- metadata-provenance
+
"""
from typing import Dict, Optional, Tuple
import iso8601
-from swh.deposit.utils import normalize_date
+from swh.deposit.utils import normalize_date, parse_swh_metadata_provenance
MANDATORY_FIELDS_MISSING = "Mandatory fields are missing"
INVALID_DATE_FORMAT = "Invalid date format"
+SUGGESTED_FIELDS_MISSING = "Suggested fields are missing"
+METADATA_PROVENANCE_KEY = "swh:metadata-provenance"
+
def check_metadata(metadata: Dict) -> Tuple[bool, Optional[Dict]]:
    """Check metadata for mandatory field presence and date format.

    Args:
        metadata: Metadata dictionary to check

    Returns:
        tuple (status, error_detail):
          - (True, None) if metadata are ok and suggested fields are also present
          - (True, <detailed-error>) if metadata are ok but some suggestions are
            missing
          - (False, <detailed-error>) otherwise.

    """
    suggested_fields = []
    # at least one value per couple below is mandatory
    alternate_fields = {
        ("atom:name", "atom:title", "codemeta:name"): False,
        ("atom:author", "codemeta:author"): False,
    }
    for field in metadata:
        for possible_names in alternate_fields:
            if field in possible_names:
                alternate_fields[possible_names] = True
                # the groups are disjoint, so a field matches at most one group;
                # stop scanning the remaining groups (was a no-op `continue`)
                break
    mandatory_result = [" or ".join(k) for k, v in alternate_fields.items() if not v]

    # provenance metadata is optional, only suggested
    provenance_meta = parse_swh_metadata_provenance(metadata)
    if provenance_meta is None:
        suggested_fields = [
            {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
        ]

    if mandatory_result:
        detail = [{"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result}]
        return False, {"metadata": detail + suggested_fields}

    # validate the optional date fields when present
    fields = []
    commit_date = metadata.get("codemeta:datePublished")
    author_date = metadata.get("codemeta:dateCreated")
    if commit_date:
        try:
            normalize_date(commit_date)
        except iso8601.iso8601.ParseError:
            fields.append("codemeta:datePublished")
    if author_date:
        try:
            normalize_date(author_date)
        except iso8601.iso8601.ParseError:
            fields.append("codemeta:dateCreated")
    if fields:
        detail = [{"summary": INVALID_DATE_FORMAT, "fields": fields}]
        return False, {"metadata": detail + suggested_fields}

    if suggested_fields:  # it's fine but warn about missing suggested fields
        return True, {"metadata": suggested_fields}
    return True, None
diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
index 226e83ee..f770cf07 100644
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -1,125 +1,189 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Any, Dict
+
import pytest
-from swh.deposit.api.checks import check_metadata
+from swh.deposit.api.checks import (
+ METADATA_PROVENANCE_KEY,
+ SUGGESTED_FIELDS_MISSING,
+ check_metadata,
+)
+
+METADATA_PROVENANCE_DICT: Dict[str, Any] = {
+ "swh:deposit": {
+ METADATA_PROVENANCE_KEY: {"schema:url": "some-metadata-provenance-url"}
+ }
+}
@pytest.mark.parametrize(
    "metadata_ok",
    [
        {
            "atom:url": "something",
            "atom:external_identifier": "something-else",
            "atom:name": "foo",
            "atom:author": "someone",
            **METADATA_PROVENANCE_DICT,
        },
        {
            "atom:url": "some url",
            "atom:external_identifier": "some id",
            "atom:title": "bar",
            "atom:author": "no one",
            **METADATA_PROVENANCE_DICT,
        },
        {
            "atom:url": "some url",
            "codemeta:name": "bar",
            "codemeta:author": "no one",
            **METADATA_PROVENANCE_DICT,
        },
        # last case deliberately lacks METADATA_PROVENANCE_DICT to exercise
        # the "ok but suggested fields missing" branch
        {
            "atom:url": "some url",
            "atom:external_identifier": "some id",
            "atom:title": "bar",
            "atom:author": "no one",
            "codemeta:datePublished": "2020-12-21",
            "codemeta:dateCreated": "2020-12-21",
        },
    ],
)
def test_api_checks_check_metadata_ok(metadata_ok, swh_checks_deposit):
    """Mandatory fields present: check passes; detail is None only when the
    suggested provenance metadata is also present."""
    actual_check, detail = check_metadata(metadata_ok)
    assert actual_check is True, f"Unexpected result: {detail}"
    if "swh:deposit" in metadata_ok:
        # no missing suggested field
        assert detail is None
    else:
        # missing suggested field
        assert detail == {
            "metadata": [
                {
                    "fields": [METADATA_PROVENANCE_KEY],
                    "summary": SUGGESTED_FIELDS_MISSING,
                }
            ]
        }
@pytest.mark.parametrize(
    "metadata_ko,expected_summary",
    [
        (
            {
                "atom:url": "something",
                "atom:external_identifier": "something-else",
                "atom:author": "someone",
                **METADATA_PROVENANCE_DICT,
            },
            {
                "summary": "Mandatory fields are missing",
                "fields": ["atom:name or atom:title or codemeta:name"],
            },
        ),
        (
            {
                "atom:url": "something",
                "atom:external_identifier": "something-else",
                "atom:title": "foobar",
                **METADATA_PROVENANCE_DICT,
            },
            {
                "summary": "Mandatory fields are missing",
                "fields": ["atom:author or codemeta:author"],
            },
        ),
        (
            {
                "atom:url": "something",
                "atom:external_identifier": "something-else",
                "codemeta:title": "bar",  # wrong key: name/title is still missing
                "atom:author": "someone",
                **METADATA_PROVENANCE_DICT,
            },
            {
                "summary": "Mandatory fields are missing",
                "fields": ["atom:name or atom:title or codemeta:name"],
            },
        ),
        (
            {
                "atom:url": "something",
                "atom:external_identifier": "something-else",
                "atom:title": "foobar",
                "author": "foo",  # unprefixed key does not count as author
                **METADATA_PROVENANCE_DICT,
            },
            {
                "summary": "Mandatory fields are missing",
                "fields": ["atom:author or codemeta:author"],
            },
        ),
        (
            {
                "atom:url": "something",
                "atom:external_identifier": "something-else",
                "atom:title": "foobar",
                "atom:authorblahblah": "foo",  # near-miss key must not match
                **METADATA_PROVENANCE_DICT,
            },
            {
                "summary": "Mandatory fields are missing",
                "fields": ["atom:author or codemeta:author"],
            },
        ),
        # NOTE(review): the patch appended a sixth case that was an exact
        # duplicate of the first one; it is dropped here as redundant.
    ],
)
def test_api_checks_check_metadata_ko(
    metadata_ko, expected_summary, swh_checks_deposit
):
    """Missing mandatory fields: check fails with only the mandatory-field
    error (provenance is supplied, so no suggested-field entry)."""
    actual_check, error_detail = check_metadata(metadata_ko)
    assert actual_check is False
    assert error_detail == {"metadata": [expected_summary]}
+
+
@pytest.mark.parametrize(
    "metadata_ko,expected_invalid_summary",
    [
        (
            {
                "atom:url": "some url",
                "atom:external_identifier": "some id",
                "atom:title": "bar",
                "atom:author": "no one",
                "codemeta:datePublished": "2020-aa-21",  # malformed dates
                "codemeta:dateCreated": "2020-12-bb",
            },
            {
                "summary": "Invalid date format",
                "fields": ["codemeta:datePublished", "codemeta:dateCreated"],
            },
        ),
    ],
)
def test_api_checks_check_metadata_fields_ko_and_missing_suggested_fields(
    metadata_ko, expected_invalid_summary, swh_checks_deposit
):
    """Invalid dates AND missing provenance: the error detail carries both the
    invalid-date summary and the missing-suggested-fields summary."""
    actual_check, error_detail = check_metadata(metadata_ko)
    assert actual_check is False
    assert error_detail == {
        "metadata": [expected_invalid_summary]
        + [{"fields": [METADATA_PROVENANCE_KEY], "summary": SUGGESTED_FIELDS_MISSING}]
    }
diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py
index 5fe2d3b5..d4fd5bd7 100644
--- a/swh/deposit/tests/api/test_deposit_private_check.py
+++ b/swh/deposit/tests/api/test_deposit_private_check.py
@@ -1,211 +1,218 @@
-# Copyright (C) 2017-2021 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.urls import reverse_lazy as reverse
import pytest
from rest_framework import status
-from swh.deposit.api.checks import MANDATORY_FIELDS_MISSING
+from swh.deposit.api.checks import (
+ MANDATORY_FIELDS_MISSING,
+ METADATA_PROVENANCE_KEY,
+ SUGGESTED_FIELDS_MISSING,
+)
from swh.deposit.api.private.deposit_check import (
MANDATORY_ARCHIVE_INVALID,
MANDATORY_ARCHIVE_MISSING,
MANDATORY_ARCHIVE_UNSUPPORTED,
)
from swh.deposit.config import (
COL_IRI,
DEPOSIT_STATUS_DEPOSITED,
DEPOSIT_STATUS_REJECTED,
DEPOSIT_STATUS_VERIFIED,
PRIVATE_CHECK_DEPOSIT,
)
from swh.deposit.models import Deposit
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import (
create_arborescence_archive,
create_archive_with_archive,
)
PRIVATE_CHECK_DEPOSIT_NC = PRIVATE_CHECK_DEPOSIT + "-nc"
def private_check_url_endpoints(collection, deposit):
    """Build the two private-check URLs for a deposit: the collection-scoped
    endpoint and the collection-less one."""
    with_collection = reverse(
        PRIVATE_CHECK_DEPOSIT, args=[collection.name, deposit.id]
    )
    without_collection = reverse(PRIVATE_CHECK_DEPOSIT_NC, args=[deposit.id])
    return [with_collection, without_collection]
@pytest.mark.parametrize("extension", ["zip", "tar", "tar.gz", "tar.bz2", "tar.xz"])
def test_deposit_ok(
    authenticated_client, deposit_collection, ready_deposit_ok, extension
):
    """Proper deposit should succeed the checks (-> status ready)
    """
    deposit = ready_deposit_ok
    for url in private_check_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["status"] == DEPOSIT_STATUS_VERIFIED
        deposit = Deposit.objects.get(pk=deposit.id)
        assert deposit.status == DEPOSIT_STATUS_VERIFIED
        # reset to "deposited" so the second endpoint re-runs the check
        deposit.status = DEPOSIT_STATUS_DEPOSITED
        deposit.save()


@pytest.mark.parametrize("extension", ["zip", "tar", "tar.gz", "tar.bz2", "tar.xz"])
def test_deposit_invalid_tarball(
    tmp_path, authenticated_client, deposit_collection, extension
):
    """Deposit with tarball (of 1 tarball) should fail the checks: rejected
    """
    deposit = create_deposit_archive_with_archive(
        tmp_path, extension, authenticated_client, deposit_collection.name
    )
    for url in private_check_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["status"] == DEPOSIT_STATUS_REJECTED
        details = data["details"]
        # archive checks failure
        assert len(details["archive"]) == 1
        assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_INVALID
        deposit = Deposit.objects.get(pk=deposit.id)
        assert deposit.status == DEPOSIT_STATUS_REJECTED


def test_deposit_ko_missing_tarball(
    authenticated_client, deposit_collection, ready_deposit_only_metadata
):
    """Deposit without archive should fail the checks: rejected
    """
    deposit = ready_deposit_only_metadata
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
    for url in private_check_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["status"] == DEPOSIT_STATUS_REJECTED
        details = data["details"]
        # archive checks failure
        assert len(details["archive"]) == 1
        assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_MISSING
        deposit = Deposit.objects.get(pk=deposit.id)
        assert deposit.status == DEPOSIT_STATUS_REJECTED
        # reset to "deposited" so the second endpoint re-runs the check
        deposit.status = DEPOSIT_STATUS_DEPOSITED
        deposit.save()
def test_deposit_ko_unsupported_tarball(
    tmp_path, authenticated_client, deposit_collection, ready_deposit_invalid_archive
):
    """Deposit with an unsupported tarball should fail the checks: rejected
    """
    deposit = ready_deposit_invalid_archive
    assert DEPOSIT_STATUS_DEPOSITED == deposit.status
    for url in private_check_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["status"] == DEPOSIT_STATUS_REJECTED
        details = data["details"]
        # archive checks failure
        assert len(details["archive"]) == 1
        assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_UNSUPPORTED
        # metadata check failure: mandatory fields missing + missing suggested
        # provenance field
        assert len(details["metadata"]) == 2
        mandatory = details["metadata"][0]
        assert mandatory["summary"] == MANDATORY_FIELDS_MISSING
        assert set(mandatory["fields"]) == set(
            [
                "atom:author or codemeta:author",
                "atom:name or atom:title or codemeta:name",
            ]
        )
        suggested = details["metadata"][1]
        assert suggested["summary"] == SUGGESTED_FIELDS_MISSING
        assert set(suggested["fields"]) == set([METADATA_PROVENANCE_KEY])
        deposit = Deposit.objects.get(pk=deposit.id)
        assert deposit.status == DEPOSIT_STATUS_REJECTED
        # reset to "deposited" so the second endpoint re-runs the check
        deposit.status = DEPOSIT_STATUS_DEPOSITED
        deposit.save()
def test_check_deposit_metadata_ok(
    authenticated_client, deposit_collection, ready_deposit_ok
):
    """Proper deposit should succeed the checks (-> status ready)
    with all **MUST** metadata

    using the codemeta metadata test set
    """
    deposit = ready_deposit_ok
    assert deposit.status == DEPOSIT_STATUS_DEPOSITED
    for url in private_check_url_endpoints(deposit_collection, deposit):
        response = authenticated_client.get(url)
        assert response.status_code == status.HTTP_200_OK
        data = response.json()
        assert data["status"] == DEPOSIT_STATUS_VERIFIED
        deposit = Deposit.objects.get(pk=deposit.id)
        assert deposit.status == DEPOSIT_STATUS_VERIFIED
        # reset to "deposited" so the second endpoint re-runs the check
        deposit.status = DEPOSIT_STATUS_DEPOSITED
        deposit.save()


def create_deposit_archive_with_archive(
    root_path, archive_extension, client, collection_name
):
    """Deposit a tarball that itself contains a tarball (invalid content),
    returning the resulting Deposit in 'deposited' state."""
    # we create the holding archive to a given extension
    archive = create_arborescence_archive(
        root_path,
        "archive1",
        "file1",
        b"some content in file",
        extension=archive_extension,
    )
    # now we create an archive holding the first created archive
    invalid_archive = create_archive_with_archive(root_path, "invalid.tgz", archive)
    # we deposit it
    response = client.post(
        reverse(COL_IRI, args=[collection_name]),
        content_type="application/x-tar",
        data=invalid_archive["data"],
        CONTENT_LENGTH=invalid_archive["length"],
        HTTP_MD5SUM=invalid_archive["md5sum"],
        HTTP_SLUG="external-id",
        HTTP_IN_PROGRESS=False,
        HTTP_CONTENT_DISPOSITION="attachment; filename=%s" % (invalid_archive["name"],),
    )
    # then
    assert response.status_code == status.HTTP_201_CREATED
    response_content = parse_xml(response.content)
    deposit_status = response_content["swh:deposit_status"]
    assert deposit_status == DEPOSIT_STATUS_DEPOSITED
    deposit_id = int(response_content["swh:deposit_id"])
    deposit = Deposit.objects.get(pk=deposit_id)
    assert DEPOSIT_STATUS_DEPOSITED == deposit.status
    return deposit
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
index 8c8ec3bd..f9dfa6d5 100644
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -1,1033 +1,1047 @@
-# Copyright (C) 2020-2021 The Software Heritage developers
+# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import ast
from collections import OrderedDict
import contextlib
import json
import logging
import os
from unittest.mock import MagicMock
import pytest
import yaml
-from swh.deposit.api.checks import check_metadata
+from swh.deposit.api.checks import (
+ METADATA_PROVENANCE_KEY,
+ SUGGESTED_FIELDS_MISSING,
+ check_metadata,
+)
from swh.deposit.cli import deposit as cli
from swh.deposit.cli.client import InputError, _collection, _url, generate_metadata
from swh.deposit.client import (
BaseDepositClient,
MaintenanceError,
PublicApiDepositClient,
ServiceDocumentDepositClient,
)
from swh.deposit.parsers import parse_xml
from swh.model.exceptions import ValidationError
from ..conftest import TEST_USER
def generate_slug() -> str:
    """Return a fresh random slug (a UUID4 string), for sample purposes."""
    from uuid import uuid4

    return f"{uuid4()}"
@pytest.fixture
def datadir(request):
    """Override default datadir to target main test datadir"""
    return os.path.join(os.path.dirname(str(request.fspath)), "../data")


@pytest.fixture
def slug():
    # fresh random slug per test
    return generate_slug()


@pytest.fixture
def patched_tmp_path(tmp_path, mocker):
    # make tempfile.TemporaryDirectory() yield pytest's tmp_path so the cli's
    # generated files land somewhere the test can inspect
    mocker.patch(
        "tempfile.TemporaryDirectory",
        return_value=contextlib.nullcontext(str(tmp_path)),
    )
    return tmp_path


@pytest.fixture
def client_mock_api_down(mocker, slug):
    """A mock client whose connection with api fails due to maintenance issue
    """
    mock_client = MagicMock()
    mocker.patch("swh.deposit.client.PublicApiDepositClient", return_value=mock_client)
    mock_client.service_document.side_effect = MaintenanceError(
        "Database backend maintenance: Temporarily unavailable, try again later."
    )
    return mock_client
def test_cli_url():
    # _url normalizes a bare deposit url by appending the /1 api version
    assert _url("http://deposit") == "http://deposit/1"
    assert _url("https://other/1") == "https://other/1"


def test_cli_collection_error():
    # a service document carrying an "error" key must surface as InputError
    mock_client = MagicMock()
    mock_client.service_document.return_value = {"error": "something went wrong"}
    with pytest.raises(InputError) as e:
        _collection(mock_client)
    assert "Service document retrieval: something went wrong" == str(e.value)


def test_cli_collection_ok(requests_mock_datadir):
    client = PublicApiDepositClient(
        url="https://deposit.swh.test/1", auth=("test", "test")
    )
    collection_name = _collection(client)
    assert collection_name == "test"


def test_cli_collection_ko_because_downtime():
    # MaintenanceError from the service document call propagates unchanged
    mock_client = MagicMock()
    mock_client.service_document.side_effect = MaintenanceError("downtime")
    with pytest.raises(MaintenanceError, match="downtime"):
        _collection(mock_client)
def test_cli_upload_conflictual_flags(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Post metadata-only deposit through cli with invalid swhid raises
    """
    api_url_basename = "deposit.test.metadataonly"
    metadata = atom_dataset["entry-data-minimal"]
    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
    with open(metadata_path, "w") as f:
        f.write(metadata)
    with pytest.raises(InputError, match="both with different values"):
        # fmt: off
        cli_runner.invoke(
            cli,
            [
                "upload",
                "--url", f"https://{api_url_basename}/1",
                "--username", TEST_USER["username"],
                "--password", TEST_USER["password"],
                "--metadata", metadata_path,
                "--slug", "some-slug",  # deprecated flag
                "--create-origin", "some-other-slug",  # conflictual value, so raise
                "--format", "json",
            ],
            catch_exceptions=False,
        )
        # fmt: on


def test_cli_deposit_with_server_down_for_maintenance(
    sample_archive, caplog, client_mock_api_down, slug, patched_tmp_path, cli_runner
):
    """ Deposit failure due to maintenance down time should be explicit
    """
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--archive", sample_archive["path"],
            "--author", "Jane Doe",
        ],
    )
    # fmt: on
    assert result.exit_code == 1, result.output
    assert result.output == ""
    # the maintenance message must be logged as an error, not printed
    down_for_maintenance_log_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        "Database backend maintenance: Temporarily unavailable, try again later.",
    )
    assert down_for_maintenance_log_record in caplog.record_tuples
    client_mock_api_down.service_document.assert_called_once_with()
def test_cli_client_generate_metadata_ok(slug):
    """Generated metadata is well formed and pass service side metadata checks
    """
    actual_metadata_xml = generate_metadata(
        "deposit-client",
        "project-name",
        authors=["some", "authors"],
        external_id="external-id",
        create_origin="origin-url",
    )

    actual_metadata = dict(parse_xml(actual_metadata_xml))
    assert actual_metadata["atom:author"] == "deposit-client"
    assert actual_metadata["atom:title"] == "project-name"
    assert actual_metadata["atom:updated"] is not None
    assert actual_metadata["codemeta:name"] == "project-name"
    assert actual_metadata["codemeta:identifier"] == "external-id"
    assert actual_metadata["codemeta:author"] == [
        OrderedDict([("codemeta:name", "some")]),
        OrderedDict([("codemeta:name", "authors")]),
    ]
    assert (
        actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"]
        == "origin-url"
    )

    checks_ok, detail = check_metadata(actual_metadata)

    assert checks_ok is True
    # FIXME: Open the flag to suggest the provenance metadata url in the cli
    # (until then, the server-side check reports the suggested field as missing)
    assert detail == {
        "metadata": [
            {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
        ]
    }
def test_cli_client_generate_metadata_ok2(slug):
    """Generated metadata is well formed and pass service side metadata checks
    """
    actual_metadata_xml = generate_metadata(
        "deposit-client", "project-name", authors=["some", "authors"],
    )

    actual_metadata = dict(parse_xml(actual_metadata_xml))
    assert actual_metadata["atom:author"] == "deposit-client"
    assert actual_metadata["atom:title"] == "project-name"
    assert actual_metadata["atom:updated"] is not None
    assert actual_metadata["codemeta:name"] == "project-name"
    assert actual_metadata["codemeta:author"] == [
        OrderedDict([("codemeta:name", "some")]),
        OrderedDict([("codemeta:name", "authors")]),
    ]
    # no external id nor origin requested, so neither should be generated
    assert actual_metadata.get("codemeta:identifier") is None
    assert actual_metadata.get("swh:deposit") is None

    checks_ok, detail = check_metadata(actual_metadata)

    assert checks_ok is True
    # FIXME: Open the flag to suggest the provenance metadata url in the cli
    # (until then, the server-side check reports the suggested field as missing)
    assert detail == {
        "metadata": [
            {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
        ]
    }
def test_cli_single_minimal_deposit_with_slug(
    sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog,
):
    """ This ensure a single deposit upload through the cli is fine, cf.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--archive", sample_archive["path"],
            "--author", "Jane Doe",
            "--slug", slug,
            "--format", "json",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, result.output
    # expected values come from the recorded mock responses
    # (requests_mock_datadir data files)
    assert json.loads(result.output) == {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "Oct. 8, 2020, 4:57 p.m.",
    }
    with open(metadata_path) as fd:
        # check the metadata file the cli generated on the fly
        actual_metadata = dict(parse_xml(fd.read()))
        assert actual_metadata["atom:author"] == TEST_USER["username"]
        assert actual_metadata["codemeta:name"] == "test-project"
        assert actual_metadata["atom:title"] == "test-project"
        assert actual_metadata["atom:updated"] is not None
        assert actual_metadata["codemeta:identifier"] == slug
        assert actual_metadata["codemeta:author"] == OrderedDict(
            [("codemeta:name", "Jane Doe")]
        )
    count_warnings = 0
    for (_, log_level, _) in caplog.record_tuples:
        count_warnings += 1 if log_level == logging.WARNING else 0
    assert (
        count_warnings == 1
    ), "We should have 1 warning as we are using slug instead of create_origin"


def test_cli_single_minimal_deposit_with_create_origin(
    sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog,
):
    """ This ensure a single deposit upload through the cli is fine, cf.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    origin = slug
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--archive", sample_archive["path"],
            "--author", "Jane Doe",
            "--create-origin", origin,
            "--format", "json",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, result.output
    assert json.loads(result.output) == {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "Oct. 8, 2020, 4:57 p.m.",
    }
    with open(metadata_path) as fd:
        # check the metadata file the cli generated on the fly
        actual_metadata = dict(parse_xml(fd.read()))
        assert actual_metadata["atom:author"] == TEST_USER["username"]
        assert actual_metadata["codemeta:name"] == "test-project"
        assert actual_metadata["atom:title"] == "test-project"
        assert actual_metadata["atom:updated"] is not None
        assert (
            actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"]
            == origin
        )
        assert actual_metadata["codemeta:author"] == OrderedDict(
            [("codemeta:name", "Jane Doe")]
        )
    count_warnings = 0
    for (_, log_level, _) in caplog.record_tuples:
        count_warnings += 1 if log_level == logging.WARNING else 0
    assert (
        count_warnings == 0
    ), "We should have no warning as we are using create_origin"
def test_cli_validation_metadata(
    sample_archive, caplog, patched_tmp_path, cli_runner, slug
):
    """Multiple metadata flags scenario (missing, conflicts) properly fails the calls
    """
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    with open(metadata_path, "a"):
        pass  # creates the file
    for flag_title_or_name, author_or_name in [
        ("--author", "no one"),
        ("--name", "test-project"),
    ]:
        # Test missing author then missing name
        # fmt: off
        result = cli_runner.invoke(
            cli,
            [
                "upload",
                "--url", "https://deposit.swh.test/1",
                "--username", TEST_USER["username"],
                "--password", TEST_USER["password"],
                "--archive", sample_archive["path"],
                "--slug", slug,
                flag_title_or_name,
                author_or_name,
            ],
        )
        # fmt: on
        assert result.exit_code == 1, f"unexpected result: {result.output}"
        assert result.output == ""
        expected_error_log_record = (
            "swh.deposit.cli.client",
            logging.ERROR,
            (
                "Problem during parsing options: "
                "For metadata deposit request, either a metadata file with "
                "--metadata or both --author and --name must be provided. "
            ),
        )
        assert expected_error_log_record in caplog.record_tuples
        # Clear mocking state
        caplog.clear()
    # incompatible flags: Test both --metadata and --author, then --metadata and
    # --name
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--deposit-id", 666,
            "--archive", sample_archive["path"],
            "--slug", slug,
        ],
    )
    # fmt: on
    assert result.exit_code == 1, f"unexpected result: {result.output}"
    assert result.output == ""
    expected_error_log_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        (
            "Problem during parsing options: "
            "For metadata deposit request, either a metadata file with "
            "--metadata or both --author and --name must be provided."
        ),
    )
    assert expected_error_log_record in caplog.record_tuples
    # Clear mocking state
    caplog.clear()
    # incompatible flags check (Test both --metadata and --author,
    # then --metadata and --name)
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--archive", sample_archive["path"],
            "--metadata", metadata_path,
            "--author", "Jane Doe",
            "--slug", slug,
        ],
    )
    # fmt: on
    assert result.exit_code == 1, result.output
    assert result.output == ""
    expected_error_log_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        (
            "Problem during parsing options: "
            "Using --metadata flag is incompatible with --author "
            "and --name and --create-origin (those are used to generate "
            "one metadata file)."
        ),
    )
    assert expected_error_log_record in caplog.record_tuples
    caplog.clear()
def test_cli_validation_no_actionable_command(caplog, cli_runner):
    """Multiple metadata flags scenario (missing, conflicts) properly fails the calls
    """
    # no actionable command
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--partial",
        ],
    )
    # fmt: on
    assert result.exit_code == 1, result.output
    assert result.output == ""
    expected_error_log_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        (
            "Problem during parsing options: "
            "Please provide an actionable command. See --help for more information"
        ),
    )
    assert expected_error_log_record in caplog.record_tuples


def test_cli_validation_replace_with_no_deposit_id_fails(
    sample_archive, caplog, patched_tmp_path, requests_mock_datadir, datadir, cli_runner
):
    """--replace flags require --deposit-id otherwise fails
    """
    metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml")
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--metadata", metadata_path,
            "--archive", sample_archive["path"],
            "--replace",
        ],
    )
    # fmt: on
    assert result.exit_code == 1, result.output
    assert result.output == ""
    expected_error_log_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        (
            "Problem during parsing options: "
            "To update an existing deposit, you must provide its id"
        ),
    )
    assert expected_error_log_record in caplog.record_tuples
def test_cli_single_deposit_slug_generation(
    sample_archive, patched_tmp_path, requests_mock_datadir, cli_runner
):
    """Single deposit scenario without providing the slug, it should
    not be generated.
    """
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--archive", sample_archive["path"],
            "--author", "Jane Doe",
            "--format", "json",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, result.output
    assert json.loads(result.output) == {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "Oct. 8, 2020, 4:57 p.m.",
    }
    with open(metadata_path) as fd:
        metadata_xml = fd.read()
        actual_metadata = dict(parse_xml(metadata_xml))
        # no slug given, so no identifier must be generated
        assert "codemeta:identifier" not in actual_metadata


def test_cli_multisteps_deposit(
    sample_archive, datadir, slug, requests_mock_datadir, cli_runner
):
    """ First deposit a partial deposit (no metadata, only archive), then update the metadata part.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit
    """  # noqa
    api_url = "https://deposit.test.metadata/1"
    deposit_id = 666
    # Create a partial deposit with only 1 archive
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", api_url,
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--archive", sample_archive["path"],
            "--slug", slug,
            "--format", "json",
            "--partial",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"
    actual_deposit = json.loads(result.output)
    assert actual_deposit == {
        "deposit_id": str(deposit_id),
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "Oct. 8, 2020, 4:57 p.m.",
    }
    # Update the partial deposit with only 1 archive
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", api_url,
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--archive", sample_archive["path"],
            "--deposit-id", deposit_id,
            "--slug", slug,
            "--format", "json",
            "--partial",  # in-progress: True, because remains the metadata to upload
        ],
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"
    assert result.output is not None
    actual_deposit = json.loads(result.output)
    # deposit update scenario actually returns a deposit status dict
    assert actual_deposit["deposit_id"] == str(deposit_id)
    assert actual_deposit["deposit_status"] == "partial"
    # Update the partial deposit with only some metadata (and then finalize it)
    # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit
    metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml")
    # Update deposit with metadata
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", api_url,
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--metadata", metadata_path,
            "--deposit-id", deposit_id,
            "--slug", slug,
            "--format", "json",
        ],
        # this time, ^ we no longer flag it to partial, so the status changes to
        # in-progress false
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"
    assert result.output is not None
    actual_deposit = json.loads(result.output)
    # deposit update scenario actually returns a deposit status dict
    assert actual_deposit["deposit_id"] == str(deposit_id)
    # FIXME: should be "deposited" but current limitation in the
    # requests_mock_datadir_visits use, cannot find a way to make it work right now
    assert actual_deposit["deposit_status"] == "partial"
@pytest.mark.parametrize(
    "output_format,parser_fn",
    [
        ("json", json.loads),
        ("yaml", yaml.safe_load),
        (
            "logging",
            ast.literal_eval,
        ),  # not enough though, the caplog fixture is needed
    ],
)
def test_cli_deposit_status_with_output_format(
    output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner
):
    """Check deposit status cli with all possible output formats (json, yaml, logging).

    The "logging" format writes a dict repr through the logging framework, hence
    ``ast.literal_eval`` plus the ``caplog`` fixture to read it back.
    """
    api_url_basename = "deposit.test.status"
    deposit_id = 1033
    # Expected status dict; the server answer is served from the on-disk data of
    # the requests_mock_datadir fixture for this api_url_basename/deposit_id.
    expected_deposit_status = {
        "deposit_id": str(deposit_id),
        "deposit_status": "done",
        "deposit_status_detail": (
            "The deposit has been successfully loaded into the "
            "Software Heritage archive"
        ),
        "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
        "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655",
    }
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "status",
            "--url", f"https://{api_url_basename}/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--deposit-id", deposit_id,
            "--format", output_format,
        ],
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"

    if output_format == "logging":
        # the "logging" format does not print to stdout; recover the single log
        # record emitted by the command instead
        assert len(caplog.record_tuples) == 1
        # format: (<module>, <log-level>, <log-msg>)
        _, _, result_output = caplog.record_tuples[0]
    else:
        result_output = result.output

    actual_deposit = parser_fn(result_output)
    assert actual_deposit == expected_deposit_status
def test_cli_update_metadata_with_swhid_on_completed_deposit(
    datadir, requests_mock_datadir, cli_runner
):
    """Update new metadata on a completed deposit (status done) is ok
    """
    api_url_basename = "deposit.test.updateswhid"
    deposit_id = 123
    # Full status dict the mocked server returns after the metadata update
    # (served from the requests_mock_datadir fixture data).
    expected_deposit_status = {
        "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655",
        "deposit_id": str(deposit_id),
        "deposit_status": "done",
        "deposit_status_detail": (
            "The deposit has been successfully loaded into the "
            "Software Heritage archive"
        ),
        "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
    }

    # sanity checks: metadata-with-swhid updates are only allowed on "done"
    # deposits that already have a swhid
    assert expected_deposit_status["deposit_status"] == "done"
    assert expected_deposit_status["deposit_swh_id"] is not None

    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", f"https://{api_url_basename}/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--author", "John Doe",
            "--deposit-id", deposit_id,
            "--swhid", expected_deposit_status["deposit_swh_id"],
            "--format", "json",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, result.output

    actual_deposit_status = json.loads(result.output)
    assert "error" not in actual_deposit_status
    assert actual_deposit_status == expected_deposit_status
def test_cli_update_metadata_with_swhid_on_other_status_deposit(
    datadir, requests_mock_datadir, cli_runner
):
    """Update new metadata with swhid on other deposit status is not possible
    """
    api_url_basename = "deposit.test.updateswhid"
    # deposit 321 is in "partial" state in the mocked server data, so the
    # metadata-with-swhid update must be refused
    deposit_id = "321"
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", f"https://{api_url_basename}/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--name", "test-project",
            "--author", "John Doe",
            "--deposit-id", deposit_id,
            "--swhid", "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
            "--format", "json",
        ],
    )
    # fmt: on
    # the cli exits 0 but reports the server-side refusal in its json output
    assert result.exit_code == 0, result.output

    actual_result = json.loads(result.output)
    assert "error" in actual_result
    assert actual_result == {
        "error": "You can only update metadata on deposit with status 'done'",
        "detail": f"The deposit {deposit_id} has status 'partial'",
        "deposit_status": "partial",
        "deposit_id": deposit_id,
    }
def test_cli_metadata_only_deposit_full_metadata_file(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Post metadata-only deposit through cli

    The metadata file posted by the client already contains the swhid

    """
    api_url_basename = "deposit.test.metadataonly"
    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
    # Materialize the atom entry (with the swhid interpolated) as a file on
    # disk, since the cli takes a --metadata file path
    metadata = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
    with open(metadata_path, "w") as m:
        m.write(metadata)

    # metadata-only deposits complete immediately ("done"), no archive involved
    expected_deposit_status = {
        "deposit_id": "100",
        "deposit_status": "done",
        "deposit_date": "2020-10-08T13:52:34.509655",
    }

    assert expected_deposit_status["deposit_status"] == "done"

    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "metadata-only",
            "--url", f"https://{api_url_basename}/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--metadata", metadata_path,
            "--format", "json",
        ],
    )
    # fmt: on
    assert result.exit_code == 0, result.output

    actual_deposit_status = json.loads(result.output)
    assert "error" not in actual_deposit_status
    assert actual_deposit_status == expected_deposit_status
def test_cli_metadata_only_deposit_invalid_swhid(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Post metadata-only deposit through cli with invalid swhid raises
    """
    api_url_basename = "deposit.test.metadataonly"
    # not a valid SWHID (bad scheme/version/object type)
    invalid_swhid = "ssh:2:sth:xxx"
    metadata = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
    with open(metadata_path, "w") as f:
        f.write(metadata)

    # the swhid is validated client-side before any request is sent
    with pytest.raises(ValidationError, match="Invalid"):
        # fmt: off
        cli_runner.invoke(
            cli,
            [
                "metadata-only",
                "--url", f"https://{api_url_basename}/1",
                "--username", TEST_USER["username"],
                "--password", TEST_USER["password"],
                "--metadata", metadata_path,
                "--format", "json",
            ],
            catch_exceptions=False,  # let the exception propagate to pytest.raises
        )
        # fmt: on
def test_cli_metadata_only_deposit_no_swhid(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Post metadata-only deposit through cli without any swhid raises
    """
    api_url_basename = "deposit.test.metadataonly"
    # this atom entry carries no swhid at all
    metadata = atom_dataset["entry-data-minimal"]
    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
    with open(metadata_path, "w") as f:
        f.write(metadata)

    # a metadata-only deposit requires a target swhid; the cli refuses early
    with pytest.raises(InputError, match="SWHID must be provided"):
        # fmt: off
        cli_runner.invoke(
            cli,
            [
                "metadata-only",
                "--url", f"https://{api_url_basename}/1",
                "--username", TEST_USER["username"],
                "--password", TEST_USER["password"],
                "--metadata", metadata_path,
                "--format", "json",
            ],
            catch_exceptions=False,  # let the exception propagate to pytest.raises
        )
        # fmt: on
@pytest.mark.parametrize(
    "metadata_entry_key", ["entry-data-with-add-to-origin", "entry-only-create-origin"]
)
def test_cli_deposit_warning_missing_origin(
    sample_archive,
    metadata_entry_key,
    tmp_path,
    atom_dataset,
    caplog,
    cli_runner,
    requests_mock_datadir,
):
    """Deposit cli should log warning when the provided metadata xml is missing origins
    """
    # For the next deposit, no warning should be logged as either <swh:create_origin> or
    # <swh:origin_to_add> are provided
    metadata_raw = atom_dataset[metadata_entry_key] % "some-url"
    metadata_path = os.path.join(tmp_path, "metadata-with-origin-tag-to-deposit.xml")
    with open(metadata_path, "w") as f:
        f.write(metadata_raw)

    # fmt: off
    cli_runner.invoke(
        cli,
        [
            "upload",
            "--url", "https://deposit.swh.test/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--metadata", metadata_path,
        ],
    )
    # fmt: on

    for (_, log_level, _) in caplog.record_tuples:
        # all messages are info or below messages so everything is fine
        assert log_level < logging.WARNING
def test_cli_failure_should_be_parseable(atom_dataset):
    """A sword error document returned by the server is parsed into a dict.

    ``BaseDepositClient.parse_result_error`` should extract the summary,
    detail and sword:verboseDescription fields from the error xml.
    """
    # Note: the unused `mocker` fixture was removed; no mocking happens here.
    summary = "Cannot load metadata"
    verbose_description = (
        "Cannot load metadata on swh:1:dir:0eda267e7d3c2e37b3f6a78e542b16190ac4574e, "
        "this directory object does not exist in the archive (yet?)."
    )
    error_xml = atom_dataset["error-cli"].format(
        summary=summary, verboseDescription=verbose_description
    )
    # no http call is made: parse_result_error works on the raw xml payload
    api_call = BaseDepositClient(url="https://somewhere.org/")

    actual_error = api_call.parse_result_error(error_xml)

    assert actual_error == {
        "summary": summary,
        "detail": "",
        "sword:verboseDescription": verbose_description,
    }
def test_cli_service_document_failure(atom_dataset):
    """Ensure service document failures are properly served

    ``ServiceDocumentDepositClient.parse_result_error`` reduces the error xml
    to a single ``{"error": <summary>}`` dict.
    """
    # Note: the unused `mocker` fixture was removed; no mocking happens here.
    summary = "Invalid user credentials"
    error_xml = atom_dataset["error-cli"].format(summary=summary, verboseDescription="")
    # no http call is made: parse_result_error works on the raw xml payload
    api_call = ServiceDocumentDepositClient(url="https://somewhere.org/")

    actual_error = api_call.parse_result_error(error_xml)

    assert actual_error == {"error": summary}
@pytest.mark.parametrize(
    "output_format,parser_fn",
    [
        ("json", json.loads),
        ("yaml", yaml.safe_load),
        (
            "logging",
            ast.literal_eval,
        ),  # not enough though, the caplog fixture is needed
    ],
)
def test_cli_deposit_collection_list(
    output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner
):
    """Check deposit list cli with all possible output formats (json, yaml, logging).
    """
    api_url_basename = "deposit.test.list"
    # One page of deposits as the mocked server returns it (from the
    # requests_mock_datadir fixture data): two rejected, one done.
    expected_deposits = {
        "count": "3",
        "deposits": [
            {
                "external_id": "check-deposit-2020-10-09T13:10:00.000000",
                "id": "1031",
                "status": "rejected",
                "status_detail": "Deposit without archive",
            },
            {
                "external_id": "check-deposit-2020-10-10T13:20:00.000000",
                "id": "1032",
                "status": "rejected",
                "status_detail": "Deposit without archive",
            },
            {
                "complete_date": "2020-10-08T13:52:34.509655",
                "external_id": "check-deposit-2020-10-08T13:52:34.509655",
                "id": "1033",
                "reception_date": "2020-10-08T13:50:30",
                "status": "done",
                "status_detail": "The deposit has been successfully loaded into "
                "the Software Heritage archive",
                "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
                "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
            },
        ],
    }
    # fmt: off
    result = cli_runner.invoke(
        cli,
        [
            "list",
            "--url", f"https://{api_url_basename}/1",
            "--username", TEST_USER["username"],
            "--password", TEST_USER["password"],
            "--page", 1,
            "--page-size", 10,
            "--format", output_format,
        ],
    )
    # fmt: on
    assert result.exit_code == 0, f"unexpected output: {result.output}"

    if output_format == "logging":
        # the "logging" format does not print to stdout; recover the single log
        # record emitted by the command instead
        assert len(caplog.record_tuples) == 1
        # format: (<module>, <log-level>, <log-msg>)
        _, _, result_output = caplog.record_tuples[0]
    else:
        result_output = result.output

    actual_deposit = parser_fn(result_output)
    assert actual_deposit == expected_deposits
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jun 21, 7:18 PM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3236747
Attached To
rDDEP Push deposit
Event Timeline
Log In to Comment