diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py
index dbf61404..45e9d787 100644
--- a/swh/deposit/api/checks.py
+++ b/swh/deposit/api/checks.py
@@ -1,129 +1,165 @@
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Functional Metadata checks:
Mandatory fields:
- 'author'
- 'name' or 'title'
Suggested fields:
- metadata-provenance
"""
import dataclasses
import functools
from typing import Dict, Optional, Tuple
+import urllib
from xml.etree import ElementTree
import pkg_resources
import xmlschema
from swh.deposit.errors import FORBIDDEN, DepositError
from swh.deposit.utils import NAMESPACES, parse_swh_metadata_provenance
MANDATORY_FIELDS_MISSING = "Mandatory fields are missing"
INVALID_DATE_FORMAT = "Invalid date format"
SUGGESTED_FIELDS_MISSING = "Suggested fields are missing"
METADATA_PROVENANCE_KEY = "swh:metadata-provenance"
def extra_validator(element, xsd_element):
    """Performs extra checks on Atom elements that cannot be implemented purely
    within XML Schema.

    For now, this only checks URIs are absolute.

    Args:
        element: the XML element being validated; only its ``text`` is read
        xsd_element: the schema component ``element`` is validated against;
            only ``xsd_element.type.name`` is read

    Raises:
        xmlschema.XMLSchemaValidationError: if the element is typed as
            ``xsd:anyURI`` and its text cannot be parsed as a URI, lacks a
            scheme or a network location, or has a space in its network
            location.
    """
    # A bare ``import urllib`` does not guarantee that the ``urllib.parse``
    # submodule is loaded, so import it explicitly before using it.
    import urllib.parse

    if xsd_element.type.name != "{http://www.w3.org/2001/XMLSchema}anyURI":
        # Only URI-typed elements get extra checks.
        return

    # Check the URI is absolute.
    # This could technically be expressed in the schema itself (as a
    # restriction on anyURI); however, this would give an unreadable error,
    # so we implement it here in Python instead.
    try:
        url = urllib.parse.urlparse(element.text)
    except ValueError:
        raise xmlschema.XMLSchemaValidationError(
            xsd_element, element, f"{element.text!r} is not a valid URI",
        ) from None

    if not url.scheme or not url.netloc:
        raise xmlschema.XMLSchemaValidationError(
            xsd_element, element, f"{element.text!r} is not an absolute URI",
        )
    if " " in url.netloc:
        # urllib is a little too permissive...
        raise xmlschema.XMLSchemaValidationError(
            xsd_element, element, f"{element.text!r} is not a valid URI",
        )
+
+
@dataclasses.dataclass
class Schemas:
    """Bundle of the XML schemas used to validate deposit metadata."""

    # Schema built from the packaged ``xsd/swh.xsd`` resource.
    swh: xmlschema.XMLSchema11
    # Schema built from the packaged ``xsd/codemeta.xsd`` resource.
    codemeta: xmlschema.XMLSchema11
@functools.lru_cache(maxsize=1)
def schemas() -> Schemas:
    """Load the ``swh`` and ``codemeta`` XSD resources shipped in the
    ``swh.deposit`` package and return them as a :class:`Schemas`.

    The result is memoized, so the schemas are only built on the first call.
    """
    loaded = {}
    for schema_name in ("swh", "codemeta"):
        raw_xsd = pkg_resources.resource_string(
            "swh.deposit", f"xsd/{schema_name}.xsd"
        )
        loaded[schema_name] = xmlschema.XMLSchema11(raw_xsd.decode())
    return Schemas(**loaded)
def check_metadata(metadata: ElementTree.Element) -> Tuple[bool, Optional[Dict]]:
    """Check metadata for mandatory field presence and schema validity.

    Args:
        metadata: root element of the metadata document to check

    Returns:
        tuple (status, error_detail):
          - (True, None) if metadata are ok and suggested fields are also present
          - (True, detail) if metadata are ok but some suggestions are missing
          - (False, detail) otherwise.

        ``detail`` is a dict with a single ``"metadata"`` key holding a list of
        ``{"summary": ..., "fields": [...]}`` entries.
    """
    # at least one field of each group below is mandatory
    alternate_field_groups = (
        ("atom:name", "atom:title", "codemeta:name"),
        ("atom:author", "codemeta:author"),
    )
    mandatory_result = [
        " or ".join(group)
        for group in alternate_field_groups
        if all(metadata.find(name, namespaces=NAMESPACES) is None for name in group)
    ]

    # provenance metadata is optional
    suggested_fields = []
    if parse_swh_metadata_provenance(metadata) is None:
        suggested_fields = [
            {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
        ]

    if mandatory_result:
        detail = [{"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result}]
        return False, {"metadata": detail + suggested_fields}

    deposit_elt = metadata.find("swh:deposit", namespaces=NAMESPACES)
    # Compare with None explicitly: an Element without children is falsy, so a
    # plain truth test would skip validation of an empty swh:deposit element.
    if deposit_elt is not None:
        try:
            schemas().swh.validate(deposit_elt, extra_validator=extra_validator)
        except xmlschema.exceptions.XMLSchemaException as e:
            return False, {"metadata": [{"fields": ["swh:deposit"], "summary": str(e)}]}

    detail = []
    codemeta_schema = schemas().codemeta
    for child in metadata:
        for schema_element in codemeta_schema.root_elements:
            # NOTE(review): this is a substring test on the qualified name, not
            # an equality test -- confirm that is intended.
            if child.tag in schema_element.name:
                break
        else:
            # Tag is not specified in the schema, don't validate it
            continue
        try:
            codemeta_schema.validate(child, extra_validator=extra_validator)
        except xmlschema.exceptions.XMLSchemaException as e:
            detail.append({"fields": [schema_element.prefixed_name], "summary": str(e)})

    if detail:
        return False, {"metadata": detail + suggested_fields}

    if suggested_fields:  # it's fine but warn about missing suggested fields
        return True, {"metadata": suggested_fields}

    return True, None
def check_url_match_provider(url: str, provider_url: str) -> None:
    """Ensure ``url`` lives under ``provider_url``.

    The provider url is normalized to end with exactly one ``/`` before the
    prefix comparison.

    Raises:
        DepositError: (FORBIDDEN) when ``url`` does not start with the
            normalized provider url.
    """
    expected_prefix = f"{provider_url.rstrip('/')}/"
    if url.startswith(expected_prefix):
        return
    raise DepositError(
        FORBIDDEN, f"URL mismatch: {url} must start with {expected_prefix}",
    )
diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
index a32b18f9..163e9306 100644
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -1,771 +1,934 @@
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# disable flake8 on this file because of line length
# flake8: noqa
import pprint
import re
import textwrap
from typing import Any, Dict
from xml.etree import ElementTree
import pytest
from swh.deposit.api.checks import (
METADATA_PROVENANCE_KEY,
SUGGESTED_FIELDS_MISSING,
check_metadata,
)
METADATA_PROVENANCE_DICT: Dict[str, Any] = {
"swh:deposit": {
METADATA_PROVENANCE_KEY: {"schema:url": "some-metadata-provenance-url"}
}
}
XMLNS = """xmlns="http://www.w3.org/2005/Atom"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:schema="http://schema.org/" """
PROVENANCE_XML = """
some-metadata-provenance-url
"""
_parameters1 = [
pytest.param(textwrap.dedent(metadata_ok), id=id_)
for (id_, metadata_ok,) in [
(
"atom-only-with-name",
f"""\
something
something-else
foo
someone
{PROVENANCE_XML}
""",
),
(
"atom-only-with-title",
f"""\
something
something-else
bar
someone
""",
),
(
"atom-only-and-external_identifier",
f"""\
something
something-else
foo
no one
{PROVENANCE_XML}
""",
),
(
"atom-and-codemeta-minimal",
f"""\
some url
bar
no one
{PROVENANCE_XML}
""",
),
(
"unknown-codemeta-inner-element-after",
f"""\
some url
bar
someone
should allow anything here
{PROVENANCE_XML}
""",
),
(
"unknown-schema-inner-element-after",
f"""\
some url
bar
someone
should allow anything here
{PROVENANCE_XML}
""",
),
(
"unknown-schema-inner-element-before",
f"""\
some url
bar
should allow anything here
someone
{PROVENANCE_XML}
""",
),
(
"unknown-schema-inner-element-before-and-after",
f"""\
some url
bar
should allow anything here
someone
should allow anything here
{PROVENANCE_XML}
""",
),
(
"codemeta-dates",
f"""\
some url
some id
nar
no one
2020-12-21
2020-12-21
2020-12-25
+ 2020-12-25
{PROVENANCE_XML}
""",
),
(
"codemeta-datetimes",
# technically, only Date is allowed for datePublished; but we allow DateTime
# for backward compatibility with old swh-deposit versions
f"""\
some url
some id
nar
no one
2020-12-21T12:00:00
2020-12-21T12:00:00
2020-12-25T12:00:00
{PROVENANCE_XML}
""",
),
(
"codemeta-affiliation",
f"""\
some url
bar
someone
My Orga
{PROVENANCE_XML}
""",
),
(
"swh:add_to_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
),
(
"swh:reference-origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
),
(
"swh:reference-object",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
),
+ (
+ # a full example with every tag we know
+ "codemeta-full",
+ f"""\
+
+ something
+ foo
+ someone
+
+ The Author
+ http://example.org/~theauthor/
+ author@example.org
+
+ University 1
+
+ https://sandbox.orcid.org/0000-0002-9227-8514
+
+
+ A Contributor
+
+ University 2
+
+
+ something
+ something else
+ http://example.org/
+ Blah blah
+ 1.0.0
+ 1.0.0
+ kw1
+ kw2
+ Blah blah
+ http://example.org/
+ http://example.org/
+ http://example.org/
+ {PROVENANCE_XML}
+
+ """,
+ ),
]
]
@pytest.mark.parametrize(
    "metadata_ok", _parameters1,
)
def test_api_checks_check_metadata_ok(metadata_ok, swh_checks_deposit):
    """Valid metadata documents pass the checks; the only detail allowed is
    the "missing suggested field" warning when no swh:deposit is present.
    """
    root = ElementTree.fromstring(metadata_ok)
    actual_check, detail = check_metadata(root)
    assert actual_check is True, f"Unexpected result:\n{pprint.pformat(detail)}"

    if "swh:deposit" not in metadata_ok:
        # missing suggested field
        missing_suggestion = {
            "fields": [METADATA_PROVENANCE_KEY],
            "summary": SUGGESTED_FIELDS_MISSING,
        }
        assert detail == {"metadata": [missing_suggestion]}
        return

    # no missing suggested field
    assert detail is None
_parameters2 = [
pytest.param(textwrap.dedent(metadata_ko), expected_summary, id=id_)
for (id_, metadata_ko, expected_summary) in [
(
"no-name-or-title",
f"""\
something
something-else
someone
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:name or atom:title or codemeta:name"],
},
),
(
"no-author",
f"""\
something
something-else
foobar
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
(
"wrong-title-namespace",
f"""\
something
something-else
bar
someone
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:name or atom:title or codemeta:name"],
},
),
(
"wrong-author-namespace",
f"""\
something
something-else
foobar
foo
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
(
"wrong-author-tag",
f"""\
something
something-else
bar
someone
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
]
]
@pytest.mark.parametrize("metadata_ko,expected_summary", _parameters2)
def test_api_checks_check_metadata_ko(
    metadata_ko, expected_summary, swh_checks_deposit
):
    """Documents lacking a mandatory field are rejected with exactly the
    expected summary entry.
    """
    root = ElementTree.fromstring(metadata_ko)
    actual_check, error_detail = check_metadata(root)

    assert actual_check is False
    expected_detail = {"metadata": [expected_summary]}
    assert error_detail == expected_detail
_parameters3 = [
pytest.param(textwrap.dedent(metadata_ko), expected_summary, id=id_)
for (id_, metadata_ko, expected_summary) in [
(
"child-element-in-name",
f"""\
some url
bar
no one
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: a simple content element can't have child elements.*",
"fields": ["codemeta:name"],
},
],
),
(
"chardata-in-affiliation",
f"""\
some url
bar
someone
My Orga
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: character data between child elements.*",
"fields": ["codemeta:author"],
},
],
),
(
"chardata-in-author",
f"""\
some url
bar
no one
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: character data between child elements.*",
"fields": ["codemeta:author"],
},
],
),
(
"author-with-no-name",
f"""\
some url
bar
should allow anything here
{PROVENANCE_XML}
""",
[
{
"summary": ".*Tag '?codemeta:name'? expected.*",
"fields": ["codemeta:author"],
},
],
),
+ (
+ "contributor-with-no-name",
+ f"""\
+
+ some url
+ bar
+
+ should allow anything here
+
+
+ abc
+
+ {PROVENANCE_XML}
+
+ """,
+ [
+ {
+ "summary": ".*Tag '?codemeta:name'? expected.*",
+ "fields": ["codemeta:contributor"],
+ },
+ ],
+ ),
+ (
+ "id-is-not-url",
+ f"""\
+
+ some url
+ bar
+
+ The Author
+ http://not a url/
+
+ {PROVENANCE_XML}
+
+ """,
+ [
+ {
+ "summary": ".*Reason: 'http://not a url/' is not a valid URI.*",
+ "fields": ["codemeta:author"],
+ },
+ ],
+ ),
+ (
+ "identifier-is-invalid-url",
+ f"""\
+
+ some url
+ bar
+
+ The Author
+ http://[invalid-url/
+
+ {PROVENANCE_XML}
+
+ """,
+ [
+ {
+ "summary": (
+ r".*Reason: 'http://\[invalid-url/' is not a valid URI.*"
+ ),
+ "fields": ["codemeta:author"],
+ },
+ ],
+ ),
+ (
+ "identifier-is-not-url",
+ f"""\
+
+ some url
+ bar
+
+ The Author
+ http://not a url/
+
+ {PROVENANCE_XML}
+
+ """,
+ [
+ {
+ "summary": ".*Reason: 'http://not a url/' is not a valid URI.*",
+ "fields": ["codemeta:author"],
+ },
+ ],
+ ),
+ (
+ "identifier-is-not-url2",
+ f"""\
+
+ some url
+ bar
+
+ The Author
+ not a url
+
+ {PROVENANCE_XML}
+
+ """,
+ [
+ {
+ "summary": ".*Reason: 'not a url' is not an absolute URI.*",
+ "fields": ["codemeta:author"],
+ },
+ ],
+ ),
(
"invalid-dates",
f"""\
something
something-else
bar
someone
2020-aa-21
2020-12-bb
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: invalid value '2020-aa-21'.*",
"fields": ["codemeta:datePublished"],
},
{
"summary": ".*Reason: invalid value '2020-12-bb'.*",
"fields": ["codemeta:dateCreated"],
},
],
),
(
"invalid-dateModified",
f"""\
some url
someid
bar
no one
2020-12-aa
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: invalid value '2020-12-aa'.*",
"fields": ["codemeta:dateModified"],
},
],
),
+ (
+ "invalid-embargoDate",
+ f"""\
+
+ some url
+ someid
+ bar
+ no one
+ 2022-02-28T12:00:00
+ {PROVENANCE_XML}
+
+ """,
+ [
+ {
+ "summary": ".*Invalid datetime string '2022-02-28T12:00:00'.*",
+ "fields": ["codemeta:embargoDate"],
+ },
+ ],
+ ),
(
"error-and-missing-provenance",
f"""\
some url
bar
no one
""",
[
{
"summary": ".*Reason: character data between child elements.*",
"fields": ["codemeta:author"],
},
{
"summary": "Suggested fields are missing",
"fields": ["swh:metadata-provenance"],
},
],
),
(
"unknown-tag-in-swh-namespace",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: Unexpected child with tag 'swh:invalid'.*"
r"Instance:.*swh:invalid.*"
),
"fields": ["swh:deposit"],
}
],
),
(
"multiple-swh:add_to_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
),
"fields": ["swh:deposit"],
}
],
),
(
"swh:add_to_origin-and-swh:create_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *]+ id="swhdeposit-incompatible-create-and-add".*'
),
"fields": ["swh:deposit"],
}
],
),
(
"swh:reference-and-swh:create_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *]+ id="swhdeposit-incompatible-create-and-reference".*'
),
"fields": ["swh:deposit"],
}
],
),
(
"swh:add_to_origin-and-swh:reference",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *]+ id="swhdeposit-incompatible-add-and-reference".*'
),
"fields": ["swh:deposit"],
}
],
),
(
"swh:reference-two-children",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
"fields": ["swh:deposit"],
},
],
),
(
"swh:reference-two-origins",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
"fields": ["swh:deposit"],
},
],
),
(
"swh:reference-two-objects",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
"fields": ["swh:deposit"],
},
],
),
]
]
@pytest.mark.parametrize("metadata_ko,expected_summaries", _parameters3)
def test_api_checks_check_metadata_ko_schema(
    metadata_ko, expected_summaries, swh_checks_deposit
):
    """Documents violating the XML schemas are rejected; each reported entry
    must name the expected field and have a summary matching the expected
    regular expression.
    """
    root = ElementTree.fromstring(metadata_ko)
    actual_check, error_detail = check_metadata(root)
    assert actual_check is False

    reported = error_detail["metadata"]
    assert len(reported) == len(expected_summaries), reported

    for detail, expected_summary in zip(reported, expected_summaries):
        assert detail["fields"] == expected_summary["fields"]

        # xmlschema returns very detailed errors, we cannot reasonably test them
        # for equality
        pattern = expected_summary["summary"]
        summary = detail["summary"]
        matched = re.match(pattern, summary, re.DOTALL)
        assert matched, f"Failed to match {pattern!r} with:\n{summary}"
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
index 4974845b..185fe87a 100644
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -1,1176 +1,1176 @@
# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import ast
import contextlib
import json
import logging
import os
from typing import Optional
from unittest.mock import MagicMock
from xml.etree import ElementTree
import pytest
import yaml
from swh.deposit.api.checks import (
METADATA_PROVENANCE_KEY,
SUGGESTED_FIELDS_MISSING,
check_metadata,
)
from swh.deposit.cli import deposit as cli
from swh.deposit.cli.client import InputError, _collection, _url, generate_metadata
from swh.deposit.client import (
BaseDepositClient,
MaintenanceError,
PublicApiDepositClient,
ServiceDocumentDepositClient,
)
from swh.deposit.parsers import parse_xml
from swh.deposit.utils import NAMESPACES
from swh.model.exceptions import ValidationError
from ..conftest import TEST_USER
def generate_slug() -> str:
    """Return a random UUID4 rendered as a string, used as a sample slug."""
    from uuid import uuid4

    return f"{uuid4()}"
@pytest.fixture
def datadir(request):
    """Override the default datadir: resolve ``../data`` relative to the
    directory of the requesting test file.
    """
    requesting_dir = os.path.dirname(str(request.fspath))
    return os.path.join(requesting_dir, "../data")
@pytest.fixture
def slug():
    """Provide a freshly generated sample slug."""
    fresh_slug = generate_slug()
    return fresh_slug
@pytest.fixture
def patched_tmp_path(tmp_path, mocker):
    """Patch ``tempfile.TemporaryDirectory`` so that entering it yields the
    pytest ``tmp_path`` (as a string), and return ``tmp_path``.
    """
    fake_tmpdir = contextlib.nullcontext(str(tmp_path))
    mocker.patch("tempfile.TemporaryDirectory", return_value=fake_tmpdir)
    return tmp_path
@pytest.fixture
def client_mock_api_down(mocker, slug):
    """A mock client whose ``service_document`` call fails with a
    MaintenanceError, as if the api were down for maintenance.
    """
    down_client = MagicMock()
    mocker.patch("swh.deposit.client.PublicApiDepositClient", return_value=down_client)
    maintenance_error = MaintenanceError(
        "Database backend maintenance: Temporarily unavailable, try again later."
    )
    down_client.service_document.side_effect = maintenance_error
    return down_client
def test_cli_url():
    """``_url`` appends ``/1`` to a bare url and leaves a url already ending
    in ``/1`` untouched.
    """
    expectations = (
        ("http://deposit", "http://deposit/1"),
        ("https://other/1", "https://other/1"),
    )
    for given, expected in expectations:
        assert _url(given) == expected
def test_cli_collection_error():
    """An ``error`` entry in the service document makes ``_collection`` raise
    an InputError carrying that error.
    """
    failing_client = MagicMock()
    failing_client.service_document.return_value = {"error": "something went wrong"}

    with pytest.raises(InputError) as excinfo:
        _collection(failing_client)

    assert str(excinfo.value) == "Service document retrieval: something went wrong"
def test_cli_collection_ok(requests_mock_datadir):
    """``_collection`` returns the collection name found in the service
    document served by the mocked api.
    """
    credentials = ("test", "test")
    client = PublicApiDepositClient(url="https://deposit.swh.test/1", auth=credentials)
    assert _collection(client) == "test"
def test_cli_collection_ko_because_downtime():
    """A MaintenanceError raised by the client propagates out of
    ``_collection``.
    """
    down_client = MagicMock(
        **{"service_document.side_effect": MaintenanceError("downtime")}
    )
    with pytest.raises(MaintenanceError, match="downtime"):
        _collection(down_client)
def test_cli_upload_conflictual_flags(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """Uploading with both --slug and --create-origin set to different values
    raises an InputError.
    """
    api_url_basename = "deposit.test.metadataonly"

    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(atom_dataset["entry-data-minimal"])

    # fmt: off
    upload_args = [
        "upload",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--slug", "some-slug",  # deprecated flag
        "--create-origin", "some-other-slug",  # conflictual value, so raise
        "--format", "json",
    ]
    # fmt: on

    with pytest.raises(InputError, match="both with different values"):
        cli_runner.invoke(cli, upload_args, catch_exceptions=False)
def test_cli_deposit_with_server_down_for_maintenance(
    sample_archive, caplog, client_mock_api_down, slug, patched_tmp_path, cli_runner
):
    """An upload attempted while the api is down for maintenance exits with
    code 1, prints nothing, and logs the maintenance message as an error.
    """
    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--archive", sample_archive["path"],
        "--author", "Jane Doe",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 1, result.output
    assert result.output == ""

    expected_record = (
        "swh.deposit.cli.client",
        logging.ERROR,
        "Database backend maintenance: Temporarily unavailable, try again later.",
    )
    assert expected_record in caplog.record_tuples

    client_mock_api_down.service_document.assert_called_once_with()
def test_cli_client_generate_metadata_ok(slug):
    """Metadata generated with an external id, an origin to create and a
    provenance url is well formed and passes the server-side metadata checks
    without any warning.
    """
    generated_xml = generate_metadata(
        "deposit-client",
        "project-name",
        authors=["some", "authors"],
        external_id="http://example.org/external-id",
        create_origin="origin-url",
        metadata_provenance_url="meta-prov-url",
    )
    parsed = parse_xml(generated_xml)

    def text_at(path):
        return parsed.findtext(path, namespaces=NAMESPACES)

    assert text_at("atom:author") == "deposit-client"
    assert text_at("atom:title") == "project-name"
    assert text_at("atom:updated") is not None
    assert text_at("codemeta:name") == "project-name"
    assert text_at("codemeta:identifier") == "http://example.org/external-id"

    author_names = parsed.findall(
        "codemeta:author/codemeta:name", namespaces=NAMESPACES
    )
    assert len(author_names) == 2
    assert author_names[0].text == "some"
    assert author_names[1].text == "authors"

    origin_elt = parsed.find(
        "swh:deposit/swh:create_origin/swh:origin", namespaces=NAMESPACES
    )
    assert origin_elt.attrib["url"] == "origin-url"
    assert text_at("swh:deposit/swh:metadata-provenance/schema:url") == "meta-prov-url"

    checks_ok, detail = check_metadata(ElementTree.fromstring(generated_xml))
    assert checks_ok is True
    assert detail is None
def test_cli_client_generate_metadata_ok2(slug):
    """Metadata generated from only a client, a name and authors is well
    formed and passes the server-side checks, with a warning about the missing
    metadata provenance.
    """
    generated_xml = generate_metadata(
        "deposit-client", "project-name", authors=["some", "authors"],
    )
    parsed = parse_xml(generated_xml)

    def text_at(path):
        return parsed.findtext(path, namespaces=NAMESPACES)

    assert text_at("atom:author") == "deposit-client"
    assert text_at("atom:title") == "project-name"
    assert text_at("atom:updated") is not None
    assert text_at("codemeta:name") == "project-name"

    author_names = parsed.findall(
        "codemeta:author/codemeta:name", namespaces=NAMESPACES
    )
    assert len(author_names) == 2
    assert author_names[0].text == "some"
    assert author_names[1].text == "authors"

    for absent_path in ("codemeta:identifier", "swh:deposit"):
        assert parsed.find(absent_path, namespaces=NAMESPACES) is None

    checks_ok, detail = check_metadata(ElementTree.fromstring(generated_xml))
    assert checks_ok is True
    missing_suggestion = {
        "summary": SUGGESTED_FIELDS_MISSING,
        "fields": [METADATA_PROVENANCE_KEY],
    }
    assert detail == {"metadata": [missing_suggestion]}
def test_cli_single_minimal_deposit_with_slug(
    sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog,
):
    """ A single deposit upload through the cli using the deprecated --slug
    flag succeeds, writes the expected metadata and logs one warning, cf.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--archive", sample_archive["path"],
        "--metadata-provenance-url", "meta-prov-url",
        "--author", "Jane Doe",
        "--slug", slug,
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    assert json.loads(result.output) == {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }

    with open(metadata_path) as fd:
        written_xml = fd.read()
    parsed = parse_xml(written_xml)

    def text_at(path):
        return parsed.findtext(path, namespaces=NAMESPACES)

    assert text_at("atom:author") == TEST_USER["username"]
    assert text_at("codemeta:name") == "test-project"
    assert text_at("atom:title") == "test-project"
    assert text_at("atom:updated") is not None
    assert text_at("codemeta:identifier") == slug

    author_names = parsed.findall(
        "codemeta:author/codemeta:name", namespaces=NAMESPACES
    )
    assert len(author_names) == 1
    assert author_names[0].text == "Jane Doe"

    count_warnings = sum(
        1 for (_, log_level, _) in caplog.record_tuples if log_level == logging.WARNING
    )
    assert (
        count_warnings == 1
    ), "We should have 1 warning as we are using slug instead of create_origin"
def test_cli_single_minimal_deposit_with_create_origin(
    sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner, caplog,
):
    """ A single deposit upload through the cli using --create-origin
    succeeds, writes the expected metadata and logs no warning, cf.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#single-deposit
    """  # noqa
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    origin = slug

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--archive", sample_archive["path"],
        "--author", "Jane Doe",
        "--create-origin", origin,
        "--metadata-provenance-url", "meta-prov-url",
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    assert json.loads(result.output) == {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }

    with open(metadata_path) as fd:
        written_xml = fd.read()
    parsed = parse_xml(written_xml)

    def text_at(path):
        return parsed.findtext(path, namespaces=NAMESPACES)

    assert text_at("atom:author") == TEST_USER["username"]
    assert text_at("codemeta:name") == "test-project"
    assert text_at("atom:title") == "test-project"
    assert text_at("atom:updated") is not None

    origin_elt = parsed.find(
        "swh:deposit/swh:create_origin/swh:origin", namespaces=NAMESPACES
    )
    assert origin_elt.attrib["url"] == origin
    assert text_at("swh:deposit/swh:metadata-provenance/schema:url") == "meta-prov-url"

    author_names = parsed.findall(
        "codemeta:author/codemeta:name", namespaces=NAMESPACES
    )
    assert len(author_names) == 1
    assert author_names[0].text == "Jane Doe"

    count_warnings = sum(
        1 for (_, log_level, _) in caplog.record_tuples if log_level == logging.WARNING
    )
    assert (
        count_warnings == 0
    ), "We should have no warning as we are using create_origin"
def test_cli_validation_metadata(
    sample_archive, caplog, patched_tmp_path, cli_runner, slug
):
    """Multiple metadata flags scenario (missing, conflicts) properly fails the calls

    Each invocation below is expected to exit with code 1, print nothing on
    its output, and log an option-parsing error through the
    ``swh.deposit.cli.client`` logger.
    """
    metadata_path = os.path.join(patched_tmp_path, "metadata.xml")
    with open(metadata_path, "a"):
        pass  # creates the file
    for flag_title_or_name, author_or_name in [
        ("--author", "no one"),
        ("--name", "test-project"),
    ]:
        # Missing flag: only one of --author / --name is passed (first --author
        # alone, then --name alone) and no --metadata file is given
        # fmt: off
        result = cli_runner.invoke(
            cli,
            [
                "upload",
                "--url", "https://deposit.swh.test/1",
                "--username", TEST_USER["username"],
                "--password", TEST_USER["password"],
                "--archive", sample_archive["path"],
                "--slug", slug,
                flag_title_or_name,
                author_or_name,
            ],
        )
        # fmt: on
        assert result.exit_code == 1, f"unexpected result: {result.output}"
        assert result.output == ""
        expected_error_log_record = (
            "swh.deposit.cli.client",
            logging.ERROR,
            (
                "Problem during parsing options: "
                "For metadata deposit request, either a metadata file with "
                "--metadata or both --author and --name must be provided. "
            ),
        )
        assert expected_error_log_record in caplog.record_tuples
        # Clear mocking state
        caplog.clear()
        # Missing flag again: --name is passed without --author and without
        # --metadata, this time together with --deposit-id
        # fmt: off
        result = cli_runner.invoke(
            cli,
            [
                "upload",
                "--url", "https://deposit.swh.test/1",
                "--username", TEST_USER["username"],
                "--password", TEST_USER["password"],
                "--name", "test-project",
                "--deposit-id", 666,
                "--archive", sample_archive["path"],
                "--slug", slug,
            ],
        )
        # fmt: on
        assert result.exit_code == 1, f"unexpected result: {result.output}"
        assert result.output == ""
        expected_error_log_record = (
            "swh.deposit.cli.client",
            logging.ERROR,
            (
                "Problem during parsing options: "
                "For metadata deposit request, either a metadata file with "
                "--metadata or both --author and --name must be provided."
            ),
        )
        assert expected_error_log_record in caplog.record_tuples
        # Clear mocking state
        caplog.clear()
        # Incompatible flags: --metadata is passed together with --author
        # fmt: off
        result = cli_runner.invoke(
            cli,
            [
                "upload",
                "--url", "https://deposit.swh.test/1",
                "--username", TEST_USER["username"],
                "--password", TEST_USER["password"],
                "--archive", sample_archive["path"],
                "--metadata", metadata_path,
                "--author", "Jane Doe",
                "--slug", slug,
            ],
        )
        # fmt: on
        assert result.exit_code == 1, result.output
        assert result.output == ""
        expected_error_log_record = (
            "swh.deposit.cli.client",
            logging.ERROR,
            (
                "Problem during parsing options: "
                "Using --metadata flag is incompatible with --author "
                "and --name and --create-origin (those are used to generate "
                "one metadata file)."
            ),
        )
        assert expected_error_log_record in caplog.record_tuples
        # Clear the captured records before the next iteration
        caplog.clear()
def test_cli_validation_no_actionable_command(caplog, cli_runner):
    """An upload given neither an archive nor metadata (only --partial) fails
    with exit code 1 and logs a "provide an actionable command" error.
    """
    # no actionable command
    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--partial",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 1, result.output
    assert result.output == ""

    expected_message = (
        "Problem during parsing options: "
        "Please provide an actionable command. See --help for more information"
    )
    expected_record = ("swh.deposit.cli.client", logging.ERROR, expected_message)
    assert expected_record in caplog.record_tuples
def test_cli_validation_replace_with_no_deposit_id_fails(
    sample_archive, caplog, patched_tmp_path, requests_mock_datadir, datadir, cli_runner
):
    """Using --replace without --deposit-id fails with exit code 1 and logs
    that the deposit id must be provided.
    """
    metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml")

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--archive", sample_archive["path"],
        "--replace",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 1, result.output
    assert result.output == ""

    expected_message = (
        "Problem during parsing options: "
        "To update an existing deposit, you must provide its id"
    )
    expected_record = ("swh.deposit.cli.client", logging.ERROR, expected_message)
    assert expected_record in caplog.record_tuples
def test_cli_single_deposit_slug_generation(
    sample_archive, patched_tmp_path, requests_mock_datadir, cli_runner
):
    """Uploading without ``--slug`` must not produce a ``codemeta:identifier``.

    After a successful single-step upload, the ``metadata.xml`` file found
    under ``patched_tmp_path`` is parsed and checked for the absence of the
    identifier entry.
    """
    generated_metadata_path = os.path.join(patched_tmp_path, "metadata.xml")

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--archive", sample_archive["path"],
        "--author", "Jane Doe",
        "--format", "json",
    ]
    # fmt: on
    outcome = cli_runner.invoke(cli, upload_args)

    assert outcome.exit_code == 0, outcome.output
    expected_deposit = {
        "deposit_id": "615",
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }
    assert json.loads(outcome.output) == expected_deposit

    with open(generated_metadata_path) as metadata_file:
        raw_metadata = metadata_file.read()

    # No slug was provided, so no identifier may show up in the metadata.
    parsed_metadata = parse_xml(raw_metadata)
    assert "codemeta:identifier" not in parsed_metadata
def test_cli_multisteps_deposit(
    sample_archive, datadir, slug, requests_mock_datadir, cli_runner
):
    """Exercise a deposit made in several steps through the cli.

    A partial deposit holding only an archive is created first, then updated
    with an archive again, and finally updated with the metadata.
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#multisteps-deposit
    """  # noqa
    api_url = "https://deposit.test.metadata/1"
    deposit_id = 666

    # Arguments shared by the three invocations below.
    # fmt: off
    common_args = [
        "upload",
        "--url", api_url,
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
    ]
    # fmt: on

    # Step 1: create a partial deposit holding a single archive
    # fmt: off
    creation_args = common_args + [
        "--archive", sample_archive["path"],
        "--slug", slug,
        "--format", "json",
        "--partial",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, creation_args)

    assert result.exit_code == 0, f"unexpected output: {result.output}"
    created_deposit = json.loads(result.output)
    assert created_deposit == {
        "deposit_id": str(deposit_id),
        "deposit_status": "partial",
        "deposit_status_detail": None,
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }

    # Step 2: update that partial deposit, again with a single archive
    # fmt: off
    archive_update_args = common_args + [
        "--archive", sample_archive["path"],
        "--deposit-id", deposit_id,
        "--slug", slug,
        "--format", "json",
        "--partial",  # still in progress: the metadata remains to be uploaded
    ]
    # fmt: on
    result = cli_runner.invoke(cli, archive_update_args)

    assert result.exit_code == 0, f"unexpected output: {result.output}"
    assert result.output is not None
    # an update returns a deposit status dict
    updated_deposit = json.loads(result.output)
    assert updated_deposit["deposit_id"] == str(deposit_id)
    assert updated_deposit["deposit_status"] == "partial"

    # Step 3: update the deposit with metadata only, which finalizes it
    # https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html#add-content-or-metadata-to-the-deposit
    metadata_path = os.path.join(datadir, "atom", "entry-data-deposit-binary.xml")

    # --partial is dropped this time, so in-progress becomes false
    # fmt: off
    metadata_update_args = common_args + [
        "--metadata", metadata_path,
        "--deposit-id", deposit_id,
        "--slug", slug,
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, metadata_update_args)

    assert result.exit_code == 0, f"unexpected output: {result.output}"
    assert result.output is not None
    # an update returns a deposit status dict
    finalized_deposit = json.loads(result.output)
    assert finalized_deposit["deposit_id"] == str(deposit_id)
    # FIXME: should be "deposited" but current limitation in the
    # requests_mock_datadir_visits use, cannot find a way to make it work right now
    assert finalized_deposit["deposit_status"] == "partial"
@pytest.mark.parametrize(
    "output_format,parser_fn",
    [
        ("json", json.loads),
        ("yaml", yaml.safe_load),
        # parsing alone is not enough for "logging": the caplog fixture is
        # needed to retrieve the message
        ("logging", ast.literal_eval),
    ],
)
def test_cli_deposit_status_with_output_format(
    output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner
):
    """The ``status`` command returns the same deposit status in every format.

    Covered output formats are json, yaml and logging; for the latter the
    status is read from the captured log records instead of the cli output.
    """
    api_url_basename = "deposit.test.status"
    deposit_id = 1033
    expected_deposit_status = {
        "deposit_id": str(deposit_id),
        "deposit_status": "done",
        "deposit_status_detail": (
            "The deposit has been successfully loaded into the "
            "Software Heritage archive"
        ),
        "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
        "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655",
    }

    # fmt: off
    status_args = [
        "status",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--deposit-id", deposit_id,
        "--format", output_format,
    ]
    # fmt: on
    result = cli_runner.invoke(cli, status_args)
    assert result.exit_code == 0, f"unexpected output: {result.output}"

    if output_format != "logging":
        raw_status = result.output
    else:
        assert len(caplog.record_tuples) == 1
        # each record tuple is (logger name, log level, message)
        _, _, raw_status = caplog.record_tuples[0]

    assert parser_fn(raw_status) == expected_deposit_status
def test_cli_update_metadata_with_swhid_on_completed_deposit(
    datadir, requests_mock_datadir, cli_runner
):
    """Sending new metadata with a SWHID on a deposit in status done succeeds.

    The cli is expected to exit with status 0 and to output the status of the
    deposit, without any "error" key.
    """
    api_url_basename = "deposit.test.updateswhid"
    deposit_id = 123
    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
    expected_deposit_status = {
        "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655",
        "deposit_id": str(deposit_id),
        "deposit_status": "done",
        "deposit_status_detail": (
            "The deposit has been successfully loaded into the "
            "Software Heritage archive"
        ),
        "deposit_swh_id": swhid,
        "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
    }

    # Preconditions of the scenario: a completed deposit which has a swhid
    assert expected_deposit_status["deposit_status"] == "done"
    assert expected_deposit_status["deposit_swh_id"] is not None

    # fmt: off
    upload_args = [
        "upload",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--author", "John Doe",
        "--deposit-id", deposit_id,
        "--swhid", expected_deposit_status["deposit_swh_id"],
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    returned_status = json.loads(result.output)
    assert "error" not in returned_status
    assert returned_status == expected_deposit_status
def test_cli_update_metadata_with_swhid_on_other_status_deposit(
    datadir, requests_mock_datadir, cli_runner
):
    """Sending metadata with a SWHID on a deposit not in status done is refused.

    The cli still exits with status 0, but its json output is an error dict
    reporting the current (partial) status of the deposit.
    """
    api_url_basename = "deposit.test.updateswhid"
    deposit_id = "321"

    # fmt: off
    upload_args = [
        "upload",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--name", "test-project",
        "--author", "John Doe",
        "--deposit-id", deposit_id,
        "--swhid", "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, upload_args)

    assert result.exit_code == 0, result.output
    returned_error = json.loads(result.output)
    assert "error" in returned_error

    expected_error = {
        "error": "You can only update metadata on deposit with status 'done'",
        "detail": f"The deposit {deposit_id} has status 'partial'",
        "deposit_status": "partial",
        "deposit_id": deposit_id,
    }
    assert returned_error == expected_error
@pytest.mark.parametrize(
    "metadata_entry_key", ["entry-data-with-swhid", "entry-data-with-swhid-no-prov"]
)
def test_cli_metadata_only_deposit_full_metadata_file(
    datadir,
    requests_mock_datadir,
    cli_runner,
    atom_dataset,
    tmp_path,
    metadata_entry_key,
    caplog,
):
    """A metadata-only deposit works when the metadata file holds the swhid.

    The posted file is rendered from an atom template into which the swhid is
    injected. With the "no-prov" template exactly one warning, about the
    metadata provenance, must be logged; otherwise none.
    """
    api_url_basename = "deposit.test.metadataonly"
    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"

    # Only the first template takes a metadata provenance url placeholder
    template = atom_dataset[metadata_entry_key]
    if metadata_entry_key != "entry-data-with-swhid":
        metadata = template.format(swhid=swhid)
    else:
        provenance_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-abcdefgh"
        metadata = template.format(swhid=swhid, metadata_provenance_url=provenance_url)

    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(metadata)

    expected_deposit_status = {
        "deposit_id": "100",
        "deposit_status": "done",
        "deposit_date": "2020-10-08T13:52:34.509655Z",
    }
    assert expected_deposit_status["deposit_status"] == "done"

    # fmt: off
    deposit_args = [
        "metadata-only",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--format", "json",
    ]
    # fmt: on
    result = cli_runner.invoke(cli, deposit_args)

    assert result.exit_code == 0, result.output
    returned_status = json.loads(result.output)
    assert "error" not in returned_status
    assert returned_status == expected_deposit_status

    # Gather the messages of all records logged at WARNING level
    warning_messages = [
        message
        for (_, level, message) in caplog.record_tuples
        if level == logging.WARNING
    ]
    count_warnings = len(warning_messages)
    warning_record: Optional[str] = warning_messages[-1] if warning_messages else None

    if "no-prov" not in metadata_entry_key:
        assert count_warnings == 0
    else:
        assert count_warnings == 1
        assert "metadata-provenance>' should be provided" in warning_record
def test_cli_metadata_only_deposit_invalid_swhid(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """A metadata-only deposit whose metadata holds an invalid swhid raises.

    Exceptions are not caught by the cli runner, so the ValidationError
    propagates up to the test.
    """
    api_url_basename = "deposit.test.metadataonly"
    invalid_swhid = "ssh:2:sth:xxx"

    template = atom_dataset["entry-data-with-swhid-no-prov"]
    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(template.format(swhid=invalid_swhid))

    # fmt: off
    deposit_args = [
        "metadata-only",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--format", "json",
    ]
    # fmt: on
    with pytest.raises(ValidationError, match="Invalid"):
        cli_runner.invoke(cli, deposit_args, catch_exceptions=False)
def test_cli_metadata_only_deposit_no_swhid(
    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
):
    """A metadata-only deposit using the minimal metadata entry raises.

    Exceptions are not caught by the cli runner, so the InputError requesting
    a SWHID propagates up to the test.
    """
    api_url_basename = "deposit.test.metadataonly"

    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(atom_dataset["entry-data-minimal"])

    # fmt: off
    deposit_args = [
        "metadata-only",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
        "--format", "json",
    ]
    # fmt: on
    with pytest.raises(InputError, match="SWHID must be provided"):
        cli_runner.invoke(cli, deposit_args, catch_exceptions=False)
@pytest.mark.parametrize(
    "metadata_entry_key", ["entry-data-with-add-to-origin", "entry-only-create-origin"]
)
def test_cli_deposit_warning_missing_origin(
    metadata_entry_key,
    tmp_path,
    atom_dataset,
    caplog,
    cli_runner,
    requests_mock_datadir,
):
    """Uploading metadata from these dataset entries must not log any warning.

    Every record captured during the upload has to be below WARNING level.
    """
    # NOTE(review): presumably no warning is due because each of these entries
    # declares an origin and a metadata provenance -- confirm against the
    # atom dataset.
    rendered_metadata = atom_dataset[metadata_entry_key] % "some-url"
    metadata_path = os.path.join(tmp_path, "metadata-with-origin-tag-to-deposit.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(rendered_metadata)

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
    ]
    # fmt: on
    cli_runner.invoke(cli, upload_args)

    # only messages at info level or below are acceptable
    assert all(level < logging.WARNING for (_, level, _) in caplog.record_tuples)
def test_cli_deposit_warning_missing_provenance_url(
    tmp_path, atom_dataset, caplog, cli_runner, requests_mock_datadir,
):
    """Uploading metadata lacking a metadata provenance logs one warning.

    Exactly one record at WARNING level is expected among the captured logs.
    """
    template = atom_dataset["entry-data-with-add-to-origin-no-prov"]
    metadata_path = os.path.join(tmp_path, "metadata-with-missing-prov-url.xml")
    with open(metadata_path, "w") as metadata_file:
        metadata_file.write(template % "some-url")

    # fmt: off
    upload_args = [
        "upload",
        "--url", "https://deposit.swh.test/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--metadata", metadata_path,
    ]
    # fmt: on
    cli_runner.invoke(cli, upload_args)

    warning_levels = [
        level for (_, level, _) in caplog.record_tuples if level == logging.WARNING
    ]
    assert len(warning_levels) == 1
def test_cli_failure_should_be_parseable(atom_dataset, mocker):
    """An error document is parsed by the base deposit client into a dict.

    The dict must expose the summary, an empty detail and the verbose
    description of the error.
    """
    summary = "Cannot load metadata"
    verbose_description = (
        "Cannot load metadata on swh:1:dir:0eda267e7d3c2e37b3f6a78e542b16190ac4574e, "
        "this directory object does not exist in the archive (yet?)."
    )
    expected_error = {
        "summary": summary,
        "detail": "",
        "sword:verboseDescription": verbose_description,
    }

    # Render the error document out of the "error-cli" template
    error_template = atom_dataset["error-cli"]
    error_xml = error_template.format(
        summary=summary, verboseDescription=verbose_description
    )

    client = BaseDepositClient(url="https://somewhere.org/")
    assert client.parse_result_error(error_xml) == expected_error
def test_cli_service_document_failure(atom_dataset, mocker):
    """The service document client reduces an error document to its summary.

    Parsing must return a dict with only an "error" key holding the summary.
    """
    summary = "Invalid user credentials"

    # Render the error document with an empty verbose description
    error_template = atom_dataset["error-cli"]
    error_xml = error_template.format(summary=summary, verboseDescription="")

    client = ServiceDocumentDepositClient(url="https://somewhere.org/")
    parsed_error = client.parse_result_error(error_xml)
    assert parsed_error == {"error": summary}
@pytest.mark.parametrize(
    "output_format,parser_fn",
    [
        ("json", json.loads),
        ("yaml", yaml.safe_load),
        # parsing alone is not enough for "logging": the caplog fixture is
        # needed to retrieve the message
        ("logging", ast.literal_eval),
    ],
)
def test_cli_deposit_collection_list(
    output_format, parser_fn, datadir, slug, requests_mock_datadir, caplog, cli_runner
):
    """The ``list`` command returns the same deposits in every output format.

    Covered output formats are json, yaml and logging; for the latter the
    listing is read from the captured log records instead of the cli output.
    """
    api_url_basename = "deposit.test.list"

    rejected_deposits = [
        {
            "external_id": "check-deposit-2020-10-09T13:10:00.000000",
            "id": "1031",
            "status": "rejected",
            "status_detail": "Deposit without archive",
        },
        {
            "external_id": "check-deposit-2020-10-10T13:20:00.000000",
            "id": "1032",
            "status": "rejected",
            "status_detail": "Deposit without archive",
        },
    ]
    done_deposit = {
        "complete_date": "2020-10-08T13:52:34.509655",
        "external_id": "check-deposit-2020-10-08T13:52:34.509655",
        "id": "1033",
        "reception_date": "2020-10-08T13:50:30",
        "status": "done",
        "status_detail": "The deposit has been successfully loaded into "
        "the Software Heritage archive",
        "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
        "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/",  # noqa
    }
    expected_deposits = {
        "count": "3",
        "deposits": rejected_deposits + [done_deposit],
    }

    # fmt: off
    list_args = [
        "list",
        "--url", f"https://{api_url_basename}/1",
        "--username", TEST_USER["username"],
        "--password", TEST_USER["password"],
        "--page", 1,
        "--page-size", 10,
        "--format", output_format,
    ]
    # fmt: on
    result = cli_runner.invoke(cli, list_args)
    assert result.exit_code == 0, f"unexpected output: {result.output}"

    if output_format != "logging":
        raw_listing = result.output
    else:
        assert len(caplog.record_tuples) == 1
        # each record tuple is (logger name, log level, message)
        _, _, raw_listing = caplog.record_tuples[0]

    assert parser_fn(raw_listing) == expected_deposits
diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd
index cfe7380e..3b4271db 100644
--- a/swh/deposit/xsd/codemeta.xsd
+++ b/swh/deposit/xsd/codemeta.xsd
@@ -1,41 +1,60 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+