diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
index 01ebd382..cf4718cc 100644
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -1,712 +1,734 @@
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# disable flake8 on this file because of line length
# flake8: noqa
import re
import textwrap
from typing import Any, Dict
from xml.etree import ElementTree
import pytest
from swh.deposit.api.checks import (
METADATA_PROVENANCE_KEY,
SUGGESTED_FIELDS_MISSING,
check_metadata,
)
# Dict form of a deposit's provenance metadata: the provenance key nested
# under "swh:deposit", holding a single "schema:url" entry.
# NOTE(review): not referenced in the portion of the file visible here --
# confirm it is still used before relying on it.
METADATA_PROVENANCE_DICT: Dict[str, Any] = {
"swh:deposit": {
METADATA_PROVENANCE_KEY: {"schema:url": "some-metadata-provenance-url"}
}
}
# Namespace declarations (default Atom namespace plus the swh, codemeta and
# schema prefixes), written as XML attributes.
# NOTE(review): presumably spliced into the root element of the test
# documents below -- the splice point is not visible in this view.
XMLNS = """xmlns="http://www.w3.org/2005/Atom"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:schema="http://schema.org/" """
# Provenance fragment interpolated into the test documents below through
# ``{PROVENANCE_XML}``.
# NOTE(review): as shown here the literal carries only the URL text, while
# test_api_checks_check_metadata_ok looks for "swh:deposit" in the document;
# the enclosing markup appears to be missing from this view -- confirm.
PROVENANCE_XML = """
some-metadata-provenance-url
"""
_parameters1 = [
- textwrap.dedent(metadata_ok)
- for (metadata_ok,) in [
+ pytest.param(textwrap.dedent(metadata_ok), id=id_)
+ for (id_, metadata_ok,) in [
(
+ "atom-only-with-name",
f"""\
something
something-else
foo
someone
{PROVENANCE_XML}
""",
),
(
+ "atom-only-with-title",
+ f"""\
+
+ something
+ something-else
+ bar
+ someone
+
+ """,
+ ),
+ (
+ "atom-only-and-external_identifier",
f"""\
something
something-else
foo
no one
{PROVENANCE_XML}
""",
),
(
+ "atom-and-codemeta-minimal",
f"""\
some url
bar
no one
{PROVENANCE_XML}
""",
),
(
+ "unknown-codemeta-inner-element-after",
+ f"""\
+
+ some url
+ bar
+
+ someone
+ should allow anything here
+
+ {PROVENANCE_XML}
+
+ """,
+ ),
+ (
+ "unknown-schema-inner-element-after",
f"""\
some url
bar
someone
should allow anything here
{PROVENANCE_XML}
""",
),
(
+ "unknown-schema-inner-element-before",
f"""\
some url
bar
should allow anything here
someone
{PROVENANCE_XML}
""",
),
(
+ "unknown-schema-inner-element-before-and-after",
f"""\
some url
bar
should allow anything here
someone
should allow anything here
{PROVENANCE_XML}
""",
),
(
+ "codemeta-dates",
f"""\
some url
some id
nar
no one
2020-12-21
2020-12-21
2020-12-25
{PROVENANCE_XML}
""",
),
(
+ "codemeta-datetimes",
# technically, only Date is allowed for datePublished; but we allow DateTime
# for backward compatibility with old swh-deposit versions
f"""\
some url
some id
nar
no one
2020-12-21T12:00:00
2020-12-21T12:00:00
2020-12-25T12:00:00
{PROVENANCE_XML}
""",
),
(
- f"""\
-
- something
- something-else
- bar
- someone
-
- """,
- ),
- (
+ "swh:add_to_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
),
(
+ "swh:reference-origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
),
(
+ "swh:reference-object",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
),
]
]
@pytest.mark.parametrize("metadata_ok", _parameters1)
def test_api_checks_check_metadata_ok(metadata_ok, swh_checks_deposit):
    """Check that every valid document in ``_parameters1`` is accepted.

    ``check_metadata`` is expected to return ``True`` together with a detail
    that is ``None`` when the raw document mentions ``swh:deposit``, and
    otherwise a single "suggested fields missing" entry naming the
    provenance key.

    ``swh_checks_deposit`` is a fixture defined outside this file; it is
    requested but never read here (presumably for its setup side effects).
    """
    document = ElementTree.fromstring(metadata_ok)
    actual_check, detail = check_metadata(document)
    assert actual_check is True, f"Unexpected result: {detail}"

    if "swh:deposit" not in metadata_ok:
        # missing suggested field: accepted, but reported in the detail
        missing_provenance = {
            "fields": [METADATA_PROVENANCE_KEY],
            "summary": SUGGESTED_FIELDS_MISSING,
        }
        assert detail == {"metadata": [missing_provenance]}
    else:
        # no missing suggested field
        assert detail is None
_parameters2 = [
- (textwrap.dedent(metadata_ko), expected_summary)
- for (metadata_ko, expected_summary) in [
+ pytest.param(textwrap.dedent(metadata_ko), expected_summary, id=id_)
+ for (id_, metadata_ko, expected_summary) in [
(
+ "no-name-or-title",
f"""\
something
something-else
someone
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:name or atom:title or codemeta:name"],
},
),
(
+ "no-author",
f"""\
something
something-else
foobar
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
(
+ "wrong-title-namespace",
f"""\
something
something-else
bar
someone
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:name or atom:title or codemeta:name"],
},
),
(
+ "wrong-author-namespace",
f"""\
something
something-else
foobar
foo
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
(
+ "wrong-author-tag",
f"""\
something
something-else
bar
someone
{PROVENANCE_XML}
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
]
]
@pytest.mark.parametrize("metadata_ko,expected_summary", _parameters2)
def test_api_checks_check_metadata_ko(
    metadata_ko, expected_summary, swh_checks_deposit
):
    """Check that documents missing a mandatory field are rejected.

    For each case in ``_parameters2``, ``check_metadata`` must return
    ``False`` and a detail whose ``"metadata"`` list holds exactly the
    expected summary.

    ``swh_checks_deposit`` is a fixture defined outside this file; it is
    requested but never read here.
    """
    root = ElementTree.fromstring(metadata_ko)
    actual_check, error_detail = check_metadata(root)

    assert actual_check is False
    expected_detail = {"metadata": [expected_summary]}
    assert error_detail == expected_detail
_parameters3 = [
- (textwrap.dedent(metadata_ko), expected_summary)
- for (metadata_ko, expected_summary) in [
+ pytest.param(textwrap.dedent(metadata_ko), expected_summary, id=id_)
+ for (id_, metadata_ko, expected_summary) in [
(
+ "child-element-in-name",
f"""\
some url
bar
no one
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: a simple content element can't have child elements.*",
"fields": ["codemeta:name"],
},
],
),
(
+ "chardata-in-author",
f"""\
some url
bar
no one
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: character data between child elements.*",
"fields": ["codemeta:author"],
},
],
),
(
+ "author-with-no-name",
f"""\
some url
bar
should allow anything here
{PROVENANCE_XML}
""",
[
{
"summary": ".*Tag '?codemeta:name'? expected.*",
"fields": ["codemeta:author"],
},
],
),
(
+ "invalid-dates",
f"""\
something
something-else
bar
someone
2020-aa-21
2020-12-bb
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: invalid value '2020-aa-21'.*",
"fields": ["codemeta:datePublished"],
},
{
"summary": ".*Reason: invalid value '2020-12-bb'.*",
"fields": ["codemeta:dateCreated"],
},
],
),
(
- f"""\
-
- some url
- someid
- bar
- no one
- 2020-aa-21
- 2020-12-bb
- {PROVENANCE_XML}
-
- """,
- [
- {
- "summary": ".*Reason: invalid value '2020-aa-21'.*",
- "fields": ["codemeta:datePublished"],
- },
- {
- "summary": ".*Reason: invalid value '2020-12-bb'.*",
- "fields": ["codemeta:dateCreated"],
- },
- ],
- ),
- (
+ "invalid-dateModified",
f"""\
some url
someid
bar
no one
2020-12-aa
{PROVENANCE_XML}
""",
[
{
"summary": ".*Reason: invalid value '2020-12-aa'.*",
"fields": ["codemeta:dateModified"],
},
],
),
(
+ "error-and-missing-provenance",
f"""\
some url
bar
no one
""",
[
{
"summary": ".*Reason: character data between child elements.*",
"fields": ["codemeta:author"],
},
{
"summary": "Suggested fields are missing",
"fields": ["swh:metadata-provenance"],
},
],
),
(
+ "unknown-tag-in-swh-namespace",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: Unexpected child with tag 'swh:invalid'.*"
r"Instance:.*swh:invalid.*"
),
"fields": ["swh:deposit"],
}
],
),
(
+ "multiple-swh:add_to_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
),
"fields": ["swh:deposit"],
}
],
),
(
+ "swh:add_to_origin-and-swh:create_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *]+ id="swhdeposit-incompatible-create-and-add".*'
),
"fields": ["swh:deposit"],
}
],
),
(
+ "swh:reference-and-swh:create_origin",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *]+ id="swhdeposit-incompatible-create-and-reference".*'
),
"fields": ["swh:deposit"],
}
],
),
(
+ "swh:add_to_origin-and-swh:reference",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *]+ id="swhdeposit-incompatible-add-and-reference".*'
),
"fields": ["swh:deposit"],
}
],
),
(
+ "swh:reference-two-children",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
"fields": ["swh:deposit"],
},
],
),
(
+ "swh:reference-two-origins",
f"""\
something
something-else
bar
someone
some-metadata-provenance-url
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
"fields": ["swh:deposit"],
},
],
),
(
+ "swh:reference-two-objects",
f"""\
something
something-else
bar
someone
-
+
some-metadata-provenance-url
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
"fields": ["swh:deposit"],
},
],
),
]
]
@pytest.mark.parametrize("metadata_ko,expected_summaries", _parameters3)
def test_api_checks_check_metadata_ko_schema(
    metadata_ko, expected_summaries, swh_checks_deposit
):
    """Check that schema-invalid documents are rejected with the right errors.

    For each case in ``_parameters3``, ``check_metadata`` must return
    ``False`` and report as many entries as there are expected summaries.
    Entries are compared pairwise, in order: ``"fields"`` must be equal and
    ``"summary"`` must match the expected regular expression.

    ``swh_checks_deposit`` is a fixture defined outside this file; it is
    requested but never read here.
    """
    root = ElementTree.fromstring(metadata_ko)
    actual_check, error_detail = check_metadata(root)
    assert actual_check is False

    reported = error_detail["metadata"]
    assert len(reported) == len(expected_summaries), reported

    for detail, expected_summary in zip(reported, expected_summaries):
        assert detail["fields"] == expected_summary["fields"]

        # xmlschema returns very detailed errors, we cannot reasonably test
        # them for equality, so the expected summary is a regex instead.
        summary = detail["summary"]
        pattern = expected_summary["summary"]
        # re.DOTALL lets ".*" run across the line breaks in the error text.
        matched = re.match(pattern, summary, re.DOTALL)
        assert matched, f"Failed to match {pattern!r} with:\n{summary}"