diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py index 01ebd382..cf4718cc 100644 --- a/swh/deposit/tests/api/test_checks.py +++ b/swh/deposit/tests/api/test_checks.py @@ -1,712 +1,734 @@ # Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # disable flake8 on this file because of line length # flake8: noqa import re import textwrap from typing import Any, Dict from xml.etree import ElementTree import pytest from swh.deposit.api.checks import ( METADATA_PROVENANCE_KEY, SUGGESTED_FIELDS_MISSING, check_metadata, ) METADATA_PROVENANCE_DICT: Dict[str, Any] = { "swh:deposit": { METADATA_PROVENANCE_KEY: {"schema:url": "some-metadata-provenance-url"} } } XMLNS = """xmlns="http://www.w3.org/2005/Atom" xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit" xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0" xmlns:schema="http://schema.org/" """ PROVENANCE_XML = """ some-metadata-provenance-url """ _parameters1 = [ - textwrap.dedent(metadata_ok) - for (metadata_ok,) in [ + pytest.param(textwrap.dedent(metadata_ok), id=id_) + for (id_, metadata_ok,) in [ ( + "atom-only-with-name", f"""\ something something-else foo someone {PROVENANCE_XML} """, ), ( + "atom-only-with-title", + f"""\ + + something + something-else + bar + someone + + """, + ), + ( + "atom-only-and-external_identifier", f"""\ something something-else foo no one {PROVENANCE_XML} """, ), ( + "atom-and-codemeta-minimal", f"""\ some url bar no one {PROVENANCE_XML} """, ), ( + "unknown-codemeta-inner-element-after", + f"""\ + + some url + bar + + someone + should allow anything here + + {PROVENANCE_XML} + + """, + ), + ( + "unknown-schema-inner-element-after", f"""\ some url bar someone should allow anything here {PROVENANCE_XML} """, ), ( + "unknown-schema-inner-element-before", f"""\ some url bar should allow anything here someone {PROVENANCE_XML} """, ), ( + "unknown-schema-inner-element-before-and-after", f"""\ some url bar should allow anything here someone should allow anything here {PROVENANCE_XML} """, ), ( + "codemeta-dates", f"""\ some url some id nar no one 2020-12-21 2020-12-21 2020-12-25 {PROVENANCE_XML} """, ), ( + "codemeta-datetimes", # technically, only Date is allowed for datePublished; but we allow DateTime # for backward compatibility with old swh-deposit versions f"""\ some url some id nar no one 2020-12-21T12:00:00 2020-12-21T12:00:00 2020-12-25T12:00:00 {PROVENANCE_XML} """, ), ( - f"""\ - - something - something-else - bar - someone - - """, - ), - ( + "swh:add_to_origin", f"""\ something something-else bar someone some-metadata-provenance-url """, ), ( + "swh:reference-origin", f"""\ something something-else bar someone some-metadata-provenance-url """, ), ( + "swh:reference-object", f"""\ something something-else bar someone some-metadata-provenance-url """, ), ] ] @pytest.mark.parametrize( "metadata_ok", _parameters1, ) def test_api_checks_check_metadata_ok(metadata_ok, swh_checks_deposit): actual_check, detail = check_metadata(ElementTree.fromstring(metadata_ok)) assert actual_check is True, f"Unexpected result: {detail}" if "swh:deposit" in metadata_ok: # no missing suggested field assert detail is None else: # missing suggested field assert detail == { "metadata": [ { "fields": [METADATA_PROVENANCE_KEY], "summary": SUGGESTED_FIELDS_MISSING, } ] } _parameters2 = [ - (textwrap.dedent(metadata_ko), expected_summary) - for (metadata_ko, expected_summary) in [ + pytest.param(textwrap.dedent(metadata_ko), expected_summary, id=id_) + for (id_, metadata_ko, expected_summary) in [ ( + "no-name-or-title", f"""\ something something-else someone {PROVENANCE_XML} """, { "summary": "Mandatory fields are missing", "fields": ["atom:name or atom:title or codemeta:name"], }, ), ( + "no-author", f"""\ something something-else foobar {PROVENANCE_XML} """, { "summary": "Mandatory fields are missing", "fields": ["atom:author or codemeta:author"], }, ), ( + "wrong-title-namespace", f"""\ something something-else bar someone {PROVENANCE_XML} """, { "summary": "Mandatory fields are missing", "fields": ["atom:name or atom:title or codemeta:name"], }, ), ( + "wrong-author-namespace", f"""\ something something-else foobar foo {PROVENANCE_XML} """, { "summary": "Mandatory fields are missing", "fields": ["atom:author or codemeta:author"], }, ), ( + "wrong-author-tag", f"""\ something something-else bar someone {PROVENANCE_XML} """, { "summary": "Mandatory fields are missing", "fields": ["atom:author or codemeta:author"], }, ), ] ] @pytest.mark.parametrize("metadata_ko,expected_summary", _parameters2) def test_api_checks_check_metadata_ko( metadata_ko, expected_summary, swh_checks_deposit ): actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko)) assert actual_check is False assert error_detail == {"metadata": [expected_summary]} _parameters3 = [ - (textwrap.dedent(metadata_ko), expected_summary) - for (metadata_ko, expected_summary) in [ + pytest.param(textwrap.dedent(metadata_ko), expected_summary, id=id_) + for (id_, metadata_ko, expected_summary) in [ ( + "child-element-in-name", f"""\ some url bar no one {PROVENANCE_XML} """, [ { "summary": ".*Reason: a simple content element can't have child elements.*", "fields": ["codemeta:name"], }, ], ), ( + "chardata-in-author", f"""\ some url bar no one {PROVENANCE_XML} """, [ { "summary": ".*Reason: character data between child elements.*", "fields": ["codemeta:author"], }, ], ), ( + "author-with-no-name", f"""\ some url bar should allow anything here {PROVENANCE_XML} """, [ { "summary": ".*Tag '?codemeta:name'? expected.*", "fields": ["codemeta:author"], }, ], ), ( + "invalid-dates", f"""\ something something-else bar someone 2020-aa-21 2020-12-bb {PROVENANCE_XML} """, [ { "summary": ".*Reason: invalid value '2020-aa-21'.*", "fields": ["codemeta:datePublished"], }, { "summary": ".*Reason: invalid value '2020-12-bb'.*", "fields": ["codemeta:dateCreated"], }, ], ), ( - f"""\ - - some url - someid - bar - no one - 2020-aa-21 - 2020-12-bb - {PROVENANCE_XML} - - """, - [ - { - "summary": ".*Reason: invalid value '2020-aa-21'.*", - "fields": ["codemeta:datePublished"], - }, - { - "summary": ".*Reason: invalid value '2020-12-bb'.*", - "fields": ["codemeta:dateCreated"], - }, - ], - ), - ( + "invalid-dateModified", f"""\ some url someid bar no one 2020-12-aa {PROVENANCE_XML} """, [ { "summary": ".*Reason: invalid value '2020-12-aa'.*", "fields": ["codemeta:dateModified"], }, ], ), ( + "error-and-missing-provenance", f"""\ some url bar no one """, [ { "summary": ".*Reason: character data between child elements.*", "fields": ["codemeta:author"], }, { "summary": "Suggested fields are missing", "fields": ["swh:metadata-provenance"], }, ], ), ( + "unknown-tag-in-swh-namespace", f"""\ something something-else bar someone some-metadata-provenance-url """, [ { "summary": ( r".*Reason: Unexpected child with tag 'swh:invalid'.*" r"Instance:.*swh:invalid.*" ), "fields": ["swh:deposit"], } ], ), ( + "multiple-swh:add_to_origin", f"""\ something something-else bar someone some-metadata-provenance-url """, [ { "summary": ( r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*" ), "fields": ["swh:deposit"], } ], ), ( + "swh:add_to_origin-and-swh:create_origin", f"""\ something something-else bar someone some-metadata-provenance-url """, [ { "summary": ( r".*Reason: assertion test if false.*" r"Schema:\n*" r' *]+ id="swhdeposit-incompatible-create-and-add".*' ), "fields": ["swh:deposit"], } ], ), ( + "swh:reference-and-swh:create_origin", f"""\ something something-else bar someone some-metadata-provenance-url """, [ { "summary": ( r".*Reason: assertion test if false.*" r"Schema:\n*" r' *]+ id="swhdeposit-incompatible-create-and-reference".*' ), "fields": ["swh:deposit"], } ], ), ( + "swh:add_to_origin-and-swh:reference", f"""\ something something-else bar someone some-metadata-provenance-url """, [ { "summary": ( r".*Reason: assertion test if false.*" r"Schema:\n*" r' *]+ id="swhdeposit-incompatible-add-and-reference".*' ), "fields": ["swh:deposit"], } ], ), ( + "swh:reference-two-children", f"""\ something something-else bar someone some-metadata-provenance-url """, [ { "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*", "fields": ["swh:deposit"], }, ], ), ( + "swh:reference-two-origins", f"""\ something something-else bar someone some-metadata-provenance-url """, [ { "summary": r".*Reason: Unexpected child with tag 'swh:origin'.*", "fields": ["swh:deposit"], }, ], ), ( + "swh:reference-two-objects", f"""\ something something-else bar someone - + some-metadata-provenance-url """, [ { "summary": r".*Reason: Unexpected child with tag 'swh:object'.*", "fields": ["swh:deposit"], }, ], ), ] ] @pytest.mark.parametrize("metadata_ko,expected_summaries", _parameters3) def test_api_checks_check_metadata_ko_schema( metadata_ko, expected_summaries, swh_checks_deposit ): actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko)) assert actual_check is False assert len(error_detail["metadata"]) == len(expected_summaries), error_detail[ "metadata" ] for (detail, expected_summary) in zip(error_detail["metadata"], expected_summaries): assert detail["fields"] == expected_summary["fields"] # xmlschema returns very detailed errors, we cannot reasonably test them # for equality summary = detail["summary"] assert re.match( expected_summary["summary"], summary, re.DOTALL ), f"Failed to match {expected_summary['summary']!r} with:\n{summary}"