Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9342127
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
21 KB
Subscribers
None
View Options
diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py
index 9896db60..c2a1e722 100644
--- a/swh/deposit/tests/api/test_checks.py
+++ b/swh/deposit/tests/api/test_checks.py
@@ -1,573 +1,591 @@
# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# disable flake8 on this file because of line length
# flake8: noqa
import re
import textwrap
from typing import Any, Dict
from xml.etree import ElementTree
import pytest
from swh.deposit.api.checks import (
METADATA_PROVENANCE_KEY,
SUGGESTED_FIELDS_MISSING,
check_metadata,
)
METADATA_PROVENANCE_DICT: Dict[str, Any] = {
"swh:deposit": {
METADATA_PROVENANCE_KEY: {"schema:url": "some-metadata-provenance-url"}
}
}
XMLNS = """xmlns="http://www.w3.org/2005/Atom"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:schema="http://schema.org/" """
PROVENANCE_XML = """
<swh:deposit>
<swh:metadata-provenance>
<schema:url>some-metadata-provenance-url</schema:url>
</swh:metadata-provenance>
</swh:deposit>"""
_parameters1 = [
textwrap.dedent(metadata_ok)
for (metadata_ok,) in [
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<name>foo</name>
<author>someone</author>
{PROVENANCE_XML}
</entry>
""",
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<name>foo</name>
<author>no one</author>
{PROVENANCE_XML}
</entry>
""",
),
(
f"""\
<entry {XMLNS}>
<url>some url</url>
<codemeta:name>bar</codemeta:name>
<codemeta:author>no one</codemeta:author>
{PROVENANCE_XML}
</entry>
""",
),
(
f"""\
<entry {XMLNS}>
<url>some url</url>
<external_identifier>some id</external_identifier>
<name>nar</name>
<author>no one</author>
<codemeta:datePublished>2020-12-21</codemeta:datePublished>
<codemeta:dateCreated>2020-12-21</codemeta:dateCreated>
<codemeta:dateModified>2020-12-25</codemeta:dateModified>
{PROVENANCE_XML}
</entry>
""",
),
(
# technically, only Date is allowed for datePublished; but we allow DateTime
# for backward compatibility with old swh-deposit versions
f"""\
<entry {XMLNS}>
<url>some url</url>
<external_identifier>some id</external_identifier>
<name>nar</name>
<author>no one</author>
<codemeta:datePublished>2020-12-21T12:00:00</codemeta:datePublished>
<codemeta:dateCreated>2020-12-21T12:00:00</codemeta:dateCreated>
<codemeta:dateModified>2020-12-25T12:00:00</codemeta:dateModified>
{PROVENANCE_XML}
</entry>
""",
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
</entry>
""",
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:add_to_origin>
<swh:origin url="http://example.org" />
</swh:add_to_origin>
<swh:metadata-provenance>
<schema:url>some-metadata-provenance-url</schema:url>
</swh:metadata-provenance>
</swh:deposit>
</entry>
""",
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:reference>
<swh:origin url="http://example.org" />
</swh:reference>
<swh:metadata-provenance>
<schema:url>some-metadata-provenance-url</schema:url>
</swh:metadata-provenance>
</swh:deposit>
</entry>
""",
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:reference>
<swh:object swhid="swh:1:dir:0000000000000000000000000000000000000000" />
</swh:reference>
<swh:metadata-provenance>
<schema:url>some-metadata-provenance-url</schema:url>
</swh:metadata-provenance>
</swh:deposit>
</entry>
""",
),
]
]
@pytest.mark.parametrize(
"metadata_ok", _parameters1,
)
def test_api_checks_check_metadata_ok(metadata_ok, swh_checks_deposit):
actual_check, detail = check_metadata(ElementTree.fromstring(metadata_ok))
assert actual_check is True, f"Unexpected result: {detail}"
if "swh:deposit" in metadata_ok:
# no missing suggested field
assert detail is None
else:
# missing suggested field
assert detail == {
"metadata": [
{
"fields": [METADATA_PROVENANCE_KEY],
"summary": SUGGESTED_FIELDS_MISSING,
}
]
}
_parameters2 = [
(textwrap.dedent(metadata_ko), expected_summary)
for (metadata_ko, expected_summary) in [
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<author>someone</author>
{PROVENANCE_XML}
</entry>
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:name or atom:title or codemeta:name"],
},
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>foobar</title>
{PROVENANCE_XML}
</entry>
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<codemeta:title>bar</codemeta:title>
<author>someone</author>
{PROVENANCE_XML}
</entry>
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:name or atom:title or codemeta:name"],
},
),
(
f"""\
<entry xmlns:atom="http://www.w3.org/2005/Atom"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:schema="http://schema.org/">
<atom:url>something</atom:url>
<atom:external_identifier>something-else</atom:external_identifier>
<atom:title>foobar</atom:title>
<author>foo</author>
{PROVENANCE_XML}
</entry>
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<authorblahblah>someone</authorblahblah>
{PROVENANCE_XML}
</entry>
""",
{
"summary": "Mandatory fields are missing",
"fields": ["atom:author or codemeta:author"],
},
),
]
]
@pytest.mark.parametrize("metadata_ko,expected_summary", _parameters2)
def test_api_checks_check_metadata_ko(
metadata_ko, expected_summary, swh_checks_deposit
):
actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko))
assert actual_check is False
assert error_detail == {"metadata": [expected_summary]}
_parameters3 = [
(textwrap.dedent(metadata_ko), expected_summary)
for (metadata_ko, expected_summary) in [
+ (
+ f"""\
+ <entry {XMLNS}>
+ <url>some url</url>
+ <codemeta:name>
+ <codemeta:name>bar</codemeta:name>
+ </codemeta:name>
+ <author>no one</author>
+ {PROVENANCE_XML}
+ </entry>
+ """,
+ [
+ {
+ "summary": ".*Reason: a simple content element can't have child elements.*",
+ "fields": ["codemeta:name"],
+ },
+ ],
+ ),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<codemeta:datePublished>2020-aa-21</codemeta:datePublished>
<codemeta:dateCreated>2020-12-bb</codemeta:dateCreated>
{PROVENANCE_XML}
</entry>
""",
[
{
"summary": ".*Reason: invalid value '2020-aa-21'.*",
"fields": ["codemeta:datePublished"],
},
{
"summary": ".*Reason: invalid value '2020-12-bb'.*",
"fields": ["codemeta:dateCreated"],
},
],
),
(
f"""\
<entry {XMLNS}>
<url>some url</url>
<external_identifier>someid</external_identifier>
<title>bar</title>
<author>no one</author>
<codemeta:datePublished>2020-aa-21</codemeta:datePublished>
<codemeta:dateCreated>2020-12-bb</codemeta:dateCreated>
</entry>
""",
[
{
"summary": ".*Reason: invalid value '2020-aa-21'.*",
"fields": ["codemeta:datePublished"],
},
{
"summary": ".*Reason: invalid value '2020-12-bb'.*",
"fields": ["codemeta:dateCreated"],
},
],
),
(
f"""\
<entry {XMLNS}>
<url>some url</url>
<external_identifier>someid</external_identifier>
<title>bar</title>
<author>no one</author>
<codemeta:dateModified>2020-12-aa</codemeta:dateModified>
</entry>
""",
[
{
"summary": ".*Reason: invalid value '2020-12-aa'.*",
"fields": ["codemeta:dateModified"],
},
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:invalid>
<swh:origin url="http://example.org" />
</swh:invalid>
</swh:deposit>
</entry>
""",
[
{
"summary": (
r".*Reason: Unexpected child with tag 'swh:invalid'.*"
r"Instance:.*swh:invalid.*"
),
"fields": ["swh:deposit"],
}
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:add_to_origin>
<swh:origin url="http://example.org" />
</swh:add_to_origin>
<swh:add_to_origin>
<swh:origin url="http://example.org" />
</swh:add_to_origin>
</swh:deposit>
</entry>
""",
[
{
"summary": (
r".*Reason: Unexpected child with tag 'swh:add_to_origin'.*"
),
"fields": ["swh:deposit"],
}
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:add_to_origin>
<swh:origin url="http://example.org" />
</swh:add_to_origin>
<swh:create_origin>
<swh:origin url="http://example.org" />
</swh:create_origin>
</swh:deposit>
</entry>
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-create-and-add".*'
),
"fields": ["swh:deposit"],
}
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:create_origin>
<swh:origin url="http://example.org" />
</swh:create_origin>
<swh:reference>
<swh:origin url="http://example.org" />
</swh:reference>
</swh:deposit>
</entry>
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-create-and-reference".*'
),
"fields": ["swh:deposit"],
}
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:add_to_origin>
<swh:origin url="http://example.org" />
</swh:add_to_origin>
<swh:reference>
<swh:origin url="http://example.org" />
</swh:reference>
</swh:deposit>
</entry>
""",
[
{
"summary": (
r".*Reason: assertion test if false.*"
r"Schema:\n*"
r' *<xsd:assert[^>]+ id="swhdeposit-incompatible-add-and-reference".*'
),
"fields": ["swh:deposit"],
}
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:reference>
<swh:object swhid="swh:1:dir:0000000000000000000000000000000000000000" />
<swh:origin url="http://example.org" />
</swh:reference>
</swh:deposit>
</entry>
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
"fields": ["swh:deposit"],
},
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:reference>
<swh:origin url="http://example.org" />
<swh:origin url="http://example.org" />
</swh:reference>
</swh:deposit>
</entry>
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:origin'.*",
"fields": ["swh:deposit"],
},
],
),
(
f"""\
<entry {XMLNS}>
<url>something</url>
<external_identifier>something-else</external_identifier>
<title>bar</title>
<author>someone</author>
<swh:deposit>
<swh:reference>
<swh:origin url="http://example.org" />
<swh:object swhid="swh:1:dir:0000000000000000000000000000000000000000" />
</swh:reference>
</swh:deposit>
</entry>
""",
[
{
"summary": r".*Reason: Unexpected child with tag 'swh:object'.*",
"fields": ["swh:deposit"],
},
],
),
]
]
@pytest.mark.parametrize("metadata_ko,expected_summaries", _parameters3)
def test_api_checks_check_metadata_ko_schema(
metadata_ko, expected_summaries, swh_checks_deposit
):
actual_check, error_detail = check_metadata(ElementTree.fromstring(metadata_ko))
assert actual_check is False
assert len(error_detail["metadata"]) == len(expected_summaries), error_detail[
"metadata"
]
for (detail, expected_summary) in zip(error_detail["metadata"], expected_summaries):
assert detail["fields"] == expected_summary["fields"]
# xmlschema returns very detailed errors, we cannot reasonably test them
# for equality
summary = detail["summary"]
assert re.match(
expected_summary["summary"], summary, re.DOTALL
), f"Failed to match {expected_summary['summary']!r} with:\n{summary}"
diff --git a/swh/deposit/xsd/codemeta.xsd b/swh/deposit/xsd/codemeta.xsd
index 5544172a..696f8eee 100644
--- a/swh/deposit/xsd/codemeta.xsd
+++ b/swh/deposit/xsd/codemeta.xsd
@@ -1,19 +1,21 @@
<?xml version="1.0" encoding="utf-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
targetNamespace="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
elementFormDefault="qualified"
>
<!-- This is a non-exhaustive schema used to find likely errors in XML-encoded
Codemeta documents -->
<!-- technically, only Date is allowed for datePublished; but we allow DateTime
for backward compatibility with old swh-deposit versions -->
<xsd:element name="datePublished" type="codemeta:DateOrDateTime" />
<xsd:element name="dateCreated" type="codemeta:DateOrDateTime" />
<xsd:element name="dateModified" type="codemeta:DateOrDateTime" />
<xsd:simpleType name="DateOrDateTime">
<xsd:union memberTypes="xsd:date xsd:dateTime"/>
</xsd:simpleType>
+
+ <xsd:element name="name" type="xsd:string" />
</xsd:schema>
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Jul 4, 12:30 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3312790
Attached To
rDDEP Push deposit
Event Timeline
Log In to Comment