diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py index de76a2da..ad7ba48d 100644 --- a/swh/deposit/api/checks.py +++ b/swh/deposit/api/checks.py @@ -1,47 +1,74 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Functional Metadata checks: Mandatory fields: - 'author' - 'name' or 'title' """ from typing import Dict, Optional, Tuple +import iso8601 + +from swh.deposit.utils import normalize_date + MANDATORY_FIELDS_MISSING = "Mandatory fields are missing" +INVALID_DATE_FORMAT = "Invalid date format" def check_metadata(metadata: Dict) -> Tuple[bool, Optional[Dict]]: - """Check metadata for mandatory field presence. + """Check metadata for mandatory field presence and date format. Args: - metadata: Metadata dictionary to check for mandatory fields + metadata: Metadata dictionary to check Returns: tuple (status, error_detail): True, None if metadata are ok (False, error_detail) otherwise. 
""" # at least one value per couple below is mandatory alternate_fields = { ("atom:name", "atom:title", "codemeta:name"): False, ("atom:author", "codemeta:author"): False, } for field, value in metadata.items(): for possible_names in alternate_fields: if field in possible_names: alternate_fields[possible_names] = True continue mandatory_result = [" or ".join(k) for k, v in alternate_fields.items() if not v] - if mandatory_result == []: - return True, None - detail = [{"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result}] - return False, {"metadata": detail} + if mandatory_result: + detail = [{"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result}] + return False, {"metadata": detail} + + fields = [] + + commit_date = metadata.get("codemeta:datePublished") + author_date = metadata.get("codemeta:dateCreated") + + if commit_date: + try: + normalize_date(commit_date) + except iso8601.iso8601.ParseError: + fields.append("codemeta:datePublished") + + if author_date: + try: + normalize_date(author_date) + except iso8601.iso8601.ParseError: + fields.append("codemeta:dateCreated") + + if fields: + detail = [{"summary": INVALID_DATE_FORMAT, "fields": fields}] + return False, {"metadata": detail} + + return True, None diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py index 3536d0d4..226e83ee 100644 --- a/swh/deposit/tests/api/test_checks.py +++ b/swh/deposit/tests/api/test_checks.py @@ -1,106 +1,125 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.deposit.api.checks import check_metadata @pytest.mark.parametrize( "metadata_ok", [ { "atom:url": "something", "atom:external_identifier": "something-else", "atom:name": "foo", "atom:author": "someone", }, { "atom:url": "some url", 
"atom:external_identifier": "some id", "atom:title": "bar", "atom:author": "no one", }, {"atom:url": "some url", "codemeta:name": "bar", "codemeta:author": "no one",}, + { + "atom:url": "some url", + "atom:external_identifier": "some id", + "atom:title": "bar", + "atom:author": "no one", + "codemeta:datePublished": "2020-12-21", + "codemeta:dateCreated": "2020-12-21", + }, ], ) def test_api_checks_check_metadata_ok(metadata_ok, swh_checks_deposit): actual_check, detail = check_metadata(metadata_ok) assert actual_check is True, detail assert detail is None @pytest.mark.parametrize( "metadata_ko,expected_summary", [ ( { "atom:url": "something", "atom:external_identifier": "something-else", "atom:author": "someone", }, { "summary": "Mandatory fields are missing", "fields": ["atom:name or atom:title or codemeta:name"], }, ), ( { "atom:url": "something", "atom:external_identifier": "something-else", "atom:title": "foobar", }, { "summary": "Mandatory fields are missing", "fields": ["atom:author or codemeta:author"], }, ), ( { "atom:url": "something", "atom:external_identifier": "something-else", "codemeta:title": "bar", "atom:author": "someone", }, { "summary": "Mandatory fields are missing", "fields": ["atom:name or atom:title or codemeta:name"], }, ), ( { "atom:url": "something", "atom:external_identifier": "something-else", "atom:title": "foobar", "author": "foo", }, { "summary": "Mandatory fields are missing", "fields": ["atom:author or codemeta:author"], }, ), ( { "atom:url": "something", "atom:external_identifier": "something-else", "atom:title": "foobar", "atom:authorblahblah": "foo", }, { "summary": "Mandatory fields are missing", "fields": ["atom:author or codemeta:author"], }, ), + ( + { + "atom:url": "some url", + "atom:external_identifier": "some id", + "atom:title": "bar", + "atom:author": "no one", + "codemeta:datePublished": "2020-aa-21", + "codemeta:dateCreated": "2020-12-bb", + }, + { + "summary": "Invalid date format", + "fields": 
["codemeta:datePublished", "codemeta:dateCreated"], + }, + ), ], ) def test_api_checks_check_metadata_ko( metadata_ko, expected_summary, swh_checks_deposit ): - """Missing optional field should be caught - - """ actual_check, error_detail = check_metadata(metadata_ko) assert actual_check is False assert error_detail == {"metadata": [expected_summary]}