diff --git a/swh/deposit/api/checks.py b/swh/deposit/api/checks.py --- a/swh/deposit/api/checks.py +++ b/swh/deposit/api/checks.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -9,17 +9,23 @@ - 'author' - 'name' or 'title' +Suggested fields: +- metadata-provenance + """ from typing import Dict, Optional, Tuple import iso8601 -from swh.deposit.utils import normalize_date +from swh.deposit.utils import normalize_date, parse_swh_metadata_provenance MANDATORY_FIELDS_MISSING = "Mandatory fields are missing" INVALID_DATE_FORMAT = "Invalid date format" +SUGGESTED_FIELDS_MISSING = "Suggested fields are missing" +METADATA_PROVENANCE_KEY = "swh:metadata-provenance" + def check_metadata(metadata: Dict) -> Tuple[bool, Optional[Dict]]: """Check metadata for mandatory field presence and date format. @@ -28,10 +34,13 @@ metadata: Metadata dictionary to check Returns: - tuple (status, error_detail): True, None if metadata are - ok (False, ) otherwise. + tuple (status, error_detail): + - (True, None) if metadata are ok and suggested fields are also present + - (True, ) if metadata are ok but some suggestions are missing + - (False, ) otherwise. """ + suggested_fields = [] # at least one value per couple below is mandatory alternate_fields = { ("atom:name", "atom:title", "codemeta:name"): False, @@ -46,9 +55,16 @@ mandatory_result = [" or ".join(k) for k, v in alternate_fields.items() if not v] + # provenance metadata is optional + provenance_meta = parse_swh_metadata_provenance(metadata) + if provenance_meta is None: + suggested_fields = [ + {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]} + ] + if mandatory_result: detail = [{"summary": MANDATORY_FIELDS_MISSING, "fields": mandatory_result}] - return False, {"metadata": detail} + return False, {"metadata": detail + suggested_fields} fields = [] @@ -69,6 +85,8 @@ if fields: detail = [{"summary": INVALID_DATE_FORMAT, "fields": fields}] - return False, {"metadata": detail} + return False, {"metadata": detail + suggested_fields} + if suggested_fields: # it's fine but warn about missing suggested fields + return True, {"metadata": suggested_fields} return True, None diff --git a/swh/deposit/tests/api/test_checks.py b/swh/deposit/tests/api/test_checks.py --- a/swh/deposit/tests/api/test_checks.py +++ b/swh/deposit/tests/api/test_checks.py @@ -1,11 +1,23 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from typing import Any, Dict + import pytest -from swh.deposit.api.checks import check_metadata +from swh.deposit.api.checks import ( + METADATA_PROVENANCE_KEY, + SUGGESTED_FIELDS_MISSING, + check_metadata, +) + +METADATA_PROVENANCE_DICT: Dict[str, Any] = { + "swh:deposit": { + METADATA_PROVENANCE_KEY: {"schema:url": "some-metadata-provenance-url"} + } +} @pytest.mark.parametrize( @@ -16,14 +28,21 @@ "atom:external_identifier": "something-else", "atom:name": "foo", "atom:author": "someone", + **METADATA_PROVENANCE_DICT, }, { "atom:url": "some url", "atom:external_identifier": "some id", "atom:title": "bar", "atom:author": "no one", + **METADATA_PROVENANCE_DICT, + }, + { + "atom:url": "some url", + "codemeta:name": "bar", + "codemeta:author": "no one", + **METADATA_PROVENANCE_DICT, }, - {"atom:url": "some url", "codemeta:name": "bar", "codemeta:author": "no one",}, { "atom:url": "some url", "atom:external_identifier": "some id", @@ -36,8 +55,20 @@ ) def test_api_checks_check_metadata_ok(metadata_ok, swh_checks_deposit): actual_check, detail = check_metadata(metadata_ok) - assert actual_check is True, detail - assert detail is None + assert actual_check is True, f"Unexpected result: {detail}" + if "swh:deposit" in metadata_ok: + # no missing suggested field + assert detail is None + else: + # missing suggested field + assert detail == { + "metadata": [ + { + "fields": [METADATA_PROVENANCE_KEY], + "summary": SUGGESTED_FIELDS_MISSING, + } + ] + } @pytest.mark.parametrize( @@ -48,6 +79,7 @@ "atom:url": "something", "atom:external_identifier": "something-else", "atom:author": "someone", + **METADATA_PROVENANCE_DICT, }, { "summary": "Mandatory fields are missing", @@ -59,6 +91,7 @@ "atom:url": "something", "atom:external_identifier": "something-else", "atom:title": "foobar", + **METADATA_PROVENANCE_DICT, }, { "summary": "Mandatory fields are missing", @@ -71,6 +104,7 @@ "atom:external_identifier": "something-else", "codemeta:title": "bar", "atom:author": "someone", + **METADATA_PROVENANCE_DICT, }, { "summary": "Mandatory fields are missing", @@ -83,6 +117,7 @@ "atom:external_identifier": "something-else", "atom:title": "foobar", "author": "foo", + **METADATA_PROVENANCE_DICT, }, { "summary": "Mandatory fields are missing", @@ -95,12 +130,38 @@ "atom:external_identifier": "something-else", "atom:title": "foobar", "atom:authorblahblah": "foo", + **METADATA_PROVENANCE_DICT, }, { "summary": "Mandatory fields are missing", "fields": ["atom:author or codemeta:author"], }, ), + ( + { + "atom:url": "something", + "atom:external_identifier": "something-else", + "atom:author": "someone", + **METADATA_PROVENANCE_DICT, + }, + { + "summary": "Mandatory fields are missing", + "fields": ["atom:name or atom:title or codemeta:name"], + }, + ), + ], +) +def test_api_checks_check_metadata_ko( + metadata_ko, expected_summary, swh_checks_deposit +): + actual_check, error_detail = check_metadata(metadata_ko) + assert actual_check is False + assert error_detail == {"metadata": [expected_summary]} + + +@pytest.mark.parametrize( + "metadata_ko,expected_invalid_summary", + [ ( { "atom:url": "some url", @@ -117,9 +178,12 @@ ), ], ) -def test_api_checks_check_metadata_ko( - metadata_ko, expected_summary, swh_checks_deposit +def test_api_checks_check_metadata_fields_ko_and_missing_suggested_fields( + metadata_ko, expected_invalid_summary, swh_checks_deposit ): actual_check, error_detail = check_metadata(metadata_ko) assert actual_check is False - assert error_detail == {"metadata": [expected_summary]} + assert error_detail == { + "metadata": [expected_invalid_summary] + + [{"fields": [METADATA_PROVENANCE_KEY], "summary": SUGGESTED_FIELDS_MISSING,}] + } diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py --- a/swh/deposit/tests/api/test_deposit_private_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -7,7 +7,11 @@ import pytest from rest_framework import status -from swh.deposit.api.checks import MANDATORY_FIELDS_MISSING +from swh.deposit.api.checks import ( + MANDATORY_FIELDS_MISSING, + METADATA_PROVENANCE_KEY, + SUGGESTED_FIELDS_MISSING, +) from swh.deposit.api.private.deposit_check import ( MANDATORY_ARCHIVE_INVALID, MANDATORY_ARCHIVE_MISSING, @@ -130,7 +134,7 @@ assert len(details["archive"]) == 1 assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_UNSUPPORTED # metadata check failure - assert len(details["metadata"]) == 1 + assert len(details["metadata"]) == 2 mandatory = details["metadata"][0] assert mandatory["summary"] == MANDATORY_FIELDS_MISSING assert set(mandatory["fields"]) == set( @@ -139,6 +143,9 @@ "atom:name or atom:title or codemeta:name", ] ) + suggested = details["metadata"][1] + assert suggested["summary"] == SUGGESTED_FIELDS_MISSING + assert set(suggested["fields"]) == set([METADATA_PROVENANCE_KEY]) deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_REJECTED diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,7 +14,11 @@ import pytest import yaml -from swh.deposit.api.checks import check_metadata +from swh.deposit.api.checks import ( + METADATA_PROVENANCE_KEY, + SUGGESTED_FIELDS_MISSING, + check_metadata, +) from swh.deposit.cli import deposit as cli from swh.deposit.cli.client import InputError, _collection, _url, generate_metadata from swh.deposit.client import ( @@ -195,7 +199,12 @@ checks_ok, detail = check_metadata(actual_metadata) assert checks_ok is True - assert detail is None + # FIXME: Open the flag to suggest the provenance metadata url in the cli + assert detail == { + "metadata": [ + {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]} + ] + } def test_cli_client_generate_metadata_ok2(slug): @@ -221,7 +230,12 @@ checks_ok, detail = check_metadata(actual_metadata) assert checks_ok is True - assert detail is None + # FIXME: Open the flag to suggest the provenance metadata url in the cli + assert detail == { + "metadata": [ + {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]} + ] + } def test_cli_single_minimal_deposit_with_slug(