diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -74,6 +74,7 @@ authors: List[str], external_id: Optional[str] = None, create_origin: Optional[str] = None, + metadata_provenance_url: Optional[str] = None, ) -> str: """Generate sword compliant xml metadata with the minimum required metadata. @@ -101,6 +102,7 @@ name: Software name authors: List of author names create_origin: Origin concerned by the deposit + metadata_provenance_url: Provenance metadata url Returns: metadata xml string @@ -113,6 +115,7 @@ "atom:entry": { "@xmlns:atom": "http://www.w3.org/2005/Atom", "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0", + "@xmlns:schema": "http://schema.org/", "atom:updated": datetime.now(tz=timezone.utc), # mandatory, cf. docstring "atom:author": deposit_client, # mandatory, cf. docstring "atom:title": name, # mandatory, cf. 
docstring @@ -125,13 +128,24 @@ if external_id: document["atom:entry"]["codemeta:identifier"] = external_id - if create_origin: + swh_deposit_dict: Dict = {} + if create_origin or metadata_provenance_url: document["atom:entry"][ "@xmlns:swh" ] = "https://www.softwareheritage.org/schema/2018/deposit" - document["atom:entry"]["swh:deposit"] = { - "swh:create_origin": {"swh:origin": {"@url": create_origin}} - } + + if create_origin: + swh_deposit_dict.update( + {"swh:create_origin": {"swh:origin": {"@url": create_origin}}} + ) + + if metadata_provenance_url: + swh_deposit_dict.update( + {"swh:metadata-provenance": {"schema:url": metadata_provenance_url}} + ) + + if swh_deposit_dict: + document["atom:entry"]["swh:deposit"] = swh_deposit_dict logging.debug("Atom entry dict to generate as xml: %s", document) return xmltodict.unparse(document, pretty=True) @@ -160,6 +174,7 @@ collection: Optional[str], slug: Optional[str], create_origin: Optional[str], + metadata_provenance_url: Optional[str], partial: bool, deposit_id: Optional[int], swhid: Optional[str], @@ -204,6 +219,7 @@ "metadata": the metadata file to deposit "collection": the user's collection under which to put the deposit "create_origin": the origin concerned by the deposit + "metadata_provenance_url": the metadata provenance url "in_progress": if the deposit is partial or not "url": deposit's server main entry point "deposit_id": optional deposit identifier @@ -215,7 +231,12 @@ metadata_path = os.path.join(temp_dir, "metadata.xml") logging.debug("Temporary file: %s", metadata_path) metadata_xml = generate_metadata( - username, name, authors, external_id=slug, create_origin=create_origin + username, + name, + authors, + external_id=slug, + create_origin=create_origin, + metadata_provenance_url=metadata_provenance_url, ) logging.debug("Metadata xml generated: %s", metadata_xml) with open(metadata_path, "w") as f: @@ -255,19 +276,28 @@ ) if metadata: - from swh.deposit.utils import parse_xml + from 
swh.deposit.utils import parse_swh_metadata_provenance, parse_xml metadata_raw = open(metadata, "r").read() - metadata_dict = parse_xml(metadata_raw).get("swh:deposit", {}) + metadata_dict = parse_xml(metadata_raw) + metadata_swh = metadata_dict.get("swh:deposit", {}) if ( - "swh:create_origin" not in metadata_dict - and "swh:add_to_origin" not in metadata_dict + "swh:create_origin" not in metadata_swh + and "swh:add_to_origin" not in metadata_swh ): logger.warning( "The metadata file provided should contain " '"<swh:create_origin>" or "<swh:add_to_origin>" tag', ) + meta_prov_url = parse_swh_metadata_provenance(metadata_dict) + + if not meta_prov_url: + logger.warning( + "The metadata file provided should contain " + '"<swh:metadata-provenance>" tag' + ) + if replace and not deposit_id: raise InputError("To update an existing deposit, you must provide its id") @@ -370,6 +400,13 @@ "provide to the deposit server." ), ) +@click.option( + "--metadata-provenance-url", + help=( + "(Optional) Provenance metadata url to indicate from where the metadata is " + "coming from." 
+ ), +) @click.option( "--partial/--no-partial", default=False, @@ -414,6 +451,7 @@ collection: Optional[str], slug: Optional[str], create_origin: Optional[str], + metadata_provenance_url: Optional[str], partial: bool, deposit_id: Optional[int], swhid: Optional[str], @@ -473,6 +511,7 @@ collection, slug, create_origin, + metadata_provenance_url, partial, deposit_id, swhid, diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -179,6 +179,7 @@ authors=["some", "authors"], external_id="external-id", create_origin="origin-url", + metadata_provenance_url="meta-prov-url", ) actual_metadata = dict(parse_xml(actual_metadata_xml)) @@ -195,16 +196,15 @@ actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"] == "origin-url" ) + assert ( + actual_metadata["swh:deposit"]["swh:metadata-provenance"]["schema:url"] + == "meta-prov-url" + ) checks_ok, detail = check_metadata(actual_metadata) assert checks_ok is True - # FIXME: Open the flag to suggest the provenance metadata url in the cli - assert detail == { - "metadata": [ - {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]} - ] - } + assert detail is None def test_cli_client_generate_metadata_ok2(slug): @@ -230,7 +230,6 @@ checks_ok, detail = check_metadata(actual_metadata) assert checks_ok is True - # FIXME: Open the flag to suggest the provenance metadata url in the cli assert detail == { "metadata": [ {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]} @@ -256,6 +255,7 @@ "--password", TEST_USER["password"], "--name", "test-project", "--archive", sample_archive["path"], + "--metadata-provenance-url", "meta-prov-url", "--author", "Jane Doe", "--slug", slug, "--format", "json", @@ -313,6 +313,7 @@ "--archive", sample_archive["path"], "--author", "Jane Doe", "--create-origin", origin, + "--metadata-provenance-url", "meta-prov-url", 
"--format", "json", ], ) @@ -336,6 +337,10 @@ actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"] == origin ) + assert ( + actual_metadata["swh:deposit"]["swh:metadata-provenance"]["schema:url"] + == "meta-prov-url" + ) assert actual_metadata["codemeta:author"] == OrderedDict( [("codemeta:name", "Jane Doe")] ) @@ -899,7 +904,6 @@ "metadata_entry_key", ["entry-data-with-add-to-origin", "entry-only-create-origin"] ) def test_cli_deposit_warning_missing_origin( - sample_archive, metadata_entry_key, tmp_path, atom_dataset, @@ -907,11 +911,12 @@ cli_runner, requests_mock_datadir, ): - """Deposit cli should log warning when the provided metadata xml is missing origins + """Deposit cli should warn when provided metadata xml is missing 'origins' tags """ # For the next deposit, no warning should be logged as either or - # are provided + # are provided, and is always + # provided. metadata_raw = atom_dataset[metadata_entry_key] % "some-url" metadata_path = os.path.join(tmp_path, "metadata-with-origin-tag-to-deposit.xml") @@ -936,6 +941,37 @@ assert log_level < logging.WARNING +def test_cli_deposit_warning_missing_provenance_url( + tmp_path, atom_dataset, caplog, cli_runner, requests_mock_datadir, +): + """Deposit cli should warn when no metadata provenance is provided + + """ + atom_template = atom_dataset["entry-data-with-add-to-origin-no-prov"] + metadata_raw = atom_template % "some-url" + metadata_path = os.path.join(tmp_path, "metadata-with-missing-prov-url.xml") + with open(metadata_path, "w") as f: + f.write(metadata_raw) + + # fmt: off + cli_runner.invoke( + cli, + [ + "upload", + "--url", "https://deposit.swh.test/1", + "--username", TEST_USER["username"], + "--password", TEST_USER["password"], + "--metadata", metadata_path, + ], + ) + # fmt: on + + count_warnings = sum( + 1 for (_, log_level, _) in caplog.record_tuples if log_level == logging.WARNING + ) + assert count_warnings == 1 + + def test_cli_failure_should_be_parseable(atom_dataset, 
mocker): summary = "Cannot load metadata" verbose_description = ( diff --git a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml copy from swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml copy to swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml --- a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml +++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml @@ -1,7 +1,8 @@ + xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0" + xmlns:schema="http://schema.org/" + xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"> Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a dudess diff --git a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml --- a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml +++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml @@ -1,7 +1,8 @@ + xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0" + xmlns:schema="http://schema.org/" + xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit"> Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a dudess @@ -9,5 +10,8 @@ + + http://some-url/metadata-provenance-url + diff --git a/swh/deposit/tests/data/atom/entry-only-create-origin.xml b/swh/deposit/tests/data/atom/entry-only-create-origin.xml --- a/swh/deposit/tests/data/atom/entry-only-create-origin.xml +++ b/swh/deposit/tests/data/atom/entry-only-create-origin.xml @@ -1,10 +1,14 @@ + + http://some-url/metadata-provenance-url +