diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+from datetime import datetime, timezone
 import logging
 
 # WARNING: do not import unnecessary things here to keep cli startup time under
@@ -57,49 +58,62 @@
     return url
 
 
-def generate_metadata_file(
-    name: str, external_id: str, authors: List[str], temp_dir: str
+def generate_metadata(
+    deposit_client: str, name: str, external_id: str, authors: List[str]
 ) -> str:
-    """Generate a temporary metadata file with the minimum required metadata
+    """Generate sword compliant xml metadata with the minimum required metadata.
 
-    This generates a xml file in a temporary location and returns the
-    path to that file.
+    The Atom spec, https://tools.ietf.org/html/rfc4287, says that:
 
-    This is up to the client of that function to clean up the
-    temporary file.
+    - atom:entry elements MUST contain one or more atom:author elements
+    - atom:entry elements MUST contain exactly one atom:title element.
+    - atom:entry elements MUST contain exactly one atom:updated element.
+
+    However, we are also using CodeMeta, so we want some basic information to be
+    mandatory.
+
+    Therefore, we generate the following mandatory fields:
+    - http://www.w3.org/2005/Atom#updated
+    - http://www.w3.org/2005/Atom#author
+    - http://www.w3.org/2005/Atom#title
+    - https://doi.org/10.5063/SCHEMA/CODEMETA-2.0#name (yes, in addition to
+      http://www.w3.org/2005/Atom#title, even if they have somewhat the same meaning)
+    - https://doi.org/10.5063/SCHEMA/CODEMETA-2.0#author
 
     Args:
-        name: Software's name
+        deposit_client: Deposit client username (emitted as atom:author)
+        name: Software name (emitted as both atom:title and codemeta:name)
         external_id: External identifier (slug) or generated one
         authors: List of author names
 
     Returns:
-        Filepath to the metadata generated file
+        The metadata as a pretty-printed xml string
 
     """
     import xmltodict
 
-    path = os.path.join(temp_dir, "metadata.xml")
-    # generate a metadata file with the minimum required metadata
+    # RFC 4287 (section 3.3) requires atom:updated to be an RFC 3339 date-time with
+    # an uppercase "T" separator. xmltodict stringifies values with str(), which
+    # would separate date and time with a space, so serialize it explicitly.
+    updated = datetime.now(tz=timezone.utc).isoformat()
+
+    # build the entry with the minimum required metadata
     codemetadata = {
         "entry": {
-            "@xmlns": "http://www.w3.org/2005/Atom",
+            "@xmlns:atom": "http://www.w3.org/2005/Atom",
             "@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
-            "codemeta:name": name,
             "codemeta:identifier": external_id,
-            "codemeta:author": [
+            "atom:updated": updated,  # mandatory, cf. docstring
+            "atom:author": deposit_client,  # mandatory, cf. docstring
+            "atom:title": name,  # mandatory, cf. docstring
+            "codemeta:name": name,  # mandatory, cf. docstring
+            "codemeta:author": [  # mandatory, cf. docstring
                 {"codemeta:name": author_name} for author_name in authors
             ],
         },
     }
-
-    logging.debug("Temporary file: %s", path)
     logging.debug("Metadata dict to generate as xml: %s", codemetadata)
-    s = xmltodict.unparse(codemetadata, pretty=True)
-    logging.debug("Metadata dict as xml generated: %s", s)
-    with open(path, "w") as fp:
-        fp.write(s)
-    return path
+    return xmltodict.unparse(codemetadata, pretty=True)
 
 
 def _client(url: str, username: str, password: str) -> PublicApiDepositClient:
@@ -207,7 +221,13 @@
                 "deposit, either a filepath with --metadata or --name and --author"
             )
         if name and authors:
-            metadata = generate_metadata_file(name, slug, authors, temp_dir)
+            metadata_path = os.path.join(temp_dir, "metadata.xml")
+            logging.debug("Temporary file: %s", metadata_path)
+            metadata_xml = generate_metadata(username, name, slug, authors)
+            logging.debug("Metadata xml generated: %s", metadata_xml)
+            with open(metadata_path, "w") as f:
+                f.write(metadata_xml)
+            metadata = metadata_path
         elif not archive_deposit and not partial and not deposit_id:
             # If we meet all the following conditions:
             # * this is not an archive-only deposit request
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -4,6 +4,7 @@
 # See top-level LICENSE file for more information
 
 import ast
+from collections import OrderedDict
 import contextlib
 import json
 import logging
@@ -13,8 +14,16 @@
 import pytest
 import yaml
 
+from swh.deposit.api.checks import check_metadata
 from swh.deposit.cli import deposit as cli
-from swh.deposit.cli.client import InputError, _client, _collection, _url, generate_slug
+from swh.deposit.cli.client import (
+    InputError,
+    _client,
+    _collection,
+    _url,
+    generate_metadata,
+    generate_slug,
+)
 from swh.deposit.client import MaintenanceError, PublicApiDepositClient
 from swh.deposit.parsers import parse_xml
 
@@ -132,6 +141,31 @@
client_mock_api_down.service_document.assert_called_once_with() +def test_cli_client_generate_metadata_ok(slug): + """Generated metadata is well formed and pass service side metadata checks + + """ + actual_metadata_xml = generate_metadata( + "deposit-client", "project-name", "external-id", authors=["some", "authors"] + ) + + actual_metadata = dict(parse_xml(actual_metadata_xml)) + assert actual_metadata["author"] == "deposit-client" + assert actual_metadata["title"] == "project-name" + assert actual_metadata["updated"] is not None + assert actual_metadata["codemeta:name"] == "project-name" + assert actual_metadata["codemeta:identifier"] == "external-id" + assert actual_metadata["codemeta:author"] == [ + OrderedDict([("codemeta:name", "some")]), + OrderedDict([("codemeta:name", "authors")]), + ] + + checks_ok, detail = check_metadata(actual_metadata) + + assert checks_ok is True + assert detail is None + + def test_cli_single_minimal_deposit( sample_archive, slug, patched_tmp_path, requests_mock_datadir, cli_runner ): @@ -172,18 +206,14 @@ } with open(metadata_path) as fd: - assert ( - fd.read() - == f"""\ - - -\ttest-project -\t{slug} -\t -\t\tJane Doe -\t -""" + actual_metadata = dict(parse_xml(fd.read())) + assert actual_metadata["author"] == TEST_USER["username"] + assert actual_metadata["codemeta:name"] == "test-project" + assert actual_metadata["title"] == "test-project" + assert actual_metadata["updated"] is not None + assert actual_metadata["codemeta:identifier"] == slug + assert actual_metadata["codemeta:author"] == OrderedDict( + [("codemeta:name", "Jane Doe")] ) @@ -460,7 +490,7 @@ with open(metadata_path) as fd: metadata_xml = fd.read() - actual_metadata = parse_xml(metadata_xml) + actual_metadata = dict(parse_xml(metadata_xml)) assert actual_metadata["codemeta:identifier"] is not None