Page MenuHomeSoftware Heritage

D7214.id26169.diff
No OneTemporary

D7214.id26169.diff

diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -74,6 +74,7 @@
authors: List[str],
external_id: Optional[str] = None,
create_origin: Optional[str] = None,
+ metadata_provenance_url: Optional[str] = None,
) -> str:
"""Generate sword compliant xml metadata with the minimum required metadata.
@@ -101,6 +102,7 @@
name: Software name
authors: List of author names
create_origin: Origin concerned by the deposit
+ metadata_provenance_url: Provenance metadata url
Returns:
metadata xml string
@@ -113,6 +115,7 @@
"atom:entry": {
"@xmlns:atom": "http://www.w3.org/2005/Atom",
"@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
+ "@xmlns:schema": "http://schema.org/",
"atom:updated": datetime.now(tz=timezone.utc), # mandatory, cf. docstring
"atom:author": deposit_client, # mandatory, cf. docstring
"atom:title": name, # mandatory, cf. docstring
@@ -125,13 +128,24 @@
if external_id:
document["atom:entry"]["codemeta:identifier"] = external_id
- if create_origin:
+ swh_deposit_dict: Dict = {}
+ if create_origin or metadata_provenance_url:
document["atom:entry"][
"@xmlns:swh"
] = "https://www.softwareheritage.org/schema/2018/deposit"
- document["atom:entry"]["swh:deposit"] = {
- "swh:create_origin": {"swh:origin": {"@url": create_origin}}
- }
+
+ if create_origin:
+ swh_deposit_dict.update(
+ {"swh:create_origin": {"swh:origin": {"@url": create_origin}}}
+ )
+
+ if metadata_provenance_url:
+ swh_deposit_dict.update(
+ {"swh:metadata-provenance": {"schema:url": metadata_provenance_url}}
+ )
+
+ if swh_deposit_dict:
+ document["atom:entry"]["swh:deposit"] = swh_deposit_dict
logging.debug("Atom entry dict to generate as xml: %s", document)
return xmltodict.unparse(document, pretty=True)
@@ -160,6 +174,7 @@
collection: Optional[str],
slug: Optional[str],
create_origin: Optional[str],
+ metadata_provenance_url: Optional[str],
partial: bool,
deposit_id: Optional[int],
swhid: Optional[str],
@@ -204,6 +219,7 @@
"metadata": the metadata file to deposit
"collection": the user's collection under which to put the deposit
"create_origin": the origin concerned by the deposit
+ "metadata_provenance_url": the metadata provenance url
"in_progress": if the deposit is partial or not
"url": deposit's server main entry point
"deposit_id": optional deposit identifier
@@ -215,7 +231,12 @@
metadata_path = os.path.join(temp_dir, "metadata.xml")
logging.debug("Temporary file: %s", metadata_path)
metadata_xml = generate_metadata(
- username, name, authors, external_id=slug, create_origin=create_origin
+ username,
+ name,
+ authors,
+ external_id=slug,
+ create_origin=create_origin,
+ metadata_provenance_url=metadata_provenance_url,
)
logging.debug("Metadata xml generated: %s", metadata_xml)
with open(metadata_path, "w") as f:
@@ -255,19 +276,28 @@
)
if metadata:
- from swh.deposit.utils import parse_xml
+ from swh.deposit.utils import parse_swh_metadata_provenance, parse_xml
metadata_raw = open(metadata, "r").read()
- metadata_dict = parse_xml(metadata_raw).get("swh:deposit", {})
+ metadata_dict = parse_xml(metadata_raw)
+ metadata_swh = metadata_dict.get("swh:deposit", {})
if (
- "swh:create_origin" not in metadata_dict
- and "swh:add_to_origin" not in metadata_dict
+ "swh:create_origin" not in metadata_swh
+ and "swh:add_to_origin" not in metadata_swh
):
logger.warning(
"The metadata file provided should contain "
'"<swh:create_origin>" or "<swh:add_to_origin>" tag',
)
+ meta_prov_url = parse_swh_metadata_provenance(metadata_dict)
+
+ if not meta_prov_url:
+ logger.warning(
+ "The metadata file provided should contain "
+ '"<swh:metadata-provenance>" tag'
+ )
+
if replace and not deposit_id:
raise InputError("To update an existing deposit, you must provide its id")
@@ -370,6 +400,13 @@
"provide to the deposit server."
),
)
+@click.option(
+ "--metadata-provenance-url",
+ help=(
+ "(Optional) Provenance metadata url to indicate from where the metadata is "
+ "coming from."
+ ),
+)
@click.option(
"--partial/--no-partial",
default=False,
@@ -414,6 +451,7 @@
collection: Optional[str],
slug: Optional[str],
create_origin: Optional[str],
+ metadata_provenance_url: Optional[str],
partial: bool,
deposit_id: Optional[int],
swhid: Optional[str],
@@ -473,6 +511,7 @@
collection,
slug,
create_origin,
+ metadata_provenance_url,
partial,
deposit_id,
swhid,
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -179,6 +179,7 @@
authors=["some", "authors"],
external_id="external-id",
create_origin="origin-url",
+ metadata_provenance_url="meta-prov-url",
)
actual_metadata = dict(parse_xml(actual_metadata_xml))
@@ -195,16 +196,15 @@
actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"]
== "origin-url"
)
+ assert (
+ actual_metadata["swh:deposit"]["swh:metadata-provenance"]["schema:url"]
+ == "meta-prov-url"
+ )
checks_ok, detail = check_metadata(actual_metadata)
assert checks_ok is True
- # FIXME: Open the flag to suggest the provenance metadata url in the cli
- assert detail == {
- "metadata": [
- {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
- ]
- }
+ assert detail is None
def test_cli_client_generate_metadata_ok2(slug):
@@ -230,7 +230,6 @@
checks_ok, detail = check_metadata(actual_metadata)
assert checks_ok is True
- # FIXME: Open the flag to suggest the provenance metadata url in the cli
assert detail == {
"metadata": [
{"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
@@ -256,6 +255,7 @@
"--password", TEST_USER["password"],
"--name", "test-project",
"--archive", sample_archive["path"],
+ "--metadata-provenance-url", "meta-prov-url",
"--author", "Jane Doe",
"--slug", slug,
"--format", "json",
@@ -313,6 +313,7 @@
"--archive", sample_archive["path"],
"--author", "Jane Doe",
"--create-origin", origin,
+ "--metadata-provenance-url", "meta-prov-url",
"--format", "json",
],
)
@@ -336,6 +337,10 @@
actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"]
== origin
)
+ assert (
+ actual_metadata["swh:deposit"]["swh:metadata-provenance"]["schema:url"]
+ == "meta-prov-url"
+ )
assert actual_metadata["codemeta:author"] == OrderedDict(
[("codemeta:name", "Jane Doe")]
)
@@ -899,7 +904,6 @@
"metadata_entry_key", ["entry-data-with-add-to-origin", "entry-only-create-origin"]
)
def test_cli_deposit_warning_missing_origin(
- sample_archive,
metadata_entry_key,
tmp_path,
atom_dataset,
@@ -907,11 +911,12 @@
cli_runner,
requests_mock_datadir,
):
- """Deposit cli should log warning when the provided metadata xml is missing origins
+ """Deposit cli should warn when provided metadata xml is missing 'origins' tags
"""
# For the next deposit, no warning should be logged as either <swh:create_origin> or
- # <swh:origin_to_add> are provided
+ # <swh:add_to_origin> are provided, and <swh:metadata-provenance> is always
+ # provided.
metadata_raw = atom_dataset[metadata_entry_key] % "some-url"
metadata_path = os.path.join(tmp_path, "metadata-with-origin-tag-to-deposit.xml")
@@ -936,6 +941,37 @@
assert log_level < logging.WARNING
+def test_cli_deposit_warning_missing_provenance_url(
+ tmp_path, atom_dataset, caplog, cli_runner, requests_mock_datadir,
+):
+ """Deposit cli should warn when no metadata provenance is provided
+
+ """
+ atom_template = atom_dataset["entry-data-with-add-to-origin-no-prov"]
+ metadata_raw = atom_template % "some-url"
+ metadata_path = os.path.join(tmp_path, "metadata-with-missing-prov-url.xml")
+ with open(metadata_path, "w") as f:
+ f.write(metadata_raw)
+
+ # fmt: off
+ cli_runner.invoke(
+ cli,
+ [
+ "upload",
+ "--url", "https://deposit.swh.test/1",
+ "--username", TEST_USER["username"],
+ "--password", TEST_USER["password"],
+ "--metadata", metadata_path,
+ ],
+ )
+ # fmt: on
+
+ count_warnings = sum(
+ 1 for (_, log_level, _) in caplog.record_tuples if log_level == logging.WARNING
+ )
+ assert count_warnings == 1
+
+
def test_cli_failure_should_be_parseable(atom_dataset, mocker):
summary = "Cannot load metadata"
verbose_description = (
diff --git a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml
copy from swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
copy to swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml
--- a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml
@@ -1,7 +1,8 @@
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
- xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
- xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+ xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ xmlns:schema="http://schema.org/"
+ xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<author>dudess</author>
diff --git a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
--- a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
@@ -1,7 +1,8 @@
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
- xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
- xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+ xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ xmlns:schema="http://schema.org/"
+ xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<author>dudess</author>
@@ -9,5 +10,8 @@
<swh:add_to_origin>
<swh:origin url="%s" />
</swh:add_to_origin>
+ <swh:metadata-provenance>
+ <schema:url>http://some-url/metadata-provenance-url</schema:url>
+ </swh:metadata-provenance>
</swh:deposit>
</entry>
diff --git a/swh/deposit/tests/data/atom/entry-only-create-origin.xml b/swh/deposit/tests/data/atom/entry-only-create-origin.xml
--- a/swh/deposit/tests/data/atom/entry-only-create-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-only-create-origin.xml
@@ -1,10 +1,14 @@
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:schema="http://schema.org/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<swh:deposit>
<swh:create_origin>
<swh:origin url="%s" />
</swh:create_origin>
+ <swh:metadata-provenance>
+ <schema:url>http://some-url/metadata-provenance-url</schema:url>
+ </swh:metadata-provenance>
</swh:deposit>
</entry>

File Metadata

Mime Type
text/plain
Expires
Dec 18 2024, 4:17 AM (13 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224576

Event Timeline