Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123235
D7214.id26169.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
D7214.id26169.diff
View Options
diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2020 The Software Heritage developers
+# Copyright (C) 2017-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -74,6 +74,7 @@
authors: List[str],
external_id: Optional[str] = None,
create_origin: Optional[str] = None,
+ metadata_provenance_url: Optional[str] = None,
) -> str:
"""Generate sword compliant xml metadata with the minimum required metadata.
@@ -101,6 +102,7 @@
name: Software name
authors: List of author names
create_origin: Origin concerned by the deposit
+ metadata_provenance_url: Provenance metadata url
Returns:
metadata xml string
@@ -113,6 +115,7 @@
"atom:entry": {
"@xmlns:atom": "http://www.w3.org/2005/Atom",
"@xmlns:codemeta": "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0",
+ "@xmlns:schema": "http://schema.org/",
"atom:updated": datetime.now(tz=timezone.utc), # mandatory, cf. docstring
"atom:author": deposit_client, # mandatory, cf. docstring
"atom:title": name, # mandatory, cf. docstring
@@ -125,13 +128,24 @@
if external_id:
document["atom:entry"]["codemeta:identifier"] = external_id
- if create_origin:
+ swh_deposit_dict: Dict = {}
+ if create_origin or metadata_provenance_url:
document["atom:entry"][
"@xmlns:swh"
] = "https://www.softwareheritage.org/schema/2018/deposit"
- document["atom:entry"]["swh:deposit"] = {
- "swh:create_origin": {"swh:origin": {"@url": create_origin}}
- }
+
+ if create_origin:
+ swh_deposit_dict.update(
+ {"swh:create_origin": {"swh:origin": {"@url": create_origin}}}
+ )
+
+ if metadata_provenance_url:
+ swh_deposit_dict.update(
+ {"swh:metadata-provenance": {"schema:url": metadata_provenance_url}}
+ )
+
+ if swh_deposit_dict:
+ document["atom:entry"]["swh:deposit"] = swh_deposit_dict
logging.debug("Atom entry dict to generate as xml: %s", document)
return xmltodict.unparse(document, pretty=True)
@@ -160,6 +174,7 @@
collection: Optional[str],
slug: Optional[str],
create_origin: Optional[str],
+ metadata_provenance_url: Optional[str],
partial: bool,
deposit_id: Optional[int],
swhid: Optional[str],
@@ -204,6 +219,7 @@
"metadata": the metadata file to deposit
"collection": the user's collection under which to put the deposit
"create_origin": the origin concerned by the deposit
+ "metadata_provenance_url": the metadata provenance url
"in_progress": if the deposit is partial or not
"url": deposit's server main entry point
"deposit_id": optional deposit identifier
@@ -215,7 +231,12 @@
metadata_path = os.path.join(temp_dir, "metadata.xml")
logging.debug("Temporary file: %s", metadata_path)
metadata_xml = generate_metadata(
- username, name, authors, external_id=slug, create_origin=create_origin
+ username,
+ name,
+ authors,
+ external_id=slug,
+ create_origin=create_origin,
+ metadata_provenance_url=metadata_provenance_url,
)
logging.debug("Metadata xml generated: %s", metadata_xml)
with open(metadata_path, "w") as f:
@@ -255,19 +276,28 @@
)
if metadata:
- from swh.deposit.utils import parse_xml
+ from swh.deposit.utils import parse_swh_metadata_provenance, parse_xml
metadata_raw = open(metadata, "r").read()
- metadata_dict = parse_xml(metadata_raw).get("swh:deposit", {})
+ metadata_dict = parse_xml(metadata_raw)
+ metadata_swh = metadata_dict.get("swh:deposit", {})
if (
- "swh:create_origin" not in metadata_dict
- and "swh:add_to_origin" not in metadata_dict
+ "swh:create_origin" not in metadata_swh
+ and "swh:add_to_origin" not in metadata_swh
):
logger.warning(
"The metadata file provided should contain "
'"<swh:create_origin>" or "<swh:add_to_origin>" tag',
)
+ meta_prov_url = parse_swh_metadata_provenance(metadata_dict)
+
+ if not meta_prov_url:
+ logger.warning(
+ "The metadata file provided should contain "
+ '"<swh:metadata-provenance>" tag'
+ )
+
if replace and not deposit_id:
raise InputError("To update an existing deposit, you must provide its id")
@@ -370,6 +400,13 @@
"provide to the deposit server."
),
)
+@click.option(
+ "--metadata-provenance-url",
+ help=(
+ "(Optional) Provenance metadata url to indicate from where the metadata is "
+ "coming from."
+ ),
+)
@click.option(
"--partial/--no-partial",
default=False,
@@ -414,6 +451,7 @@
collection: Optional[str],
slug: Optional[str],
create_origin: Optional[str],
+ metadata_provenance_url: Optional[str],
partial: bool,
deposit_id: Optional[int],
swhid: Optional[str],
@@ -473,6 +511,7 @@
collection,
slug,
create_origin,
+ metadata_provenance_url,
partial,
deposit_id,
swhid,
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -179,6 +179,7 @@
authors=["some", "authors"],
external_id="external-id",
create_origin="origin-url",
+ metadata_provenance_url="meta-prov-url",
)
actual_metadata = dict(parse_xml(actual_metadata_xml))
@@ -195,16 +196,15 @@
actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"]
== "origin-url"
)
+ assert (
+ actual_metadata["swh:deposit"]["swh:metadata-provenance"]["schema:url"]
+ == "meta-prov-url"
+ )
checks_ok, detail = check_metadata(actual_metadata)
assert checks_ok is True
- # FIXME: Open the flag to suggest the provenance metadata url in the cli
- assert detail == {
- "metadata": [
- {"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
- ]
- }
+ assert detail is None
def test_cli_client_generate_metadata_ok2(slug):
@@ -230,7 +230,6 @@
checks_ok, detail = check_metadata(actual_metadata)
assert checks_ok is True
- # FIXME: Open the flag to suggest the provenance metadata url in the cli
assert detail == {
"metadata": [
{"summary": SUGGESTED_FIELDS_MISSING, "fields": [METADATA_PROVENANCE_KEY]}
@@ -256,6 +255,7 @@
"--password", TEST_USER["password"],
"--name", "test-project",
"--archive", sample_archive["path"],
+ "--metadata-provenance-url", "meta-prov-url",
"--author", "Jane Doe",
"--slug", slug,
"--format", "json",
@@ -313,6 +313,7 @@
"--archive", sample_archive["path"],
"--author", "Jane Doe",
"--create-origin", origin,
+ "--metadata-provenance-url", "meta-prov-url",
"--format", "json",
],
)
@@ -336,6 +337,10 @@
actual_metadata["swh:deposit"]["swh:create_origin"]["swh:origin"]["@url"]
== origin
)
+ assert (
+ actual_metadata["swh:deposit"]["swh:metadata-provenance"]["schema:url"]
+ == "meta-prov-url"
+ )
assert actual_metadata["codemeta:author"] == OrderedDict(
[("codemeta:name", "Jane Doe")]
)
@@ -899,7 +904,6 @@
"metadata_entry_key", ["entry-data-with-add-to-origin", "entry-only-create-origin"]
)
def test_cli_deposit_warning_missing_origin(
- sample_archive,
metadata_entry_key,
tmp_path,
atom_dataset,
@@ -907,11 +911,12 @@
cli_runner,
requests_mock_datadir,
):
- """Deposit cli should log warning when the provided metadata xml is missing origins
+ """Deposit cli should warn when provided metadata xml is missing 'origins' tags
"""
# For the next deposit, no warning should be logged as either <swh:create_origin> or
- # <swh:origin_to_add> are provided
+ # <swh:add_to_origin> are provided, and <swh:metadata-provenance> is always
+ # provided.
metadata_raw = atom_dataset[metadata_entry_key] % "some-url"
metadata_path = os.path.join(tmp_path, "metadata-with-origin-tag-to-deposit.xml")
@@ -936,6 +941,37 @@
assert log_level < logging.WARNING
+def test_cli_deposit_warning_missing_provenance_url(
+ tmp_path, atom_dataset, caplog, cli_runner, requests_mock_datadir,
+):
+ """Deposit cli should warn when no metadata provenance is provided
+
+ """
+ atom_template = atom_dataset["entry-data-with-add-to-origin-no-prov"]
+ metadata_raw = atom_template % "some-url"
+ metadata_path = os.path.join(tmp_path, "metadata-with-missing-prov-url.xml")
+ with open(metadata_path, "w") as f:
+ f.write(metadata_raw)
+
+ # fmt: off
+ cli_runner.invoke(
+ cli,
+ [
+ "upload",
+ "--url", "https://deposit.swh.test/1",
+ "--username", TEST_USER["username"],
+ "--password", TEST_USER["password"],
+ "--metadata", metadata_path,
+ ],
+ )
+ # fmt: on
+
+ count_warnings = sum(
+ 1 for (_, log_level, _) in caplog.record_tuples if log_level == logging.WARNING
+ )
+ assert count_warnings == 1
+
+
def test_cli_failure_should_be_parseable(atom_dataset, mocker):
summary = "Cannot load metadata"
verbose_description = (
diff --git a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml
copy from swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
copy to swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml
--- a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin-no-prov.xml
@@ -1,7 +1,8 @@
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
- xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
- xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+ xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ xmlns:schema="http://schema.org/"
+ xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<author>dudess</author>
diff --git a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
--- a/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-data-with-add-to-origin.xml
@@ -1,7 +1,8 @@
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
- xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
- xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
+ xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0"
+ xmlns:schema="http://schema.org/"
+ xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<title>Awesome Compiler</title>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<author>dudess</author>
@@ -9,5 +10,8 @@
<swh:add_to_origin>
<swh:origin url="%s" />
</swh:add_to_origin>
+ <swh:metadata-provenance>
+ <schema:url>http://some-url/metadata-provenance-url</schema:url>
+ </swh:metadata-provenance>
</swh:deposit>
</entry>
diff --git a/swh/deposit/tests/data/atom/entry-only-create-origin.xml b/swh/deposit/tests/data/atom/entry-only-create-origin.xml
--- a/swh/deposit/tests/data/atom/entry-only-create-origin.xml
+++ b/swh/deposit/tests/data/atom/entry-only-create-origin.xml
@@ -1,10 +1,14 @@
<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:schema="http://schema.org/"
xmlns:swh="https://www.softwareheritage.org/schema/2018/deposit">
<swh:deposit>
<swh:create_origin>
<swh:origin url="%s" />
</swh:create_origin>
+ <swh:metadata-provenance>
+ <schema:url>http://some-url/metadata-provenance-url</schema:url>
+ </swh:metadata-provenance>
</swh:deposit>
</entry>
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 18 2024, 4:17 AM (13 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224576
Attached To
D7214: deposit.cli.client: Allow user to define the metadata provenance url
Event Timeline
Log In to Comment