Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9346247
D4228.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
21 KB
Subscribers
None
D4228.diff
View Options
diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -141,6 +141,7 @@
slug: Optional[str],
partial: bool,
deposit_id: Optional[int],
+ swhid: Optional[str],
replace: bool,
url: str,
name: Optional[str],
@@ -188,6 +189,7 @@
'url': deposit's server main entry point
'deposit_type': deposit's type (binary, multipart, metadata)
'deposit_id': optional deposit identifier
+ 'swhid': optional deposit swhid
"""
if archive_deposit and metadata_deposit:
@@ -264,6 +266,7 @@
"client": client,
"url": url,
"deposit_id": deposit_id,
+ "swhid": swhid,
"replace": replace,
}
@@ -299,6 +302,7 @@
"slug",
"in_progress",
"replace",
+ "swhid",
)
return client.deposit_update(**_subdict(config, keys))
@@ -355,6 +359,11 @@
default=None,
help="(Optional) Update an existing partial deposit with its identifier",
) # noqa
+@click.option(
+ "--swhid",
+ default=None,
+ help="(Optional) Update existing completed deposit (status done) with new metadata",
+)
@click.option(
"--replace/--no-replace",
default=False,
@@ -397,6 +406,7 @@
slug: Optional[str] = None,
partial: bool = False,
deposit_id: Optional[int] = None,
+ swhid: Optional[str] = None,
replace: bool = False,
url: str = "https://deposit.softwareheritage.org",
verbose: bool = False,
@@ -433,6 +443,7 @@
slug,
partial,
deposit_id,
+ swhid,
replace,
url,
name,
diff --git a/swh/deposit/client.py b/swh/deposit/client.py
--- a/swh/deposit/client.py
+++ b/swh/deposit/client.py
@@ -11,7 +11,7 @@
import hashlib
import logging
import os
-from typing import Any, Dict
+from typing import Any, Dict, Optional
from urllib.parse import urljoin
import requests
@@ -22,6 +22,64 @@
logger = logging.getLogger(__name__)
+def compute_unified_information(
+ collection: str,
+ in_progress: bool,
+ slug: str,
+ *,
+ filepath: Optional[str] = None,
+ swhid: Optional[str] = None,
+ **kwargs,
+) -> Dict[str, Any]:
+ """Given a filepath, compute necessary information on that file.
+
+ Args:
+ collection: Deposit collection
+ in_progress: do we finalize the deposit?
+ slug: external id to use
+ filepath: Path to the file to compute the necessary information out of
+ swhid: Deposit swhid if any
+
+ Returns:
+ dict with keys:
+
+ 'slug': external id to use
+ 'in_progress': do we finalize the deposit?
+ 'content-type': content type associated
+ 'md5sum': md5 sum
+ 'filename': filename
+ 'filepath': filepath
+ 'swhid': deposit swhid
+
+ """
+ result: Dict[str, Any] = {
+ "slug": slug,
+ "in_progress": in_progress,
+ "swhid": swhid,
+ }
+ content_type: Optional[str] = None
+ md5sum: Optional[str] = None
+
+ if filepath:
+ filename = os.path.basename(filepath)
+ md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest()
+ extension = filename.split(".")[-1]
+ if "zip" in extension:
+ content_type = "application/zip"
+ else:
+ content_type = "application/x-tar"
+ result.update(
+ {
+ "content-type": content_type,
+ "md5sum": md5sum,
+ "filename": filename,
+ "filepath": filepath,
+ }
+ )
+
+ return result
+
+
class MaintenanceError(ValueError):
"""Informational maintenance error exception
@@ -243,7 +301,7 @@
"""
pass
- def compute_information(self, *args, **kwargs):
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
"""Compute some more information given the inputs (e.g http headers,
...)
@@ -415,50 +473,7 @@
],
)
- def _compute_information(
- self, collection, filepath, in_progress, slug, is_archive=True
- ):
- """Given a filepath, compute necessary information on that file.
-
- Args:
- filepath (str): Path to a file
- is_archive (bool): is it an archive or not?
-
- Returns:
- dict with keys:
- 'content-type': content type associated
- 'md5sum': md5 sum
- 'filename': filename
- """
- filename = os.path.basename(filepath)
-
- if is_archive:
- md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest()
- extension = filename.split(".")[-1]
- if "zip" in extension:
- content_type = "application/zip"
- else:
- content_type = "application/x-tar"
- else:
- content_type = None
- md5sum = None
-
- return {
- "slug": slug,
- "in_progress": in_progress,
- "content-type": content_type,
- "md5sum": md5sum,
- "filename": filename,
- "filepath": filepath,
- }
-
- def compute_information(
- self, collection, filepath, in_progress, slug, is_archive=True, **kwargs
- ):
- info = self._compute_information(
- collection, filepath, in_progress, slug, is_archive=is_archive
- )
- info["headers"] = self.compute_headers(info)
+ def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]:
return info
def do_execute(self, method, url, info):
@@ -478,6 +493,13 @@
"CONTENT-DISPOSITION": "attachment; filename=%s" % (info["filename"],),
}
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+ info = compute_unified_information(
+ *args, filepath=kwargs["archive_path"], **kwargs
+ )
+ info["headers"] = self.compute_headers(info)
+ return info
+
class UpdateArchiveDepositClient(CreateArchiveDepositClient):
"""Update (add/replace) an archive (binary) deposit client."""
@@ -499,17 +521,37 @@
"CONTENT-TYPE": "application/atom+xml;type=entry",
}
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+ info = compute_unified_information(
+ *args, filepath=kwargs["metadata_path"], **kwargs
+ )
+ info["headers"] = self.compute_headers(info)
+ return info
+
-class UpdateMetadataDepositClient(CreateMetadataDepositClient):
- """Update (add/replace) a metadata deposit client."""
+class UpdateMetadataOnPartialDepositClient(CreateMetadataDepositClient):
+ """Update (add/replace) metadata on partial deposit scenario."""
def compute_url(self, collection, *args, deposit_id=None, **kwargs):
- return "/%s/%s/metadata/" % (collection, deposit_id)
+ return f"/{collection}/{deposit_id}/metadata/"
- def compute_method(self, *args, replace=False, **kwargs):
+ def compute_method(self, *args, replace: bool = False, **kwargs) -> str:
return "put" if replace else "post"
+class UpdateMetadataOnDoneDepositClient(UpdateMetadataOnPartialDepositClient):
+ """Update metadata on "done" deposit. This requires the deposit swhid."""
+
+ def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "CONTENT-TYPE": "application/atom+xml;type=entry",
+ "X_CHECK_SWHID": info["swhid"],
+ }
+
+ def compute_method(self, *args, **kwargs) -> str:
+ return "put"
+
+
class CreateMultipartDepositClient(BaseCreateDepositClient):
"""Create a multipart deposit client."""
@@ -537,12 +579,10 @@
return files, headers
- def compute_information(
- self, collection, archive, metadata, in_progress, slug, **kwargs
- ):
- info = self._compute_information(collection, archive, in_progress, slug)
- info_meta = self._compute_information(
- collection, metadata, in_progress, slug, is_archive=False
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+ info = compute_unified_information(*args, filepath=kwargs["archive_path"],)
+ info_meta = compute_unified_information(
+ *args, filepath=kwargs["metadata_path"],
)
files, headers = self._multipart_info(info, info_meta)
return {"files": files, "headers": headers}
@@ -568,36 +608,46 @@
"""Retrieve service document endpoint's information."""
return ServiceDocumentDepositClient(self.config).execute()
- def deposit_status(self, collection, deposit_id):
+ def deposit_status(self, collection: str, deposit_id: int):
"""Retrieve status information on a deposit."""
return StatusDepositClient(self.config).execute(collection, deposit_id)
def deposit_create(
- self, collection, slug, archive=None, metadata=None, in_progress=False
+ self,
+ collection: str,
+ slug: str,
+ archive: Optional[str] = None,
+ metadata: Optional[str] = None,
+ in_progress: bool = False,
):
"""Create a new deposit (archive, metadata, both as multipart)."""
if archive and not metadata:
return CreateArchiveDepositClient(self.config).execute(
- collection, archive, in_progress, slug
+ collection, in_progress, slug, archive_path=archive
)
elif not archive and metadata:
return CreateMetadataDepositClient(self.config).execute(
- collection, metadata, in_progress, slug, is_archive=False
+ collection, in_progress, slug, metadata_path=metadata
)
else:
return CreateMultipartDepositClient(self.config).execute(
- collection, archive, metadata, in_progress, slug
+ collection,
+ in_progress,
+ slug,
+ archive_path=archive,
+ metadata_path=metadata,
)
def deposit_update(
self,
- collection,
- deposit_id,
- slug,
- archive=None,
- metadata=None,
- in_progress=False,
- replace=False,
+ collection: str,
+ deposit_id: int,
+ slug: str,
+ archive: Optional[str] = None,
+ metadata: Optional[str] = None,
+ in_progress: bool = False,
+ replace: bool = False,
+ swhid: Optional[str] = None,
):
"""Update (add/replace) existing deposit (archive, metadata, both)."""
r = self.deposit_status(collection, deposit_id)
@@ -605,39 +655,55 @@
return r
status = r["deposit_status"]
- if status != "partial":
+ if swhid is None and status != "partial":
return {
"error": "You can only act on deposit with status 'partial'",
- "detail": "The deposit %s has status '%s'" % (deposit_id, status),
+ "detail": f"The deposit {deposit_id} has status '{status}'",
+ "deposit_status": status,
+ "deposit_id": deposit_id,
+ }
+ if swhid is not None and status != "done":
+ return {
+ "error": "You can only update metadata on deposit with status 'done'",
+ "detail": f"The deposit {deposit_id} has status '{status}'",
"deposit_status": status,
"deposit_id": deposit_id,
}
if archive and not metadata:
r = UpdateArchiveDepositClient(self.config).execute(
collection,
- archive,
in_progress,
slug,
deposit_id=deposit_id,
+ archive_path=archive,
replace=replace,
)
- elif not archive and metadata:
- r = UpdateMetadataDepositClient(self.config).execute(
+ elif not archive and metadata and swhid is None:
+ r = UpdateMetadataOnPartialDepositClient(self.config).execute(
collection,
- metadata,
in_progress,
slug,
deposit_id=deposit_id,
+ metadata_path=metadata,
replace=replace,
)
+ elif not archive and metadata and swhid is not None:
+ r = UpdateMetadataOnDoneDepositClient(self.config).execute(
+ collection,
+ in_progress,
+ slug,
+ deposit_id=deposit_id,
+ metadata_path=metadata,
+ swhid=swhid,
+ )
else:
r = UpdateMultipartDepositClient(self.config).execute(
collection,
- archive,
- metadata,
in_progress,
slug,
deposit_id=deposit_id,
+ archive_path=archive,
+ metadata_path=metadata,
replace=replace,
)
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -599,7 +599,7 @@
)
with open(deposit_status_xml_path, "r") as f:
deposit_status_xml = f.read()
- expected_deposit_dict = dict(parse_xml(deposit_status_xml))
+ expected_deposit_status = dict(parse_xml(deposit_status_xml))
result = cli_runner.invoke(
cli,
@@ -627,4 +627,97 @@
result_output = result.output
actual_deposit = callable_fn(result_output)
- assert actual_deposit == expected_deposit_dict
+ assert actual_deposit == expected_deposit_status
+
+
+def test_cli_update_metadata_with_swhid_on_completed_deposit(
+ datadir, requests_mock_datadir, cli_runner
+):
+ """Update new metadata on a completed deposit (status done) is ok
+ """
+ api_url_basename = "deposit.test.updateswhid"
+ deposit_id = 123
+ deposit_status_xml_path = os.path.join(
+ datadir, f"https_{api_url_basename}", f"1_test_{deposit_id}_status"
+ )
+ with open(deposit_status_xml_path, "r") as f:
+ deposit_status_xml = f.read()
+ expected_deposit_status = dict(parse_xml(deposit_status_xml))
+
+ assert expected_deposit_status["deposit_status"] == "done"
+ assert expected_deposit_status["deposit_swh_id"] is not None
+
+ result = cli_runner.invoke(
+ cli,
+ [
+ "upload",
+ "--url",
+ f"https://{api_url_basename}/1",
+ "--username",
+ TEST_USER["username"],
+ "--password",
+ TEST_USER["password"],
+ "--name",
+ "test-project",
+ "--author",
+ "John Doe",
+ "--deposit-id",
+ deposit_id,
+ "--swhid",
+ expected_deposit_status["deposit_swh_id"],
+ "--format",
+ "json",
+ ],
+ )
+ assert result.exit_code == 0, result.output
+ actual_deposit_status = json.loads(result.output)
+ assert "error" not in actual_deposit_status
+ assert actual_deposit_status == expected_deposit_status
+
+
+def test_cli_update_metadata_with_swhid_on_other_status_deposit(
+ datadir, requests_mock_datadir, cli_runner
+):
+ """Update new metadata with swhid on other deposit status is not possible
+ """
+ api_url_basename = "deposit.test.updateswhid"
+ deposit_id = 321
+ deposit_status_xml_path = os.path.join(
+ datadir, f"https_{api_url_basename}", f"1_test_{deposit_id}_status"
+ )
+ with open(deposit_status_xml_path, "r") as f:
+ deposit_status_xml = f.read()
+ expected_deposit_status = dict(parse_xml(deposit_status_xml))
+ assert expected_deposit_status["deposit_status"] != "done"
+
+ result = cli_runner.invoke(
+ cli,
+ [
+ "upload",
+ "--url",
+ f"https://{api_url_basename}/1",
+ "--username",
+ TEST_USER["username"],
+ "--password",
+ TEST_USER["password"],
+ "--name",
+ "test-project",
+ "--author",
+ "John Doe",
+ "--deposit-id",
+ deposit_id,
+ "--swhid",
+ "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
+ "--format",
+ "json",
+ ],
+ )
+ assert result.exit_code == 0, result.output
+ actual_result = json.loads(result.output)
+ assert "error" in actual_result
+ assert actual_result == {
+ "error": "You can only update metadata on deposit with status 'done'",
+ "detail": "The deposit 321 has status 'partial'",
+ "deposit_status": "partial",
+ "deposit_id": 321,
+ }
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument
@@ -0,0 +1,26 @@
+<?xml version="1.0" ?>
+<service xmlns:dcterms="http://purl.org/dc/terms/"
+ xmlns:sword="http://purl.org/net/sword/terms/"
+ xmlns:atom="http://www.w3.org/2005/Atom"
+ xmlns="http://www.w3.org/2007/app">
+
+ <sword:version>2.0</sword:version>
+ <sword:maxUploadSize>209715200</sword:maxUploadSize>
+
+ <workspace>
+ <atom:title>The Software Heritage (SWH) Archive</atom:title>
+ <collection href="test">
+ <atom:title>test Software Collection</atom:title>
+ <accept>application/zip</accept>
+ <accept>application/x-tar</accept>
+ <sword:collectionPolicy>Collection Policy</sword:collectionPolicy>
+ <dcterms:abstract>Software Heritage Archive</dcterms:abstract>
+ <sword:treatment>Collect, Preserve, Share</sword:treatment>
+ <sword:mediation>false</sword:mediation>
+ <sword:metadataRelevantHeader>false</sword:metadataRelevantHeader>
+ <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging>
+ <sword:service>https://deposit.test.updateswhid/1/test/</sword:service>
+ <sword:name>test</sword:name>
+ </collection>
+ </workspace>
+</service>
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata
@@ -0,0 +1,10 @@
+<entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/"
+ xmlns:dcterms="http://purl.org/dc/terms/">
+ <deposit_id>123</deposit_id>
+ <deposit_status>done</deposit_status>
+ <deposit_status_detail>The deposit has been successfully loaded into the Software Heritage archive</deposit_status_detail>
+ <deposit_swh_id>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea</deposit_swh_id>
+ <deposit_swh_id_context>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/</deposit_swh_id_context>
+ <deposit_external_id>check-deposit-2020-10-08T13:52:34.509655</deposit_external_id>
+</entry>
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status
@@ -0,0 +1,10 @@
+<entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/"
+ xmlns:dcterms="http://purl.org/dc/terms/">
+ <deposit_id>123</deposit_id>
+ <deposit_status>done</deposit_status>
+ <deposit_status_detail>The deposit has been successfully loaded into the Software Heritage archive</deposit_status_detail>
+ <deposit_swh_id>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea</deposit_swh_id>
+ <deposit_swh_id_context>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/</deposit_swh_id_context>
+ <deposit_external_id>check-deposit-2020-10-08T13:52:34.509655</deposit_external_id>
+</entry>
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status
@@ -0,0 +1,8 @@
+<entry xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/"
+ xmlns:dcterms="http://purl.org/dc/terms/">
+ <deposit_id>321</deposit_id>
+ <deposit_status>partial</deposit_status>
+ <deposit_status_detail>The deposit is in partial state</deposit_status_detail>
+ <deposit_external_id>check-deposit-2020-10-08T13:52:34.509655</deposit_external_id>
+</entry>
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:50 PM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222447
Attached To
D4228: deposit_client: Allow deposit metadata update on completed deposit
Event Timeline
Log In to Comment