diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -141,6 +141,7 @@
slug: Optional[str],
partial: bool,
deposit_id: Optional[int],
+ swhid: Optional[str],
replace: bool,
url: str,
name: Optional[str],
@@ -188,6 +189,7 @@
'url': deposit's server main entry point
'deposit_type': deposit's type (binary, multipart, metadata)
'deposit_id': optional deposit identifier
+ 'swhid': optional deposit swhid
"""
if archive_deposit and metadata_deposit:
@@ -264,6 +266,7 @@
"client": client,
"url": url,
"deposit_id": deposit_id,
+ "swhid": swhid,
"replace": replace,
}
@@ -299,6 +302,7 @@
"slug",
"in_progress",
"replace",
+ "swhid",
)
return client.deposit_update(**_subdict(config, keys))
@@ -355,6 +359,11 @@
default=None,
help="(Optional) Update an existing partial deposit with its identifier",
) # noqa
+@click.option(
+ "--swhid",
+ default=None,
+ help="(Optional) Update existing completed deposit (status done) with new metadata",
+)
@click.option(
"--replace/--no-replace",
default=False,
@@ -397,6 +406,7 @@
slug: Optional[str] = None,
partial: bool = False,
deposit_id: Optional[int] = None,
+ swhid: Optional[str] = None,
replace: bool = False,
url: str = "https://deposit.softwareheritage.org",
verbose: bool = False,
@@ -433,6 +443,7 @@
slug,
partial,
deposit_id,
+ swhid,
replace,
url,
name,
diff --git a/swh/deposit/client.py b/swh/deposit/client.py
--- a/swh/deposit/client.py
+++ b/swh/deposit/client.py
@@ -11,7 +11,7 @@
import hashlib
import logging
import os
-from typing import Any, Dict
+from typing import Any, Dict, Optional
from urllib.parse import urljoin
import requests
@@ -22,6 +22,64 @@
logger = logging.getLogger(__name__)
+def compute_unified_information(
+ collection: str,
+ in_progress: bool,
+ slug: str,
+ *,
+ filepath: Optional[str] = None,
+ swhid: Optional[str] = None,
+ **kwargs,
+) -> Dict[str, Any]:
+ """Given a filepath, compute necessary information on that file.
+
+ Args:
+ collection: Deposit collection
+ in_progress: do we finalize the deposit?
+ slug: external id to use
+ filepath: Path to the file to compute the necessary information out of
+ swhid: Deposit swhid if any
+
+ Returns:
+ dict with keys:
+
+ 'slug': external id to use
+ 'in_progress': do we finalize the deposit?
+ 'content-type': content type associated
+ 'md5sum': md5 sum
+ 'filename': filename
+ 'filepath': filepath
+ 'swhid': deposit swhid
+
+ """
+ result: Dict[str, Any] = {
+ "slug": slug,
+ "in_progress": in_progress,
+ "swhid": swhid,
+ }
+ content_type: Optional[str] = None
+ md5sum: Optional[str] = None
+
+ if filepath:
+ filename = os.path.basename(filepath)
+ md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest()
+ extension = filename.split(".")[-1]
+ if "zip" in extension:
+ content_type = "application/zip"
+ else:
+ content_type = "application/x-tar"
+ result.update(
+ {
+ "content-type": content_type,
+ "md5sum": md5sum,
+ "filename": filename,
+ "filepath": filepath,
+ }
+ )
+
+ return result
+
+
class MaintenanceError(ValueError):
"""Informational maintenance error exception
@@ -243,7 +301,7 @@
"""
pass
- def compute_information(self, *args, **kwargs):
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
"""Compute some more information given the inputs (e.g http headers,
...)
@@ -415,50 +473,7 @@
],
)
- def _compute_information(
- self, collection, filepath, in_progress, slug, is_archive=True
- ):
- """Given a filepath, compute necessary information on that file.
-
- Args:
- filepath (str): Path to a file
- is_archive (bool): is it an archive or not?
-
- Returns:
- dict with keys:
- 'content-type': content type associated
- 'md5sum': md5 sum
- 'filename': filename
- """
- filename = os.path.basename(filepath)
-
- if is_archive:
- md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest()
- extension = filename.split(".")[-1]
- if "zip" in extension:
- content_type = "application/zip"
- else:
- content_type = "application/x-tar"
- else:
- content_type = None
- md5sum = None
-
- return {
- "slug": slug,
- "in_progress": in_progress,
- "content-type": content_type,
- "md5sum": md5sum,
- "filename": filename,
- "filepath": filepath,
- }
-
- def compute_information(
- self, collection, filepath, in_progress, slug, is_archive=True, **kwargs
- ):
- info = self._compute_information(
- collection, filepath, in_progress, slug, is_archive=is_archive
- )
- info["headers"] = self.compute_headers(info)
+ def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]:
return info
def do_execute(self, method, url, info):
@@ -478,6 +493,11 @@
"CONTENT-DISPOSITION": "attachment; filename=%s" % (info["filename"],),
}
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+ info = compute_unified_information(*args, filepath=kwargs["archive_path"])
+ info["headers"] = self.compute_headers(info)
+ return info
+
class UpdateArchiveDepositClient(CreateArchiveDepositClient):
"""Update (add/replace) an archive (binary) deposit client."""
@@ -499,17 +519,35 @@
"CONTENT-TYPE": "application/atom+xml;type=entry",
}
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+ info = compute_unified_information(*args, filepath=kwargs["metadata_path"])
+ info["headers"] = self.compute_headers(info)
+ return info
+
-class UpdateMetadataDepositClient(CreateMetadataDepositClient):
- """Update (add/replace) a metadata deposit client."""
+class UpdateMetadataOnPartialDepositClient(CreateMetadataDepositClient):
+ """Update (add/replace) metadata on partial deposit scenario."""
def compute_url(self, collection, *args, deposit_id=None, **kwargs):
- return "/%s/%s/metadata/" % (collection, deposit_id)
+ return f"/{collection}/{deposit_id}/metadata/"
- def compute_method(self, *args, replace=False, **kwargs):
+ def compute_method(self, *args, replace: bool = False, **kwargs) -> str:
return "put" if replace else "post"
+class UpdateMetadataOnDoneDepositClient(UpdateMetadataOnPartialDepositClient):
+ """Update metadata on "done" deposit. This requires the deposit swhid."""
+
+ def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "CONTENT-TYPE": "application/atom+xml;type=entry",
+ "HTTP_X_CHECK_SWHID": info["swhid"],
+ }
+
+ def compute_method(self, *args, **kwargs) -> str:
+ return "put"
+
+
class CreateMultipartDepositClient(BaseCreateDepositClient):
"""Create a multipart deposit client."""
@@ -537,12 +575,10 @@
return files, headers
- def compute_information(
- self, collection, archive, metadata, in_progress, slug, **kwargs
- ):
- info = self._compute_information(collection, archive, in_progress, slug)
- info_meta = self._compute_information(
- collection, metadata, in_progress, slug, is_archive=False
+ def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+ info = compute_unified_information(*args, filepath=kwargs["archive_path"],)
+ info_meta = compute_unified_information(
+ *args, filepath=kwargs["metadata_path"],
)
files, headers = self._multipart_info(info, info_meta)
return {"files": files, "headers": headers}
@@ -568,36 +604,46 @@
"""Retrieve service document endpoint's information."""
return ServiceDocumentDepositClient(self.config).execute()
- def deposit_status(self, collection, deposit_id):
+ def deposit_status(self, collection: str, deposit_id: int):
"""Retrieve status information on a deposit."""
return StatusDepositClient(self.config).execute(collection, deposit_id)
def deposit_create(
- self, collection, slug, archive=None, metadata=None, in_progress=False
+ self,
+ collection: str,
+ slug: str,
+ archive: Optional[str] = None,
+ metadata: Optional[str] = None,
+ in_progress: bool = False,
):
"""Create a new deposit (archive, metadata, both as multipart)."""
if archive and not metadata:
return CreateArchiveDepositClient(self.config).execute(
- collection, archive, in_progress, slug
+ collection, in_progress, slug, archive_path=archive
)
elif not archive and metadata:
return CreateMetadataDepositClient(self.config).execute(
- collection, metadata, in_progress, slug, is_archive=False
+ collection, in_progress, slug, metadata_path=metadata
)
else:
return CreateMultipartDepositClient(self.config).execute(
- collection, archive, metadata, in_progress, slug
+ collection,
+ in_progress,
+ slug,
+ archive_path=archive,
+ metadata_path=metadata,
)
def deposit_update(
self,
- collection,
- deposit_id,
- slug,
- archive=None,
- metadata=None,
- in_progress=False,
- replace=False,
+ collection: str,
+ deposit_id: int,
+ slug: str,
+ archive: Optional[str] = None,
+ metadata: Optional[str] = None,
+ in_progress: bool = False,
+ replace: bool = False,
+ swhid: Optional[str] = None,
):
"""Update (add/replace) existing deposit (archive, metadata, both)."""
r = self.deposit_status(collection, deposit_id)
@@ -605,39 +651,55 @@
return r
status = r["deposit_status"]
- if status != "partial":
+ if swhid is None and status != "partial":
return {
"error": "You can only act on deposit with status 'partial'",
- "detail": "The deposit %s has status '%s'" % (deposit_id, status),
+ "detail": f"The deposit {deposit_id} has status '{status}'",
+ "deposit_status": status,
+ "deposit_id": deposit_id,
+ }
+ if swhid is not None and status != "done":
+ return {
+ "error": "You can only update metadata on deposit with status 'done'",
+ "detail": f"The deposit {deposit_id} has status '{status}'",
"deposit_status": status,
"deposit_id": deposit_id,
}
if archive and not metadata:
r = UpdateArchiveDepositClient(self.config).execute(
collection,
- archive,
in_progress,
slug,
deposit_id=deposit_id,
+ archive_path=archive,
replace=replace,
)
- elif not archive and metadata:
- r = UpdateMetadataDepositClient(self.config).execute(
+ elif not archive and metadata and swhid is None:
+ r = UpdateMetadataOnPartialDepositClient(self.config).execute(
collection,
- metadata,
in_progress,
slug,
deposit_id=deposit_id,
+ metadata_path=metadata,
replace=replace,
)
+ elif not archive and metadata and swhid is not None:
+ r = UpdateMetadataOnDoneDepositClient(self.config).execute(
+ collection,
+ in_progress,
+ slug,
+ deposit_id=deposit_id,
+ metadata_path=metadata,
+ swhid=swhid,
+ )
else:
r = UpdateMultipartDepositClient(self.config).execute(
collection,
- archive,
- metadata,
in_progress,
slug,
deposit_id=deposit_id,
+ archive_path=archive,
+ metadata_path=metadata,
replace=replace,
)
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -597,7 +597,7 @@
)
with open(deposit_status_xml_path, "r") as f:
deposit_status_xml = f.read()
- expected_deposit_dict = dict(parse_xml(deposit_status_xml))
+ expected_deposit_status = dict(parse_xml(deposit_status_xml))
result = cli_runner.invoke(
cli,
@@ -625,4 +625,97 @@
result_output = result.output
actual_deposit = callable_fn(result_output)
- assert actual_deposit == expected_deposit_dict
+ assert actual_deposit == expected_deposit_status
+
+
+def test_cli_update_metadata_with_swhid_on_completed_deposit(
+ datadir, requests_mock_datadir, cli_runner
+):
+ """Update new metadata on a completed deposit (status done) is ok
+ """
+ api_url_basename = "deposit.test.updateswhid"
+ deposit_id = 123
+ deposit_status_xml_path = os.path.join(
+ datadir, f"https_{api_url_basename}", f"1_test_{deposit_id}_status"
+ )
+ with open(deposit_status_xml_path, "r") as f:
+ deposit_status_xml = f.read()
+ expected_deposit_status = dict(parse_xml(deposit_status_xml))
+
+ assert expected_deposit_status["deposit_status"] == "done"
+ assert expected_deposit_status["deposit_swh_id"] is not None
+
+ result = cli_runner.invoke(
+ cli,
+ [
+ "upload",
+ "--url",
+ f"https://{api_url_basename}/1",
+ "--username",
+ TEST_USER["username"],
+ "--password",
+ TEST_USER["password"],
+ "--name",
+ "test-project",
+ "--author",
+ "John Doe",
+ "--deposit-id",
+ deposit_id,
+ "--swhid",
+ expected_deposit_status["deposit_swh_id"],
+ "--format",
+ "json",
+ ],
+ )
+ assert result.exit_code == 0, result.output
+ actual_deposit_status = json.loads(result.output)
+ assert "error" not in actual_deposit_status
+ assert actual_deposit_status == expected_deposit_status
+
+
+def test_cli_update_metadata_with_swhid_on_other_status_deposit(
+ datadir, requests_mock_datadir, cli_runner
+):
+ """Update new metadata with swhid on other deposit status is not possible
+ """
+ api_url_basename = "deposit.test.updateswhid"
+ deposit_id = 321
+ deposit_status_xml_path = os.path.join(
+ datadir, f"https_{api_url_basename}", f"1_test_{deposit_id}_status"
+ )
+ with open(deposit_status_xml_path, "r") as f:
+ deposit_status_xml = f.read()
+ expected_deposit_status = dict(parse_xml(deposit_status_xml))
+ assert expected_deposit_status["deposit_status"] != "done"
+
+ result = cli_runner.invoke(
+ cli,
+ [
+ "upload",
+ "--url",
+ f"https://{api_url_basename}/1",
+ "--username",
+ TEST_USER["username"],
+ "--password",
+ TEST_USER["password"],
+ "--name",
+ "test-project",
+ "--author",
+ "John Doe",
+ "--deposit-id",
+ deposit_id,
+ "--swhid",
+ "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
+ "--format",
+ "json",
+ ],
+ )
+ assert result.exit_code == 0, result.output
+ actual_result = json.loads(result.output)
+ assert "error" in actual_result
+ assert actual_result == {
+ "error": "You can only update metadata on deposit with status 'done'",
+ "detail": "The deposit 321 has status 'partial'",
+ "deposit_status": "partial",
+ "deposit_id": 321,
+ }
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument
@@ -0,0 +1,26 @@
+
+
+
+ 2.0
+ 209715200
+
+
+ The Software Heritage (SWH) Archive
+
+ test Software Collection
+ application/zip
+ application/x-tar
+ Collection Policy
+ Software Heritage Archive
+ Collect, Preserve, Share
+ false
+ false
+ http://purl.org/net/sword/package/SimpleZip
+ https://deposit.test.updateswhid/1/test/
+ test
+
+
+
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata
@@ -0,0 +1,10 @@
+
+ 123
+ done
+ The deposit has been successfully loaded into the Software Heritage archive
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/
+ check-deposit-2020-10-08T13:52:34.509655
+
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status
@@ -0,0 +1,10 @@
+
+ 123
+ done
+ The deposit has been successfully loaded into the Software Heritage archive
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/
+ check-deposit-2020-10-08T13:52:34.509655
+
diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status
@@ -0,0 +1,8 @@
+
+ 321
+ partial
+ The deposit is in partial state
+ check-deposit-2020-10-08T13:52:34.509655
+