diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -141,6 +141,7 @@ slug: Optional[str], partial: bool, deposit_id: Optional[int], + swhid: Optional[str], replace: bool, url: str, name: Optional[str], @@ -188,6 +189,7 @@ 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier + 'swhid': optional deposit swhid """ if archive_deposit and metadata_deposit: @@ -264,6 +266,7 @@ "client": client, "url": url, "deposit_id": deposit_id, + "swhid": swhid, "replace": replace, } @@ -299,6 +302,7 @@ "slug", "in_progress", "replace", + "swhid", ) return client.deposit_update(**_subdict(config, keys)) @@ -355,6 +359,11 @@ default=None, help="(Optional) Update an existing partial deposit with its identifier", ) # noqa +@click.option( + "--swhid", + default=None, + help="(Optional) Update an existing done deposit with new metadata", +) @click.option( "--replace/--no-replace", default=False, @@ -397,6 +406,7 @@ slug: Optional[str] = None, partial: bool = False, deposit_id: Optional[int] = None, + swhid: Optional[str] = None, replace: bool = False, url: str = "https://deposit.softwareheritage.org", verbose: bool = False, @@ -433,6 +443,7 @@ slug, partial, deposit_id, + swhid, replace, url, name, diff --git a/swh/deposit/client.py b/swh/deposit/client.py --- a/swh/deposit/client.py +++ b/swh/deposit/client.py @@ -11,7 +11,7 @@ import hashlib import logging import os -from typing import Any, Dict +from typing import Any, Dict, Optional from urllib.parse import urljoin import requests @@ -416,47 +416,89 @@ ) def _compute_information( - self, collection, filepath, in_progress, slug, is_archive=True - ): + self, + collection: str, + in_progress: bool, + slug: str, + archive_path: Optional[str] = None, + metadata_path: Optional[str] = None, + swhid: Optional[str] = None, + ) -> Dict[str, Any]: """Given a filepath, compute necessary information on that file. Args: - filepath (str): Path to a file - is_archive (bool): is it an archive or not? + collection: Deposit collection + in_progress: do we finalize the deposit? + slug: external id to use + archive_path: Path to the archive file + metadata_path: Path to the metadata file + swhid: Deposit swhid if any Returns: dict with keys: + + 'slug': external id to use + 'in_progress': do we finalize the deposit? 'content-type': content type associated 'md5sum': md5 sum 'filename': filename + 'filepath': filepath + 'swhid': deposit swhid + """ - filename = os.path.basename(filepath) + result: Dict[str, Any] = { + "slug": slug, + "in_progress": in_progress, + "swhid": swhid, + } + content_type: Optional[str] = None + md5sum: Optional[str] = None + + if archive_path is not None: + filepath = archive_path + elif metadata_path is not None: + filepath = metadata_path - if is_archive: + if filepath: + filename = os.path.basename(filepath) md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest() extension = filename.split(".")[-1] if "zip" in extension: content_type = "application/zip" else: content_type = "application/x-tar" - else: - content_type = None - md5sum = None + result.update( + { + "content-type": content_type, + "md5sum": md5sum, + "filename": filename, + "filepath": filepath, + } + ) - return { - "slug": slug, - "in_progress": in_progress, - "content-type": content_type, - "md5sum": md5sum, - "filename": filename, - "filepath": filepath, - } + return result + + def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]: + return info def compute_information( - self, collection, filepath, in_progress, slug, is_archive=True, **kwargs - ): + self, + collection: str, + in_progress: bool, + slug: str, + replace: bool = False, + deposit_id: Optional[int] = None, + archive_path: Optional[str] = None, + metadata_path: Optional[str] = None, + swhid: Optional[str] = None, + ) -> Dict[str, Any]: info = self._compute_information( - collection, filepath, in_progress, slug, is_archive=is_archive + collection, + in_progress, + slug, + archive_path=archive_path, + metadata_path=metadata_path, + swhid=swhid, ) info["headers"] = self.compute_headers(info) return info @@ -500,16 +542,29 @@ } -class UpdateMetadataDepositClient(CreateMetadataDepositClient): - """Update (add/replace) a metadata deposit client.""" +class UpdateMetadataOnPartialDepositClient(CreateMetadataDepositClient): + """Update (add/replace) metadata on partial deposit scenario.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): - return "/%s/%s/metadata/" % (collection, deposit_id) + return f"/{collection}/{deposit_id}/metadata/" - def compute_method(self, *args, replace=False, **kwargs): + def compute_method(self, *args, replace: bool = False, **kwargs) -> str: return "put" if replace else "post" +class UpdateMetadataOnDoneDepositClient(UpdateMetadataOnPartialDepositClient): + """Update metadata on "done" deposit. This requires the deposit swhid.""" + + def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]: + return { + "CONTENT-TYPE": "application/atom+xml;type=entry", + "HTTP_X_CHECK_SWHID": info["swhid"], + } + + def compute_method(self, *args, **kwargs) -> str: + return "put" + + class CreateMultipartDepositClient(BaseCreateDepositClient): """Create a multipart deposit client.""" @@ -538,11 +593,21 @@ return files, headers def compute_information( - self, collection, archive, metadata, in_progress, slug, **kwargs - ): - info = self._compute_information(collection, archive, in_progress, slug) + self, + collection: str, + in_progress: bool, + slug: str, + replace: bool = False, + deposit_id: Optional[int] = None, + archive_path: Optional[str] = None, + metadata_path: Optional[str] = None, + swhid: Optional[str] = None, + ) -> Dict[str, Any]: + info = self._compute_information( + collection, in_progress, slug, archive_path=archive_path + ) info_meta = self._compute_information( - collection, metadata, in_progress, slug, is_archive=False + collection, in_progress, slug, metadata_path=metadata_path, ) files, headers = self._multipart_info(info, info_meta) return {"files": files, "headers": headers} @@ -568,36 +633,46 @@ """Retrieve service document endpoint's information.""" return ServiceDocumentDepositClient(self.config).execute() - def deposit_status(self, collection, deposit_id): + def deposit_status(self, collection: str, deposit_id: int): """Retrieve status information on a deposit.""" return StatusDepositClient(self.config).execute(collection, deposit_id) def deposit_create( - self, collection, slug, archive=None, metadata=None, in_progress=False + self, + collection: str, + slug: str, + archive: Optional[str] = None, + metadata: Optional[str] = None, + in_progress: bool = False, ): """Create a new deposit (archive, metadata, both as multipart).""" if archive and not metadata: return CreateArchiveDepositClient(self.config).execute( - collection, archive, in_progress, slug + collection, in_progress, slug, archive_path=archive ) elif not archive and metadata: return CreateMetadataDepositClient(self.config).execute( - collection, metadata, in_progress, slug, is_archive=False + collection, in_progress, slug, metadata_path=metadata ) else: return CreateMultipartDepositClient(self.config).execute( - collection, archive, metadata, in_progress, slug + collection, + in_progress, + slug, + archive_path=archive, + metadata_path=metadata, ) def deposit_update( self, - collection, - deposit_id, - slug, - archive=None, - metadata=None, - in_progress=False, - replace=False, + collection: str, + deposit_id: int, + slug: str, + archive: Optional[str] = None, + metadata: Optional[str] = None, + in_progress: bool = False, + replace: bool = False, + swhid: Optional[str] = None, ): """Update (add/replace) existing deposit (archive, metadata, both).""" r = self.deposit_status(collection, deposit_id) @@ -605,39 +680,55 @@ return r status = r["deposit_status"] - if status != "partial": + if swhid is None and status != "partial": return { "error": "You can only act on deposit with status 'partial'", - "detail": "The deposit %s has status '%s'" % (deposit_id, status), + "detail": f"The deposit {deposit_id} has status '{status}'", + "deposit_status": status, + "deposit_id": deposit_id, + } + if swhid is not None and status != "done": + return { + "error": "You can only update metadata on deposit with status 'done'", + "detail": f"The deposit {deposit_id} has status '{status}'", "deposit_status": status, "deposit_id": deposit_id, } if archive and not metadata: r = UpdateArchiveDepositClient(self.config).execute( collection, - archive, in_progress, slug, deposit_id=deposit_id, + archive_path=archive, replace=replace, ) - elif not archive and metadata: - r = UpdateMetadataDepositClient(self.config).execute( + elif not archive and metadata and swhid is None: + r = UpdateMetadataOnPartialDepositClient(self.config).execute( collection, - metadata, in_progress, slug, deposit_id=deposit_id, + metadata_path=metadata, replace=replace, ) + elif not archive and metadata and swhid is not None: + r = UpdateMetadataOnDoneDepositClient(self.config).execute( + collection, + in_progress, + slug, + deposit_id=deposit_id, + metadata_path=metadata, + swhid=swhid, + ) else: r = UpdateMultipartDepositClient(self.config).execute( collection, - archive, - metadata, in_progress, slug, deposit_id=deposit_id, + archive_path=archive, + metadata_path=metadata, replace=replace, ) diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -626,3 +626,43 @@ actual_deposit = callable_fn(result_output) assert actual_deposit == expected_deposit_dict + + +def test_cli_deposit_done_update_metadata_with_swhid( + sample_archive, tmp_path, requests_mock_datadir, cli_runner +): + """Update new metadata on a deposit with status "done" + """ + result = cli_runner.invoke( + cli, + [ + "upload", + "--url", + "https://deposit.test.updateswhid/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--name", + "test-project", + "--author", + "John Doe", + "--deposit-id", + 123, + "--swhid", + "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", + "--format", + "json", + ], + ) + assert result.exit_code == 0, result.output + actual_deposit_status = json.loads(result.output) + assert "error" not in actual_deposit_status + assert actual_deposit_status == { + "deposit_id": "123", + "deposit_status": "done", + "deposit_status_detail": "The deposit has been successfully loaded into the Software Heritage archive", + "deposit_swh_id": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", + "deposit_swh_id_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", + "deposit_external_id": "check-deposit-2020-10-08T13:52:34.509655", + } diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument @@ -0,0 +1,26 @@ + + + + 2.0 + 209715200 + + + The Software Heritage (SWH) Archive + + test Software Collection + application/zip + application/x-tar + Collection Policy + Software Heritage Archive + Collect, Preserve, Share + false + false + http://purl.org/net/sword/package/SimpleZip + https://deposit.test.updateswhid/1/test/ + test + + + diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata @@ -0,0 +1,10 @@ + + 123 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status @@ -0,0 +1,10 @@ + + 123 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 +