diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py --- a/swh/deposit/cli/client.py +++ b/swh/deposit/cli/client.py @@ -141,6 +141,7 @@ slug: Optional[str], partial: bool, deposit_id: Optional[int], + swhid: Optional[str], replace: bool, url: str, name: Optional[str], @@ -188,6 +189,7 @@ 'url': deposit's server main entry point 'deposit_type': deposit's type (binary, multipart, metadata) 'deposit_id': optional deposit identifier + 'swhid': optional deposit swhid """ if archive_deposit and metadata_deposit: @@ -264,6 +266,7 @@ "client": client, "url": url, "deposit_id": deposit_id, + "swhid": swhid, "replace": replace, } @@ -299,6 +302,7 @@ "slug", "in_progress", "replace", + "swhid", ) return client.deposit_update(**_subdict(config, keys)) @@ -355,6 +359,11 @@ default=None, help="(Optional) Update an existing partial deposit with its identifier", ) # noqa +@click.option( + "--swhid", + default=None, + help="(Optional) Update existing completed deposit (status done) with new metadata", +) @click.option( "--replace/--no-replace", default=False, @@ -397,6 +406,7 @@ slug: Optional[str] = None, partial: bool = False, deposit_id: Optional[int] = None, + swhid: Optional[str] = None, replace: bool = False, url: str = "https://deposit.softwareheritage.org", verbose: bool = False, @@ -433,6 +443,7 @@ slug, partial, deposit_id, + swhid, replace, url, name, diff --git a/swh/deposit/client.py b/swh/deposit/client.py --- a/swh/deposit/client.py +++ b/swh/deposit/client.py @@ -11,7 +11,7 @@ import hashlib import logging import os -from typing import Any, Dict +from typing import Any, Dict, Optional from urllib.parse import urljoin import requests @@ -22,6 +22,64 @@ logger = logging.getLogger(__name__) +def compute_unified_information( + collection: str, + in_progress: bool, + slug: str, + *, + filepath: Optional[str] = None, + swhid: Optional[str] = None, + **kwargs, +) -> Dict[str, Any]: + """Given a filepath, compute necessary information on that file. + + Args: + collection: Deposit collection + in_progress: do we finalize the deposit? + slug: external id to use + filepath: Path to the file to compute the necessary information out of + swhid: Deposit swhid if any + + Returns: + dict with keys: + + 'slug': external id to use + 'in_progress': do we finalize the deposit? + 'content-type': content type associated + 'md5sum': md5 sum + 'filename': filename + 'filepath': filepath + 'swhid': deposit swhid + + """ + result: Dict[str, Any] = { + "slug": slug, + "in_progress": in_progress, + "swhid": swhid, + } + content_type: Optional[str] = None + md5sum: Optional[str] = None + + if filepath: + filename = os.path.basename(filepath) + md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest() + extension = filename.split(".")[-1] + if "zip" in extension: + content_type = "application/zip" + else: + content_type = "application/x-tar" + result.update( + { + "content-type": content_type, + "md5sum": md5sum, + "filename": filename, + "filepath": filepath, + } + ) + + return result + + class MaintenanceError(ValueError): """Informational maintenance error exception @@ -243,7 +301,7 @@ """ pass - def compute_information(self, *args, **kwargs): + def compute_information(self, *args, **kwargs) -> Dict[str, Any]: """Compute some more information given the inputs (e.g http headers, ...) @@ -415,50 +473,7 @@ ], ) - def _compute_information( - self, collection, filepath, in_progress, slug, is_archive=True - ): - """Given a filepath, compute necessary information on that file. - - Args: - filepath (str): Path to a file - is_archive (bool): is it an archive or not? - - Returns: - dict with keys: - 'content-type': content type associated - 'md5sum': md5 sum - 'filename': filename - """ - filename = os.path.basename(filepath) - - if is_archive: - md5sum = hashlib.md5(open(filepath, "rb").read()).hexdigest() - extension = filename.split(".")[-1] - if "zip" in extension: - content_type = "application/zip" - else: - content_type = "application/x-tar" - else: - content_type = None - md5sum = None - - return { - "slug": slug, - "in_progress": in_progress, - "content-type": content_type, - "md5sum": md5sum, - "filename": filename, - "filepath": filepath, - } - - def compute_information( - self, collection, filepath, in_progress, slug, is_archive=True, **kwargs - ): - info = self._compute_information( - collection, filepath, in_progress, slug, is_archive=is_archive - ) - info["headers"] = self.compute_headers(info) + def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]: return info def do_execute(self, method, url, info): @@ -478,6 +493,11 @@ "CONTENT-DISPOSITION": "attachment; filename=%s" % (info["filename"],), } + def compute_information(self, *args, **kwargs) -> Dict[str, Any]: + info = compute_unified_information(*args, filepath=kwargs["archive_path"]) + info["headers"] = self.compute_headers(info) + return info + class UpdateArchiveDepositClient(CreateArchiveDepositClient): """Update (add/replace) an archive (binary) deposit client.""" @@ -499,17 +519,35 @@ "CONTENT-TYPE": "application/atom+xml;type=entry", } + def compute_information(self, *args, **kwargs) -> Dict[str, Any]: + info = compute_unified_information(*args, filepath=kwargs["metadata_path"]) + info["headers"] = self.compute_headers(info) + return info + -class UpdateMetadataDepositClient(CreateMetadataDepositClient): - """Update (add/replace) a metadata deposit client.""" +class UpdateMetadataOnPartialDepositClient(CreateMetadataDepositClient): + """Update (add/replace) metadata on partial deposit scenario.""" def compute_url(self, collection, *args, deposit_id=None, **kwargs): - return "/%s/%s/metadata/" % (collection, deposit_id) + return f"/{collection}/{deposit_id}/metadata/" - def compute_method(self, *args, replace=False, **kwargs): + def compute_method(self, *args, replace: bool = False, **kwargs) -> str: return "put" if replace else "post" +class UpdateMetadataOnDoneDepositClient(UpdateMetadataOnPartialDepositClient): + """Update metadata on "done" deposit. This requires the deposit swhid.""" + + def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]: + return { + "CONTENT-TYPE": "application/atom+xml;type=entry", + "HTTP_X_CHECK_SWHID": info["swhid"], + } + + def compute_method(self, *args, **kwargs) -> str: + return "put" + + class CreateMultipartDepositClient(BaseCreateDepositClient): """Create a multipart deposit client.""" @@ -537,12 +575,10 @@ return files, headers - def compute_information( - self, collection, archive, metadata, in_progress, slug, **kwargs - ): - info = self._compute_information(collection, archive, in_progress, slug) - info_meta = self._compute_information( - collection, metadata, in_progress, slug, is_archive=False + def compute_information(self, *args, **kwargs) -> Dict[str, Any]: + info = compute_unified_information(*args, filepath=kwargs["archive_path"],) + info_meta = compute_unified_information( + *args, filepath=kwargs["metadata_path"], ) files, headers = self._multipart_info(info, info_meta) return {"files": files, "headers": headers} @@ -568,36 +604,46 @@ """Retrieve service document endpoint's information.""" return ServiceDocumentDepositClient(self.config).execute() - def deposit_status(self, collection, deposit_id): + def deposit_status(self, collection: str, deposit_id: int): """Retrieve status information on a deposit.""" return StatusDepositClient(self.config).execute(collection, deposit_id) def deposit_create( - self, collection, slug, archive=None, metadata=None, in_progress=False + self, + collection: str, + slug: str, + archive: Optional[str] = None, + metadata: Optional[str] = None, + in_progress: bool = False, ): """Create a new deposit (archive, metadata, both as multipart).""" if archive and not metadata: return CreateArchiveDepositClient(self.config).execute( - collection, archive, in_progress, slug + collection, in_progress, slug, archive_path=archive ) elif not archive and metadata: return CreateMetadataDepositClient(self.config).execute( - collection, metadata, in_progress, slug, is_archive=False + collection, in_progress, slug, metadata_path=metadata ) else: return CreateMultipartDepositClient(self.config).execute( - collection, archive, metadata, in_progress, slug + collection, + in_progress, + slug, + archive_path=archive, + metadata_path=metadata, ) def deposit_update( self, - collection, - deposit_id, - slug, - archive=None, - metadata=None, - in_progress=False, - replace=False, + collection: str, + deposit_id: int, + slug: str, + archive: Optional[str] = None, + metadata: Optional[str] = None, + in_progress: bool = False, + replace: bool = False, + swhid: Optional[str] = None, ): """Update (add/replace) existing deposit (archive, metadata, both).""" r = self.deposit_status(collection, deposit_id) @@ -605,39 +651,55 @@ return r status = r["deposit_status"] - if status != "partial": + if swhid is None and status != "partial": return { "error": "You can only act on deposit with status 'partial'", - "detail": "The deposit %s has status '%s'" % (deposit_id, status), + "detail": f"The deposit {deposit_id} has status '{status}'", + "deposit_status": status, + "deposit_id": deposit_id, + } + if swhid is not None and status != "done": + return { + "error": "You can only update metadata on deposit with status 'done'", + "detail": f"The deposit {deposit_id} has status '{status}'", "deposit_status": status, "deposit_id": deposit_id, } if archive and not metadata: r = UpdateArchiveDepositClient(self.config).execute( collection, - archive, in_progress, slug, deposit_id=deposit_id, + archive_path=archive, replace=replace, ) - elif not archive and metadata: - r = UpdateMetadataDepositClient(self.config).execute( + elif not archive and metadata and swhid is None: + r = UpdateMetadataOnPartialDepositClient(self.config).execute( collection, - metadata, in_progress, slug, deposit_id=deposit_id, + metadata_path=metadata, replace=replace, ) + elif not archive and metadata and swhid is not None: + r = UpdateMetadataOnDoneDepositClient(self.config).execute( + collection, + in_progress, + slug, + deposit_id=deposit_id, + metadata_path=metadata, + swhid=swhid, + ) else: r = UpdateMultipartDepositClient(self.config).execute( collection, - archive, - metadata, in_progress, slug, deposit_id=deposit_id, + archive_path=archive, + metadata_path=metadata, replace=replace, ) diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py --- a/swh/deposit/tests/cli/test_client.py +++ b/swh/deposit/tests/cli/test_client.py @@ -597,7 +597,7 @@ ) with open(deposit_status_xml_path, "r") as f: deposit_status_xml = f.read() - expected_deposit_dict = dict(parse_xml(deposit_status_xml)) + expected_deposit_status = dict(parse_xml(deposit_status_xml)) result = cli_runner.invoke( cli, @@ -625,4 +625,97 @@ result_output = result.output actual_deposit = callable_fn(result_output) - assert actual_deposit == expected_deposit_dict + assert actual_deposit == expected_deposit_status + + +def test_cli_update_metadata_with_swhid_on_completed_deposit( + datadir, requests_mock_datadir, cli_runner +): + """Update new metadata on a completed deposit (status done) is ok + """ + api_url_basename = "deposit.test.updateswhid" + deposit_id = 123 + deposit_status_xml_path = os.path.join( + datadir, f"https_{api_url_basename}", f"1_test_{deposit_id}_status" + ) + with open(deposit_status_xml_path, "r") as f: + deposit_status_xml = f.read() + expected_deposit_status = dict(parse_xml(deposit_status_xml)) + + assert expected_deposit_status["deposit_status"] == "done" + assert expected_deposit_status["deposit_swh_id"] is not None + + result = cli_runner.invoke( + cli, + [ + "upload", + "--url", + f"https://{api_url_basename}/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--name", + "test-project", + "--author", + "John Doe", + "--deposit-id", + deposit_id, + "--swhid", + expected_deposit_status["deposit_swh_id"], + "--format", + "json", + ], + ) + assert result.exit_code == 0, result.output + actual_deposit_status = json.loads(result.output) + assert "error" not in actual_deposit_status + assert actual_deposit_status == expected_deposit_status + + +def test_cli_update_metadata_with_swhid_on_other_status_deposit( + datadir, requests_mock_datadir, cli_runner +): + """Update new metadata with swhid on other deposit status is not possible + """ + api_url_basename = "deposit.test.updateswhid" + deposit_id = 321 + deposit_status_xml_path = os.path.join( + datadir, f"https_{api_url_basename}", f"1_test_{deposit_id}_status" + ) + with open(deposit_status_xml_path, "r") as f: + deposit_status_xml = f.read() + expected_deposit_status = dict(parse_xml(deposit_status_xml)) + assert expected_deposit_status["deposit_status"] != "done" + + result = cli_runner.invoke( + cli, + [ + "upload", + "--url", + f"https://{api_url_basename}/1", + "--username", + TEST_USER["username"], + "--password", + TEST_USER["password"], + "--name", + "test-project", + "--author", + "John Doe", + "--deposit-id", + deposit_id, + "--swhid", + "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", + "--format", + "json", + ], + ) + assert result.exit_code == 0, result.output + actual_result = json.loads(result.output) + assert "error" in actual_result + assert actual_result == { + "error": "You can only update metadata on deposit with status 'done'", + "detail": "The deposit 321 has status 'partial'", + "deposit_status": "partial", + "deposit_id": 321, + } diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_servicedocument @@ -0,0 +1,26 @@ + + + + 2.0 + 209715200 + + + The Software Heritage (SWH) Archive + + test Software Collection + application/zip + application/x-tar + Collection Policy + Software Heritage Archive + Collect, Preserve, Share + false + false + http://purl.org/net/sword/package/SimpleZip + https://deposit.test.updateswhid/1/test/ + test + + + diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_metadata @@ -0,0 +1,10 @@ + + 123 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_123_status @@ -0,0 +1,10 @@ + + 123 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + diff --git a/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/https_deposit.test.updateswhid/1_test_321_status @@ -0,0 +1,8 @@ + + 321 + partial + The deposit is in partial state + check-deposit-2020-10-08T13:52:34.509655 +