diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -480,3 +480,81 @@
         click.echo(yaml.dump(data))
     else:
         logger.info(data)
+
+
+@deposit.command("metadata-only")
+@click.option(
+    "--url",
+    default="https://deposit.softwareheritage.org",
+    help="(Optional) Deposit server api endpoint. By default, "
+    "https://deposit.softwareheritage.org/1",
+)
+@click.option("--username", required=True, help="(Mandatory) User's name")
+@click.option(
+    "--password", required=True, help="(Mandatory) User's associated password"
+)
+@click.option("--swhid", default=None, help="SWHID")
+@click.option(
+    "--metadata",
+    "metadata_path",
+    type=click.Path(exists=True),
+    required=True,
+    help="Path to xml metadata file",
+)
+@click.option(
+    "-f",
+    "--format",
+    "output_format",
+    default="logging",
+    type=click.Choice(["logging", "yaml", "json"]),
+    help="Output format results.",
+)
+@click.pass_context
+def metadata_only(ctx, url, username, password, swhid, metadata_path, output_format):
+    """Deposit metadata only upload
+
+    The metadata file is parsed to look for a SWHID reference. When it holds
+    none and --swhid is provided, the reference is added and the metadata
+    file is rewritten in place before being sent. When the file already
+    holds a reference, --swhid is not used. When neither provides a SWHID,
+    the command fails with an InputError.
+
+    """
+    import xmltodict
+
+    from swh.deposit.client import MaintenanceError, PublicApiDepositClient
+    from swh.deposit.utils import parse_swh_reference, parse_xml
+
+    # Parse to check for a swhid presence within the metadata file. The file
+    # is read as utf-8, which is the default encoding of parse_xml.
+    with open(metadata_path, "r", encoding="utf-8") as f:
+        metadata_raw = f.read()
+    metadata_dict = parse_xml(metadata_raw, only_atom_entry=False)
+    actual_swhid = parse_swh_reference(metadata_dict["atom:entry"])
+
+    if not swhid and not actual_swhid:
+        raise InputError("A SWHID must be provided for a metadata-only deposit")
+    elif swhid is not None and actual_swhid is None:
+        # Add swhid within metadata-path passed as parameter if not present
+        metadata_dict["atom:entry"].update(
+            {"swh:deposit": {"swh:reference": {"swh:object": {"@swhid": swhid,}}}}
+        )
+        updated_metadata_raw = xmltodict.unparse(metadata_dict, pretty=True)
+
+        # unparse declares utf-8 in the xml header by default, so the file
+        # must be written with that encoding, not the locale's default one.
+        with open(metadata_path, "w", encoding="utf-8") as f:
+            f.write(updated_metadata_raw)
+
+    try:
+        client = PublicApiDepositClient(url=_url(url), auth=(username, password))
+        collection = _collection(client)
+        result = client.deposit_metadata_only(collection, metadata_path)
+    except InputError as e:
+        logger.error("Problem during parsing options: %s", e)
+        sys.exit(1)
+    except MaintenanceError as e:
+        logger.error(e)
+        sys.exit(1)
+
+    print_result(result, output_format)
diff --git a/swh/deposit/client.py b/swh/deposit/client.py
--- a/swh/deposit/client.py
+++ b/swh/deposit/client.py
@@ -545,6 +545,40 @@
         return "put"
 
 
+class CreateMetadataOnlyDepositClient(BaseCreateDepositClient):
+    """Create metadata-only deposit."""
+
+    def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+        """Build the request information for a metadata-only deposit.
+
+        Expects the ``metadata_path`` keyword argument (a KeyError is raised
+        when it is missing) and returns the atom entry content-type header
+        along with that path under the ``filepath`` key.
+
+        """
+        return {
+            "headers": {"CONTENT-TYPE": "application/atom+xml;type=entry",},
+            "filepath": kwargs["metadata_path"],
+        }
+
+    def parse_result_ok(self, xml_content):
+        """Given an xml content as string, returns a deposit dict.
+
+        The dict holds the deposit_id, deposit_swh_id, deposit_status and
+        deposit_date keys, each read from the matching ``swh:``-prefixed
+        entry of the parsed xml (None when that entry is absent).
+
+        """
+        data = parse_xml(xml_content)
+        keys = [
+            "deposit_id",
+            "deposit_swh_id",
+            "deposit_status",
+            "deposit_date",
+        ]
+        return {key: data.get("swh:" + key) for key in keys}
+
+
 class CreateMultipartDepositClient(BaseCreateDepositClient):
     """Create a multipart deposit client."""
 
@@ -712,3 +746,26 @@
         if "error" in r:
             return r
         return self.deposit_status(collection, deposit_id)
+
+    def deposit_metadata_only(
+        self, collection: str, metadata: Optional[str] = None,
+    ):
+        """Create a metadata-only deposit in a collection.
+
+        Args:
+            collection: collection to deposit into, forwarded to execute
+            metadata: path to the xml metadata file to send
+
+        Raises:
+            ValueError: when no metadata path is provided
+
+        Returns:
+            the result of executing CreateMetadataOnlyDepositClient
+
+        """
+        # Explicit check rather than an assert, which python -O strips
+        if metadata is None:
+            raise ValueError("A metadata file path is required")
+        return CreateMetadataOnlyDepositClient(
+            url=self.base_url, auth=self.auth
+        ).execute(collection, metadata_path=metadata)
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -25,6 +25,7 @@
 )
 from swh.deposit.client import MaintenanceError, PublicApiDepositClient
 from swh.deposit.parsers import parse_xml
+from swh.model.exceptions import ValidationError
 
 from ..conftest import TEST_USER
 
@@
-637,3 +638,156 @@
         "deposit_status": "partial",
         "deposit_id": 321,
     }
+
+
+def test_cli_metadata_only_deposit_full_metadata_file(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """Post metadata-only deposit through cli
+
+    The metadata file posted by the client already contains the swhid
+
+    """
+    api_url_basename = "deposit.test.metadataonly"
+    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
+    metadata = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
+    with open(metadata_path, "w") as m:
+        m.write(metadata)
+
+    # Values expected out of the deposit receipt fixture
+    # (data/https_deposit.test.metadataonly/1_test)
+    expected_deposit_status = {
+        "deposit_id": "100",
+        "deposit_status": "done",
+        "deposit_swh_id": swhid,
+        "deposit_date": "2020-10-08T13:52:34.509655",
+    }
+
+    # fmt: off
+    result = cli_runner.invoke(
+        cli,
+        [
+            "metadata-only",
+            "--url", f"https://{api_url_basename}/1",
+            "--username", TEST_USER["username"],
+            "--password", TEST_USER["password"],
+            "--metadata", metadata_path,
+            "--format", "json",
+        ],
+    )
+    # fmt: on
+    assert result.exit_code == 0, result.output
+    actual_deposit_status = json.loads(result.output)
+    assert "error" not in actual_deposit_status
+    assert actual_deposit_status == expected_deposit_status
+
+
+def test_cli_metadata_only_deposit_pass_swhid_as_flag(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """Post metadata-only deposit through cli
+
+    The metadata file posted by the client does not contain the swhid.
+    The client passes along the swhid.
+    This updates the metadata file and adds the swhid accordingly.
+
+    ^ Is that reasonable?
+
+    """
+    api_url_basename = "deposit.test.metadataonly"
+    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
+    metadata = atom_dataset["entry-data-minimal"]
+    assert swhid not in metadata  # the flag is the only source of the swhid
+    metadata_path = os.path.join(tmp_path, "entry-data.xml")
+    with open(metadata_path, "w") as m:
+        m.write(metadata)
+
+    expected_deposit_status = {
+        "deposit_id": "100",
+        "deposit_status": "done",
+        "deposit_swh_id": swhid,
+        "deposit_date": "2020-10-08T13:52:34.509655",
+    }
+
+    # fmt: off
+    result = cli_runner.invoke(
+        cli,
+        [
+            "metadata-only",
+            "--url", f"https://{api_url_basename}/1",
+            "--username", TEST_USER["username"],
+            "--password", TEST_USER["password"],
+            "--metadata", metadata_path,
+            "--swhid", swhid,
+            "--format", "json",
+        ],
+    )
+    # fmt: on
+    assert result.exit_code == 0, result.output
+    actual_deposit_status = json.loads(result.output)
+    assert "error" not in actual_deposit_status
+    assert actual_deposit_status == expected_deposit_status
+
+    # The swhid passed as flag must have been added to the metadata file
+    with open(metadata_path, "r") as m:
+        assert swhid in m.read()
+
+
+def test_cli_metadata_only_deposit_invalid_swhid(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """Post metadata-only deposit through cli with invalid swhid raises
+
+    """
+    api_url_basename = "deposit.test.metadataonly"
+    invalid_swhid = "ssh:2:sth:xxx"
+    metadata = atom_dataset["entry-data-with-swhid"].format(swhid=invalid_swhid)
+    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
+    with open(metadata_path, "w") as f:
+        f.write(metadata)
+
+    # fmt: off
+    with pytest.raises(ValidationError, match="Invalid"):
+        cli_runner.invoke(
+            cli,
+            [
+                "metadata-only",
+                "--url", f"https://{api_url_basename}/1",
+                "--username", TEST_USER["username"],
+                "--password", TEST_USER["password"],
+                "--metadata", metadata_path,
+                "--format", "json",
+            ],
+            catch_exceptions=False,
+        )
+    # fmt: on
+
+
+def test_cli_metadata_only_deposit_no_swhid(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """Post metadata-only deposit through cli without any swhid raises
+
+    """
+    api_url_basename = "deposit.test.metadataonly"
+    metadata = atom_dataset["entry-data-minimal"]
+    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
+    with open(metadata_path, "w") as f:
+        f.write(metadata)
+
+    # fmt: off
+    with pytest.raises(InputError, match="SWHID must be provided"):
+        cli_runner.invoke(
+            cli,
+            [
+                "metadata-only",
+                "--url", f"https://{api_url_basename}/1",
+                "--username", TEST_USER["username"],
+                "--password", TEST_USER["password"],
+                "--metadata", metadata_path,
+                "--format", "json",
+            ],
+            catch_exceptions=False,
+        )
+    # fmt: on
diff --git a/swh/deposit/tests/data/https_deposit.test.metadataonly/1_servicedocument b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_servicedocument
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_servicedocument
@@ -0,0 +1,26 @@
+
+
+
+ 2.0
+ 209715200
+
+
+ The Software Heritage (SWH) Archive
+
+ test Software Collection
+ application/zip
+ application/x-tar
+ Collection Policy
+ Software Heritage Archive
+ Collect, Preserve, Share
+ false
+ false
+ http://purl.org/net/sword/package/SimpleZip
+ https://deposit.test.metadataonly/1/test/
+ test
+
+
+
diff --git a/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test
@@ -0,0 +1,12 @@
+
+
+ 100
+ 2020-10-08T13:52:34.509655
+ done
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea
+
swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + + diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -25,7 +25,7 @@ logger = logging.getLogger(__name__) -def parse_xml(stream, encoding="utf-8"): +def parse_xml(stream, encoding: str = "utf-8", only_atom_entry: bool = True): namespaces = { "http://www.w3.org/2005/Atom": "atom", "http://www.w3.org/2007/app": "app", @@ -42,7 +42,7 @@ process_namespaces=True, dict_constructor=dict, ) - if "atom:entry" in data: + if only_atom_entry and "atom:entry" in data: data = data["atom:entry"] return data