diff --git a/swh/deposit/cli/client.py b/swh/deposit/cli/client.py
--- a/swh/deposit/cli/client.py
+++ b/swh/deposit/cli/client.py
@@ -480,3 +480,72 @@
click.echo(yaml.dump(data))
else:
logger.info(data)
+
+
+@deposit.command("metadata-only")
+@click.option(
+    "--url",
+    default="https://deposit.softwareheritage.org",
+    help="(Optional) Deposit server api endpoint. By default, "
+    "https://deposit.softwareheritage.org/1",
+)
+@click.option("--username", required=True, help="(Mandatory) User's name")
+@click.option(
+    "--password", required=True, help="(Mandatory) User's associated password"
+)
+@click.option("--swhid", default=None, help="SWHID")
+@click.option(
+    "--metadata",
+    "metadata_path",
+    type=click.Path(exists=True),
+    required=True,
+    help="Path to xml metadata file",
+)
+@click.option(
+    "-f",
+    "--format",
+    "output_format",
+    default="logging",
+    type=click.Choice(["logging", "yaml", "json"]),
+    help="Output format results.",
+)
+@click.pass_context
+def metadata_only(ctx, url, username, password, swhid, metadata_path, output_format):
+    """Deposit metadata only upload
+
+    """
+    # The docstring above is kept short because click displays a command's
+    # docstring as its help text; implementation notes live in comments.
+    #
+    # Flow:
+    # 1. read the metadata file and look for a swhid reference in it;
+    # 2. fail with InputError when neither the file nor --swhid provides one;
+    # 3. when only --swhid provides one, add it to the metadata file;
+    # 4. post the metadata file and print the result in ``output_format``.
+    # When the file already references a swhid, --swhid is not used.
+    import xmltodict
+
+    from swh.deposit.client import MaintenanceError, PublicApiDepositClient
+    from swh.deposit.utils import parse_swh_reference, parse_xml
+
+    # Parse to check for a swhid presence within the metadata file.
+    # The encoding is explicit so the read does not depend on the platform
+    # locale; utf-8 is also parse_xml's default encoding.
+    with open(metadata_path, "r", encoding="utf-8") as f:
+        metadata_raw = f.read()
+    # only_atom_entry=False keeps the "atom:entry" root key, so the whole
+    # document can be serialized back below.
+    metadata_dict = parse_xml(metadata_raw, only_atom_entry=False)
+    actual_swhid = parse_swh_reference(metadata_dict["atom:entry"])
+
+    if not swhid and not actual_swhid:
+        # Raised before the try block below, so it propagates to the caller
+        # rather than being logged and turned into exit status 1.
+        raise InputError("A SWHID must be provided for a metadata-only deposit")
+    elif swhid is not None and actual_swhid is None:
+        # Add swhid within metadata-path passed as parameter if not present
+        metadata_dict["atom:entry"].update(
+            {"swh:deposit": {"swh:reference": {"swh:object": {"@swhid": swhid}}}}
+        )
+        updated_metadata_raw = xmltodict.unparse(metadata_dict, pretty=True)
+
+        # NOTE(review): this overwrites the file the user passed in; confirm
+        # that mutating the input is the intended behavior.
+        # NOTE(review): the document went through a namespace-processing
+        # parse; verify the serialized output still declares every prefix
+        # used (e.g. "swh").
+        # Written as utf-8 so the bytes on disk match what the read above
+        # (and parse_xml's default) expects.
+        with open(metadata_path, "w", encoding="utf-8") as f:
+            f.write(updated_metadata_raw)
+
+    try:
+        client = PublicApiDepositClient(url=_url(url), auth=(username, password))
+        collection = _collection(client)
+        result = client.deposit_metadata_only(collection, metadata_path)
+    except InputError as e:
+        logger.error("Problem during parsing options: %s", e)
+        sys.exit(1)
+    except MaintenanceError as e:
+        logger.error(e)
+        sys.exit(1)
+
+    print_result(result, output_format)
diff --git a/swh/deposit/client.py b/swh/deposit/client.py
--- a/swh/deposit/client.py
+++ b/swh/deposit/client.py
@@ -545,6 +545,29 @@
return "put"
+class CreateMetadataOnlyDepositClient(BaseCreateDepositClient):
+    """Create metadata-only deposit."""
+
+    def compute_information(self, *args, **kwargs) -> Dict[str, Any]:
+        """Return the request headers and the path of the file to send.
+
+        Expects a ``metadata_path`` keyword argument (KeyError otherwise).
+        The content type announces a single atom entry document.
+
+        """
+        atom_entry_headers = {"CONTENT-TYPE": "application/atom+xml;type=entry"}
+        return {"headers": atom_entry_headers, "filepath": kwargs["metadata_path"]}
+
+    def parse_result_ok(self, xml_content):
+        """Extract the deposit summary from an xml response given as string.
+
+        Returns a dict with the keys deposit_id, deposit_swh_id,
+        deposit_status and deposit_date; each value is read from the
+        matching "swh:"-prefixed entry of the parsed response and is None
+        when that entry is absent.
+
+        """
+        receipt = parse_xml(xml_content)
+        summary = {}
+        for field in (
+            "deposit_id",
+            "deposit_swh_id",
+            "deposit_status",
+            "deposit_date",
+        ):
+            summary[field] = receipt.get(f"swh:{field}")
+        return summary
+
+
class CreateMultipartDepositClient(BaseCreateDepositClient):
"""Create a multipart deposit client."""
@@ -712,3 +735,11 @@
if "error" in r:
return r
return self.deposit_status(collection, deposit_id)
+
+ def deposit_metadata_only(
+     self, collection: str, metadata: Optional[str] = None,
+ ):
+     """Create a metadata-only deposit in ``collection``.
+
+     Args:
+         collection: name of the collection to deposit into
+         metadata: path to the metadata file to send; must be provided
+             even though the signature gives it a default
+
+     Returns:
+         Whatever CreateMetadataOnlyDepositClient.execute returns.
+
+     Raises:
+         ValueError: when ``metadata`` is None
+
+     """
+     # Explicit check rather than ``assert``: assertions are removed when
+     # Python runs with -O, which would let None through as the file path.
+     if metadata is None:
+         raise ValueError("A metadata file path is required")
+     return CreateMetadataOnlyDepositClient(
+         url=self.base_url, auth=self.auth
+     ).execute(collection, metadata_path=metadata)
diff --git a/swh/deposit/tests/cli/test_client.py b/swh/deposit/tests/cli/test_client.py
--- a/swh/deposit/tests/cli/test_client.py
+++ b/swh/deposit/tests/cli/test_client.py
@@ -25,6 +25,7 @@
)
from swh.deposit.client import MaintenanceError, PublicApiDepositClient
from swh.deposit.parsers import parse_xml
+from swh.model.exceptions import ValidationError
from ..conftest import TEST_USER
@@ -637,3 +638,165 @@
"deposit_status": "partial",
"deposit_id": 321,
}
+
+
+def test_cli_metadata_only_deposit_full_metadata_file(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """Post metadata-only deposit through cli
+
+    The metadata file posted by the client already contains the swhid
+
+    """
+    api_url_basename = "deposit.test.metadataonly"
+    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
+    metadata = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
+    metadata_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
+    with open(metadata_path, "w") as m:
+        m.write(metadata)
+
+    # Hard-coded to mirror the canned deposit receipt stored for this test
+    # url (data file https_deposit.test.metadataonly/1_test); keep both in
+    # sync when either changes.
+    expected_deposit_status = {
+        "deposit_id": "100",
+        "deposit_status": "done",
+        "deposit_swh_id": swhid,
+        "deposit_date": "2020-10-08T13:52:34.509655",
+    }
+
+    # fmt: off
+    result = cli_runner.invoke(
+        cli,
+        [
+            "metadata-only",
+            "--url", f"https://{api_url_basename}/1",
+            "--username", TEST_USER["username"],
+            "--password", TEST_USER["password"],
+            "--metadata", metadata_path,
+            "--format", "json",
+        ],
+    )
+    # fmt: on
+    assert result.exit_code == 0, result.output
+    actual_deposit_status = json.loads(result.output)
+    assert "error" not in actual_deposit_status
+    assert actual_deposit_status == expected_deposit_status
+
+
+def test_cli_metadata_only_deposit_pass_swhid_as_flag(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """Post metadata-only deposit through cli
+
+    The metadata file posted by the client does not contain the swhid.
+    The client passes along the swhid with the --swhid flag.
+    The cli then adds the swhid to the metadata file before posting it.
+
+    """
+    api_url_basename = "deposit.test.metadataonly"
+    swhid = "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea"
+    metadata = atom_dataset["entry-data-minimal"]
+    metadata_path = os.path.join(tmp_path, "entry-data.xml")
+    with open(metadata_path, "w") as m:
+        m.write(metadata)
+
+    # Hard-coded to mirror the canned deposit receipt stored for this test
+    # url (data file https_deposit.test.metadataonly/1_test).
+    expected_deposit_status = {
+        "deposit_id": "100",
+        "deposit_status": "done",
+        "deposit_swh_id": swhid,
+        "deposit_date": "2020-10-08T13:52:34.509655",
+    }
+
+    # fmt: off
+    result = cli_runner.invoke(
+        cli,
+        [
+            "metadata-only",
+            "--url", f"https://{api_url_basename}/1",
+            "--username", TEST_USER["username"],
+            "--password", TEST_USER["password"],
+            "--metadata", metadata_path,
+            "--swhid", swhid,
+            "--format", "json",
+        ],
+    )
+    # fmt: on
+    assert result.exit_code == 0, result.output
+    actual_deposit_status = json.loads(result.output)
+    assert "error" not in actual_deposit_status
+    assert actual_deposit_status == expected_deposit_status
+
+    # The swhid given on the command line must have been added to the
+    # metadata file that got posted.
+    with open(metadata_path) as m:
+        assert swhid in m.read()
+
+
+def test_cli_metadata_only_deposit_invalid_swhid(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """A metadata file referencing a malformed swhid makes the cli raise
+
+    """
+    server_name = "deposit.test.metadataonly"
+    bad_swhid = "ssh:2:sth:xxx"
+    xml_path = os.path.join(tmp_path, "entry-data-with-swhid.xml")
+    with open(xml_path, "w") as xml_file:
+        xml_file.write(atom_dataset["entry-data-with-swhid"].format(swhid=bad_swhid))
+
+    # fmt: off
+    cli_args = [
+        "metadata-only",
+        "--url", f"https://{server_name}/1",
+        "--username", TEST_USER["username"],
+        "--password", TEST_USER["password"],
+        "--metadata", xml_path,
+        "--format", "json",
+    ]
+    # fmt: on
+
+    # catch_exceptions=False lets the error escape the click runner so that
+    # pytest.raises can observe it.
+    with pytest.raises(ValidationError, match="Invalid"):
+        cli_runner.invoke(cli, cli_args, catch_exceptions=False)
+
+
+def test_cli_metadata_only_deposit_no_swhid(
+    datadir, requests_mock_datadir, cli_runner, atom_dataset, tmp_path,
+):
+    """Post metadata-only deposit through cli without any swhid raises
+
+    Neither the metadata file nor the command line provides a swhid.
+
+    """
+    api_url_basename = "deposit.test.metadataonly"
+    metadata = atom_dataset["entry-data-minimal"]
+    metadata_path = os.path.join(tmp_path, "entry-data-minimal.xml")
+    with open(metadata_path, "w") as f:
+        f.write(metadata)
+
+    # fmt: off
+    with pytest.raises(InputError, match="SWHID must be provided"):
+        cli_runner.invoke(
+            cli,
+            [
+                "metadata-only",
+                "--url", f"https://{api_url_basename}/1",
+                "--username", TEST_USER["username"],
+                "--password", TEST_USER["password"],
+                "--metadata", metadata_path,
+                "--format", "json",
+            ],
+            catch_exceptions=False,
+        )
+    # fmt: on
diff --git a/swh/deposit/tests/data/https_deposit.test.metadataonly/1_servicedocument b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_servicedocument
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_servicedocument
@@ -0,0 +1,26 @@
+
+
+
+ 2.0
+ 209715200
+
+
+ The Software Heritage (SWH) Archive
+
+ test Software Collection
+ application/zip
+ application/x-tar
+ Collection Policy
+ Software Heritage Archive
+ Collect, Preserve, Share
+ false
+ false
+ http://purl.org/net/sword/package/SimpleZip
+ https://deposit.test.metadataonly/1/test/
+ test
+
+
+
diff --git a/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/https_deposit.test.metadataonly/1_test
@@ -0,0 +1,12 @@
+
+
+ 100
+ 2020-10-08T13:52:34.509655
+ done
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/
+
+
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -25,7 +25,7 @@
logger = logging.getLogger(__name__)
-def parse_xml(stream, encoding="utf-8"):
+def parse_xml(stream, encoding: str = "utf-8", only_atom_entry: bool = True):
namespaces = {
"http://www.w3.org/2005/Atom": "atom",
"http://www.w3.org/2007/app": "app",
@@ -42,7 +42,7 @@
process_namespaces=True,
dict_constructor=dict,
)
- if "atom:entry" in data:
+ if only_atom_entry and "atom:entry" in data:
data = data["atom:entry"]
return data