diff --git a/swh/deposit/client.py b/swh/deposit/client.py
--- a/swh/deposit/client.py
+++ b/swh/deposit/client.py
@@ -16,6 +16,7 @@
import requests
from requests import Response
+from requests.utils import parse_header_links
from swh.core.config import load_from_envvar
from swh.deposit import __version__ as swh_deposit_version
@@ -273,7 +274,9 @@
"""Http method to use on the url"""
raise NotImplementedError
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml result from the api endpoint, parse it and returns a
dict.
@@ -305,15 +308,18 @@
"sword:verboseDescription": sword_error.get("sword:verboseDescription", ""),
}
- def do_execute(self, method: str, url: str, info: Dict) -> Response:
+ def do_execute(self, method: str, url: str, info: Dict, **kwargs) -> Response:
"""Execute the http query to url using method and info information.
- By default, execute a simple query to url with the http
- method. Override this in daughter class to improve the
- default behavior if needed.
+ By default, execute a simple query to url with the http method. Override this in
+ a subclass to improve the default behavior if needed.
+
+ This default implementation does not use info; any extra keyword arguments
+ are forwarded unchanged to self.do.
"""
- return self.do(method, url)
+ return self.do(method, url, **kwargs)
+
+ def compute_params(self, **kwargs) -> Dict[str, Any]:
+ """Determine the params to send along with the request out of the kwargs.
+
+ This base implementation ignores kwargs and always returns an empty dict.
+
+ """
+ return {}
def execute(self, *args, **kwargs) -> Dict[str, Any]:
"""Main endpoint to prepare and execute the http query to the api.
@@ -328,9 +334,10 @@
url = self.compute_url(*args, **kwargs)
method = self.compute_method(*args, **kwargs)
info = self.compute_information(*args, **kwargs)
+ params = self.compute_params(**kwargs)
try:
- response = self.do_execute(method, url, info)
+ response = self.do_execute(method, url, info, params=params)
except Exception as e:
msg = self.error_msg % (url, e)
result = self.empty_result
@@ -343,7 +350,8 @@
if int(response.status_code) == 204: # 204 returns no body
return {"status": response.status_code}
else:
- return self.parse_result_ok(response.text)
+ headers = dict(response.headers) if response.headers else None
+ return self.parse_result_ok(response.text, headers)
else:
error = self.parse_result_error(response.text)
empty = self.empty_result
@@ -380,7 +388,9 @@
def compute_method(self, *args, **kwargs):
return "get"
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Parse service document's success response.
"""
@@ -415,7 +425,9 @@
def compute_method(self, *args, **kwargs):
return "get"
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml content as string, returns a deposit dict.
"""
@@ -431,6 +443,72 @@
return {key: data.get("swh:" + key) for key in keys}
+class CollectionListDepositClient(BaseDepositClient):
+    """List a collection of deposits (owned by a user).
+
+    The query is a GET on ``/<collection>/``. Keyword arguments given to ``execute``
+    (e.g. ``page``, ``page_size``) are transmitted as params when they are not None.
+
+    """
+
+    def __init__(self, config=None, url=None, auth=None):
+        super().__init__(
+            url=url,
+            auth=auth,
+            config=config,
+            error_msg="List deposits failure at %s: %s",
+            empty_result={},
+        )
+
+    def compute_url(self, collection, **kwargs):
+        """Return the url (relative to the base url) of the collection to list."""
+        return f"/{collection}/"
+
+    def compute_method(self, *args, **kwargs):
+        """Listing is always a ``get``."""
+        return "get"
+
+    def compute_params(self, **kwargs) -> Dict[str, Any]:
+        """Transmit pagination params if values provided are not None
+        (e.g. page, page_size)
+
+        """
+        return {k: v for k, v in kwargs.items() if v is not None}
+
+    def parse_result_ok(
+        self, xml_content: str, headers: Optional[Dict] = None
+    ) -> Dict[str, Any]:
+        """Parse a successful listing response into a result dict.
+
+        Args:
+            xml_content: body of the response, an atom feed of deposit entries
+            headers: response headers; the ``Link`` header, when present, carries
+                the pagination links
+
+        Returns:
+            A dict with the keys ``count`` (the feed's ``swh:count`` value as parsed,
+            0 when absent), ``deposits`` (one dict per entry, restricted to the known
+            ``swh:`` fields that are set) and one key per relation found in the
+            ``Link`` header (e.g. ``next``, ``previous``) mapped to its url.
+
+        """
+        # HTTP header names are case-insensitive, but the headers are handed over
+        # as a plain dict (case-sensitive keys): look "Link" up regardless of case
+        # so that a lowercase "link" header does not silently drop the pagination.
+        link_header = ""
+        for name, value in (headers or {}).items():
+            if name.lower() == "link":
+                link_header = value or ""
+                break
+        links = parse_header_links(link_header)
+
+        data = parse_xml(xml_content)["atom:feed"]
+        total_result = data.get("swh:count", 0)
+        keys = [
+            "id",
+            "reception_date",
+            "complete_date",
+            "external_id",
+            "swhid",
+            "status",
+            "status_detail",
+            "swhid_context",
+            "origin_url",
+        ]
+        # A lone entry comes back as a dict rather than a list of dicts:
+        # normalize to a list so both cases are iterated the same way.
+        entries_ = data.get("atom:entry", [])
+        entries = [entries_] if isinstance(entries_, dict) else entries_
+        deposits_d = [
+            {
+                key: deposit.get(f"swh:{key}")
+                for key in keys
+                if deposit.get(f"swh:{key}") is not None
+            }
+            for deposit in entries
+        ]
+
+        return {
+            "count": total_result,
+            "deposits": deposits_d,
+            # A link without a "rel" parameter cannot be named: skip it instead of
+            # failing the whole listing with a KeyError.
+            **{entry["rel"]: entry["url"] for entry in links if "rel" in entry},
+        }
+
+
class BaseCreateDepositClient(BaseDepositClient):
"""Deposit client base class to post new deposit.
@@ -451,7 +529,9 @@
def compute_method(self, *args, **kwargs):
return "post"
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml content as string, returns a deposit dict.
"""
@@ -467,7 +547,7 @@
def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]:
return info
- def do_execute(self, method, url, info):
+ def do_execute(self, method, url, info, **kwargs):
+ # NOTE(review): kwargs is accepted but not forwarded to self.do here (only
+ # data and headers are passed) -- confirm this is intended.
with open(info["filepath"], "rb") as f:
return self.do(method, url, data=f, headers=info["headers"])
@@ -559,7 +639,9 @@
"filepath": kwargs["metadata_path"],
}
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml content as string, returns a deposit dict.
"""
@@ -608,7 +690,7 @@
files, headers = self._multipart_info(info, info_meta)
return {"files": files, "headers": headers}
- def do_execute(self, method, url, info):
+ def do_execute(self, method, url, info, **kwargs):
+ # NOTE(review): kwargs is accepted but not forwarded to self.do here (only
+ # files and headers are passed) -- confirm this is intended.
return self.do(method, url, files=info["files"], headers=info["headers"])
@@ -635,6 +717,17 @@
collection, deposit_id
)
+ def deposit_list(
+ self,
+ collection: str,
+ page: Optional[int] = None,
+ page_size: Optional[int] = None,
+ ):
+ """List deposits from the collection.
+
+ Delegates to a CollectionListDepositClient built with this client's base url
+ and auth, and returns the result of its execute call. page and page_size are
+ passed through as given (None by default).
+
+ """
+ return CollectionListDepositClient(url=self.base_url, auth=self.auth).execute(
+ collection, page=page, page_size=page_size
+ )
+
def deposit_create(
self,
collection: str,
diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml
@@ -0,0 +1,18 @@
+
+ 3
+
+ 1031
+ rejected
+ Deposit without archive
+ check-deposit-2020-10-09T13:10:00.000000
+
+
+ 1032
+ rejected
+ Deposit without archive
+ check-deposit-2020-10-10T13:20:00.000000
+
+
diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml
@@ -0,0 +1,16 @@
+
+ 3
+
+ 1033
+ 2020-10-08T13:50:30
+ 2020-10-08T13:52:34.509655
+ done
+ The deposit has been successfully loaded into the Software Heritage archive
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/
+ check-deposit-2020-10-08T13:52:34.509655
+
+
diff --git a/swh/deposit/tests/data/atom/entry-list-deposits.xml b/swh/deposit/tests/data/atom/entry-list-deposits.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-list-deposits.xml
@@ -0,0 +1,28 @@
+
+ 3
+
+ 1031
+ rejected
+ Deposit without archive
+ check-deposit-2020-10-09T13:10:00.000000
+
+
+ 1032
+ rejected
+ Deposit without archive
+ check-deposit-2020-10-10T13:20:00.000000
+
+
+ 1033
+ 2020-10-08T13:50:30
+ 2020-10-08T13:52:34.509655
+ done
+ The deposit has been successfully loaded into the Software Heritage archive
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea
+ swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/
+ check-deposit-2020-10-08T13:52:34.509655
+
+
diff --git a/swh/deposit/tests/test_client_module.py b/swh/deposit/tests/test_client_module.py
--- a/swh/deposit/tests/test_client_module.py
+++ b/swh/deposit/tests/test_client_module.py
@@ -11,10 +11,13 @@
import pytest
from swh.deposit.client import (
+ CollectionListDepositClient,
MaintenanceError,
+ PublicApiDepositClient,
ServiceDocumentDepositClient,
StatusDepositClient,
)
+from swh.deposit.utils import to_header_link
def test_client_read_data_ok(requests_mock_datadir):
@@ -94,3 +97,119 @@
with pytest.raises(MaintenanceError, match="forbidden"):
client.execute(collection, deposit_id)
+
+
+# Expected deposit dicts shared by the collection list tests below
+EXPECTED_DEPOSIT = {
+ "id": "1031",
+ "external_id": "check-deposit-2020-10-09T13:10:00.000000",
+ "status": "rejected",
+ "status_detail": "Deposit without archive",
+}
+
+EXPECTED_DEPOSIT2 = {
+ "id": "1032",
+ "external_id": "check-deposit-2020-10-10T13:20:00.000000",
+ "status": "rejected",
+ "status_detail": "Deposit without archive",
+}
+
+EXPECTED_DEPOSIT3 = {
+ "id": "1033",
+ "external_id": "check-deposit-2020-10-08T13:52:34.509655",
+ "status": "done",
+ "status_detail": (
+ "The deposit has been successfully loaded into the Software " "Heritage archive"
+ ),
+ "reception_date": "2020-10-08T13:50:30",
+ "complete_date": "2020-10-08T13:52:34.509655",
+ "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
+ "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", # noqa
+}
+
+
+def test_client_collection_list(requests_mock, atom_dataset):
+ """Listing a collection without pagination returns the count and all deposits,
+ with both the dedicated client and the main public client.
+
+ """
+ collection_list_xml = atom_dataset["entry-list-deposits"]
+ base_url = "https://deposit.test.list/1"
+ collection = "test"
+ url = f"{base_url}/{collection}/"
+ requests_mock.get(url, status_code=200, text=collection_list_xml)
+ expected_result = {
+ "count": "3",
+ "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2, EXPECTED_DEPOSIT3],
+ }
+
+ # use dedicated client
+ client = CollectionListDepositClient(url=base_url, auth=("test", "test"))
+
+ # no pagination
+ result = client.execute(collection)
+
+ assert result == expected_result
+
+ # The main public client should work the same way
+ client2 = PublicApiDepositClient(url=base_url, auth=("test", "test"))
+ result2 = client2.deposit_list(collection)
+
+ assert result2 == expected_result
+
+ assert requests_mock.called
+ request_history = [m.url for m in requests_mock.request_history]
+ assert request_history == [url] * 2
+
+
+def test_client_collection_list_with_pagination_headers(requests_mock, atom_dataset):
+ """Listing with page/page_size sends them as query params and exposes the
+ relations of the Link response header (next, previous) in the result, with
+ both the dedicated client and the main public client.
+
+ """
+ collection_list_xml_page1 = atom_dataset["entry-list-deposits-page1"]
+ collection_list_xml_page2 = atom_dataset["entry-list-deposits-page2"]
+ base_url = "https://deposit.test.list/1"
+ collection = "test"
+ url = f"{base_url}/{collection}/"
+ page1 = 1
+ page2 = 2
+ page_size = 10
+ url_page1 = f"{url}?page={page1}"
+ url_page2 = f"{url}?page={page2}&page_size={page_size}"
+ requests_mock.get(
+ url_page1,
+ status_code=200,
+ text=collection_list_xml_page1,
+ headers={"Link": to_header_link(url_page2, "next"),},
+ )
+ requests_mock.get(
+ url_page2,
+ status_code=200,
+ text=collection_list_xml_page2,
+ headers={"Link": to_header_link(url_page1, "previous"),},
+ )
+
+ expected_result_page1 = {
+ "count": "3",
+ "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2],
+ "next": url_page2,
+ }
+ expected_result_page2 = {
+ "count": "3",
+ "deposits": [EXPECTED_DEPOSIT3],
+ "previous": url_page1,
+ }
+
+ client = CollectionListDepositClient(
+ url="https://deposit.test.list/1", auth=("test", "test")
+ )
+ client2 = PublicApiDepositClient(url=base_url, auth=("test", "test"))
+
+ result = client.execute(collection, page=page1)
+ assert result == expected_result_page1
+
+ result2 = client.execute(collection, page=page2, page_size=page_size)
+ assert result2 == expected_result_page2
+
+ # The main public client should work the same way
+ result = client2.deposit_list(collection, page=page1)
+ assert result == expected_result_page1
+
+ result2 = client2.deposit_list(collection, page=page2, page_size=page_size)
+ assert result2 == expected_result_page2
+
+ assert requests_mock.called
+ request_history = [m.url for m in requests_mock.request_history]
+ assert request_history == [url_page1, url_page2] * 2
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -238,3 +238,16 @@
"""Used to get the target of a metadata object from a ,
as the latter uses a QualifiedSWHID."""
return ExtendedSWHID.from_string(str(swhid).split(";")[0])
+
+
+def to_header_link(link: str, link_name: str) -> str:
+ """Build a single header link.
+
+ The link is wrapped in angle brackets and followed by its rel parameter.
+
+ >>> link_next = to_header_link("next-url", "next")
+ >>> link_next
+ '<next-url>; rel="next"'
+ >>> ','.join([link_next, to_header_link("prev-url", "prev")])
+ '<next-url>; rel="next",<prev-url>; rel="prev"'
+
+ """
+ return f'<{link}>; rel="{link_name}"'