diff --git a/swh/deposit/client.py b/swh/deposit/client.py --- a/swh/deposit/client.py +++ b/swh/deposit/client.py @@ -16,6 +16,7 @@ import requests from requests import Response +from requests.utils import parse_header_links from swh.core.config import load_from_envvar from swh.deposit import __version__ as swh_deposit_version @@ -273,7 +274,9 @@ """Http method to use on the url""" raise NotImplementedError - def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml result from the api endpoint, parse it and returns a dict. @@ -305,15 +308,18 @@ "sword:verboseDescription": sword_error.get("sword:verboseDescription", ""), } - def do_execute(self, method: str, url: str, info: Dict) -> Response: + def do_execute(self, method: str, url: str, info: Dict, **kwargs) -> Response: """Execute the http query to url using method and info information. - By default, execute a simple query to url with the http - method. Override this in daughter class to improve the - default behavior if needed. + By default, execute a simple query to url with the http method. Override this in + subclass to improve the default behavior if needed. """ - return self.do(method, url) + return self.do(method, url, **kwargs) + + def compute_params(self, **kwargs) -> Dict[str, Any]: + """Determine the params out of the kwargs""" + return {} def execute(self, *args, **kwargs) -> Dict[str, Any]: """Main endpoint to prepare and execute the http query to the api. 
@@ -328,9 +334,10 @@ url = self.compute_url(*args, **kwargs) method = self.compute_method(*args, **kwargs) info = self.compute_information(*args, **kwargs) + params = self.compute_params(**kwargs) try: - response = self.do_execute(method, url, info) + response = self.do_execute(method, url, info, params=params) except Exception as e: msg = self.error_msg % (url, e) result = self.empty_result @@ -343,7 +350,8 @@ if int(response.status_code) == 204: # 204 returns no body return {"status": response.status_code} else: - return self.parse_result_ok(response.text) + headers = dict(response.headers) if response.headers else None + return self.parse_result_ok(response.text, headers) else: error = self.parse_result_error(response.text) empty = self.empty_result @@ -380,7 +388,9 @@ def compute_method(self, *args, **kwargs): return "get" - def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Parse service document's success response. """ @@ -415,7 +425,9 @@ def compute_method(self, *args, **kwargs): return "get" - def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml content as string, returns a deposit dict. 
""" @@ -431,6 +443,72 @@ return {key: data.get("swh:" + key) for key in keys} +class CollectionListDepositClient(BaseDepositClient): + """List a collection of deposits (owned by a user) + + """ + + def __init__(self, config=None, url=None, auth=None): + super().__init__( + url=url, + auth=auth, + config=config, + error_msg="List deposits failure at %s: %s", + empty_result={}, + ) + + def compute_url(self, collection, **kwargs): + return f"/{collection}/" + + def compute_method(self, *args, **kwargs): + return "get" + + def compute_params(self, **kwargs) -> Dict[str, Any]: + """Transmit pagination params if values provided are not None + (e.g. page, page_size) + + """ + return {k: v for k, v in kwargs.items() if v is not None} + + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: + """Given an xml content as string, returns a deposit dict. + + """ + link_header = headers.get("Link", "") if headers else "" + links = parse_header_links(link_header) + data = parse_xml(xml_content)["atom:feed"] + total_result = data.get("swh:count", 0) + keys = [ + "id", + "reception_date", + "complete_date", + "external_id", + "swhid", + "status", + "status_detail", + "swhid_context", + "origin_url", + ] + entries_ = data.get("atom:entry", []) + entries = [entries_] if isinstance(entries_, dict) else entries_ + deposits_d = [ + { + key: deposit.get(f"swh:{key}") + for key in keys + if deposit.get(f"swh:{key}") is not None + } + for deposit in entries + ] + + return { + "count": total_result, + "deposits": deposits_d, + **{entry["rel"]: entry["url"] for entry in links}, + } + + class BaseCreateDepositClient(BaseDepositClient): """Deposit client base class to post new deposit. 
@@ -451,7 +529,9 @@ def compute_method(self, *args, **kwargs): return "post" - def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml content as string, returns a deposit dict. """ @@ -467,7 +547,7 @@ def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]: return info - def do_execute(self, method, url, info): + def do_execute(self, method, url, info, **kwargs): with open(info["filepath"], "rb") as f: return self.do(method, url, data=f, headers=info["headers"]) @@ -559,7 +639,9 @@ "filepath": kwargs["metadata_path"], } - def parse_result_ok(self, xml_content: str) -> Dict[str, Any]: + def parse_result_ok( + self, xml_content: str, headers: Optional[Dict] = None + ) -> Dict[str, Any]: """Given an xml content as string, returns a deposit dict. """ @@ -608,7 +690,7 @@ files, headers = self._multipart_info(info, info_meta) return {"files": files, "headers": headers} - def do_execute(self, method, url, info): + def do_execute(self, method, url, info, **kwargs): return self.do(method, url, files=info["files"], headers=info["headers"]) @@ -635,6 +717,17 @@ collection, deposit_id ) + def deposit_list( + self, + collection: str, + page: Optional[int] = None, + page_size: Optional[int] = None, + ): + """List deposits from the collection""" + return CollectionListDepositClient(url=self.base_url, auth=self.auth).execute( + collection, page=page, page_size=page_size + ) + def deposit_create( self, collection: str, diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml @@ -0,0 +1,18 @@ + + 3 + + 1031 + rejected + Deposit without archive + check-deposit-2020-10-09T13:10:00.000000 + + + 1032 + rejected + Deposit without archive + check-deposit-2020-10-10T13:20:00.000000 + + 
diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml @@ -0,0 +1,16 @@ + + 3 + + 1033 + 2020-10-08T13:50:30 + 2020-10-08T13:52:34.509655 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + + diff --git a/swh/deposit/tests/data/atom/entry-list-deposits.xml b/swh/deposit/tests/data/atom/entry-list-deposits.xml new file mode 100644 --- /dev/null +++ b/swh/deposit/tests/data/atom/entry-list-deposits.xml @@ -0,0 +1,28 @@ + + 3 + + 1031 + rejected + Deposit without archive + check-deposit-2020-10-09T13:10:00.000000 + + + 1032 + rejected + Deposit without archive + check-deposit-2020-10-10T13:20:00.000000 + + + 1033 + 2020-10-08T13:50:30 + 2020-10-08T13:52:34.509655 + done + The deposit has been successfully loaded into the Software Heritage archive + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea + swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/ + check-deposit-2020-10-08T13:52:34.509655 + + diff --git a/swh/deposit/tests/test_client_module.py b/swh/deposit/tests/test_client_module.py --- a/swh/deposit/tests/test_client_module.py +++ b/swh/deposit/tests/test_client_module.py @@ -11,10 +11,13 @@ import pytest from swh.deposit.client import ( + CollectionListDepositClient, MaintenanceError, + 
PublicApiDepositClient, ServiceDocumentDepositClient, StatusDepositClient, ) +from swh.deposit.utils import to_header_link def test_client_read_data_ok(requests_mock_datadir): @@ -94,3 +97,119 @@ with pytest.raises(MaintenanceError, match="forbidden"): client.execute(collection, deposit_id) + + +EXPECTED_DEPOSIT = { + "id": "1031", + "external_id": "check-deposit-2020-10-09T13:10:00.000000", + "status": "rejected", + "status_detail": "Deposit without archive", +} + +EXPECTED_DEPOSIT2 = { + "id": "1032", + "external_id": "check-deposit-2020-10-10T13:20:00.000000", + "status": "rejected", + "status_detail": "Deposit without archive", +} + +EXPECTED_DEPOSIT3 = { + "id": "1033", + "external_id": "check-deposit-2020-10-08T13:52:34.509655", + "status": "done", + "status_detail": ( + "The deposit has been successfully loaded into the Software " "Heritage archive" + ), + "reception_date": "2020-10-08T13:50:30", + "complete_date": "2020-10-08T13:52:34.509655", + "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea", + "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", # noqa +} + + +def test_client_collection_list(requests_mock, atom_dataset): + collection_list_xml = atom_dataset["entry-list-deposits"] + base_url = "https://deposit.test.list/1" + collection = "test" + url = f"{base_url}/{collection}/" + requests_mock.get(url, status_code=200, text=collection_list_xml) + expected_result = { + "count": "3", + "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2, EXPECTED_DEPOSIT3], + } + + # use dedicated client + client = CollectionListDepositClient(url=base_url, auth=("test", "test")) + + # no pagination + result = client.execute(collection) + + assert result == expected_result + + # The main public client should work the same way + client2 = 
PublicApiDepositClient(url=base_url, auth=("test", "test")) + result2 = client2.deposit_list(collection) + + assert result2 == expected_result + + assert requests_mock.called + request_history = [m.url for m in requests_mock.request_history] + assert request_history == [url] * 2 + + +def test_client_collection_list_with_pagination_headers(requests_mock, atom_dataset): + collection_list_xml_page1 = atom_dataset["entry-list-deposits-page1"] + collection_list_xml_page2 = atom_dataset["entry-list-deposits-page2"] + base_url = "https://deposit.test.list/1" + collection = "test" + url = f"{base_url}/{collection}/" + page1 = 1 + page2 = 2 + page_size = 10 + url_page1 = f"{url}?page={page1}" + url_page2 = f"{url}?page={page2}&page_size={page_size}" + requests_mock.get( + url_page1, + status_code=200, + text=collection_list_xml_page1, + headers={"Link": to_header_link(url_page2, "next"),}, + ) + requests_mock.get( + url_page2, + status_code=200, + text=collection_list_xml_page2, + headers={"Link": to_header_link(url_page1, "previous"),}, + ) + + expected_result_page1 = { + "count": "3", + "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2], + "next": url_page2, + } + expected_result_page2 = { + "count": "3", + "deposits": [EXPECTED_DEPOSIT3], + "previous": url_page1, + } + + client = CollectionListDepositClient( + url="https://deposit.test.list/1", auth=("test", "test") + ) + client2 = PublicApiDepositClient(url=base_url, auth=("test", "test")) + + result = client.execute(collection, page=page1) + assert result == expected_result_page1 + + result2 = client.execute(collection, page=page2, page_size=page_size) + assert result2 == expected_result_page2 + + # The main public client should work the same way + result = client2.deposit_list(collection, page=page1) + assert result == expected_result_page1 + + result2 = client2.deposit_list(collection, page=page2, page_size=page_size) + assert result2 == expected_result_page2 + + assert requests_mock.called + request_history = [m.url 
for m in requests_mock.request_history] + assert request_history == [url_page1, url_page2] * 2 diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -238,3 +238,16 @@ """Used to get the target of a metadata object from a , as the latter uses a QualifiedSWHID.""" return ExtendedSWHID.from_string(str(swhid).split(";")[0]) + + +def to_header_link(link: str, link_name: str) -> str: + """Build a single header link. + + >>> link_next = to_header_link("next-url", "next") + >>> link_next + '<next-url>; rel="next"' + >>> ','.join([link_next, to_header_link("prev-url", "prev")]) + '<next-url>; rel="next",<prev-url>; rel="prev"' + + """ + return f'<{link}>; rel="{link_name}"'