Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123038
D5769.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Subscribers
None
D5769.diff
View Options
diff --git a/swh/deposit/client.py b/swh/deposit/client.py
--- a/swh/deposit/client.py
+++ b/swh/deposit/client.py
@@ -16,6 +16,7 @@
import requests
from requests import Response
+from requests.utils import parse_header_links
from swh.core.config import load_from_envvar
from swh.deposit import __version__ as swh_deposit_version
@@ -273,7 +274,9 @@
"""Http method to use on the url"""
raise NotImplementedError
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml result from the api endpoint, parse it and returns a
dict.
@@ -305,15 +308,18 @@
"sword:verboseDescription": sword_error.get("sword:verboseDescription", ""),
}
- def do_execute(self, method: str, url: str, info: Dict) -> Response:
+ def do_execute(self, method: str, url: str, info: Dict, **kwargs) -> Response:
"""Execute the http query to url using method and info information.
- By default, execute a simple query to url with the http
- method. Override this in daughter class to improve the
- default behavior if needed.
+ By default, execute a simple query to url with the http method. Override this in
+ subclass to improve the default behavior if needed.
"""
- return self.do(method, url)
+ return self.do(method, url, **kwargs)
+
+    def compute_params(self, **kwargs) -> Dict[str, Any]:
+        """Compute the ``params`` passed along the http query out of the kwargs.
+
+        This default implementation ignores the kwargs and provides no params.
+
+        """
+        return dict()
def execute(self, *args, **kwargs) -> Dict[str, Any]:
"""Main endpoint to prepare and execute the http query to the api.
@@ -328,9 +334,10 @@
url = self.compute_url(*args, **kwargs)
method = self.compute_method(*args, **kwargs)
info = self.compute_information(*args, **kwargs)
+ params = self.compute_params(**kwargs)
try:
- response = self.do_execute(method, url, info)
+ response = self.do_execute(method, url, info, params=params)
except Exception as e:
msg = self.error_msg % (url, e)
result = self.empty_result
@@ -343,7 +350,8 @@
if int(response.status_code) == 204: # 204 returns no body
return {"status": response.status_code}
else:
- return self.parse_result_ok(response.text)
+ headers = dict(response.headers) if response.headers else None
+ return self.parse_result_ok(response.text, headers)
else:
error = self.parse_result_error(response.text)
empty = self.empty_result
@@ -380,7 +388,9 @@
def compute_method(self, *args, **kwargs):
return "get"
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Parse service document's success response.
"""
@@ -415,7 +425,9 @@
def compute_method(self, *args, **kwargs):
return "get"
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml content as string, returns a deposit dict.
"""
@@ -431,6 +443,72 @@
return {key: data.get("swh:" + key) for key in keys}
+class CollectionListDepositClient(BaseDepositClient):
+    """List a collection of deposits (owned by a user)
+
+    The keyword arguments given to ``execute`` (e.g. page, page_size) are
+    transmitted as ``params`` of the http query when they are not None. The
+    pagination links found in the ``Link`` header of the response are exposed
+    in the result, keyed by their relation (e.g. "next", "previous").
+
+    """
+
+    def __init__(self, config=None, url=None, auth=None):
+        super().__init__(
+            url=url,
+            auth=auth,
+            config=config,
+            error_msg="List deposits failure at %s: %s",
+            empty_result={},
+        )
+
+    def compute_url(self, collection, **kwargs):
+        """Path of the listing endpoint of the collection."""
+        return f"/{collection}/"
+
+    def compute_method(self, *args, **kwargs):
+        """The listing is a plain read: http GET."""
+        return "get"
+
+    def compute_params(self, **kwargs) -> Dict[str, Any]:
+        """Transmit pagination params if values provided are not None
+        (e.g. page, page_size)
+
+        """
+        return {k: v for k, v in kwargs.items() if v is not None}
+
+    def parse_result_ok(
+        self, xml_content: str, headers: Optional[Dict] = None
+    ) -> Dict[str, Any]:
+        """Parse the xml listing (and the response headers) into a dict.
+
+        Args:
+            xml_content: atom feed holding the deposits, as a string
+            headers: response headers, whose Link entry (if any) holds the
+              pagination links
+
+        Returns:
+            Dict with keys "count" (value of the feed's count element as parsed,
+            0 when absent), "deposits" (list of dicts, one per feed entry, fields
+            without value left out), plus one key per pagination link relation
+            mapped to the url of that link.
+
+        """
+        # ``execute`` hands the headers over as a plain (case-sensitive) dict
+        # while http header names are case-insensitive: find the Link header
+        # whatever its casing.
+        link_header = ""
+        for name, value in (headers or {}).items():
+            if name.lower() == "link":
+                link_header = value
+                break
+        # A link without relation cannot be keyed in the result: leave it out
+        # instead of failing the whole listing on a KeyError.
+        links = {
+            entry["rel"]: entry["url"]
+            for entry in parse_header_links(link_header)
+            if "rel" in entry
+        }
+        data = parse_xml(xml_content)["atom:feed"]
+        total_result = data.get("swh:count", 0)
+        keys = [
+            "id",
+            "reception_date",
+            "complete_date",
+            "external_id",
+            "swhid",
+            "status",
+            "status_detail",
+            "swhid_context",
+            "origin_url",
+        ]
+        entries_ = data.get("atom:entry", [])
+        # A feed with a single entry is parsed as a dict, not a list of dicts
+        entries = [entries_] if isinstance(entries_, dict) else entries_
+        deposits_d = [
+            {
+                key: deposit.get(f"swh:{key}")
+                for key in keys
+                if deposit.get(f"swh:{key}") is not None
+            }
+            for deposit in entries
+        ]
+
+        # Links come first so that an unexpected relation named "count" or
+        # "deposits" cannot clobber the actual listing.
+        return {
+            **links,
+            "count": total_result,
+            "deposits": deposits_d,
+        }
+
+
class BaseCreateDepositClient(BaseDepositClient):
"""Deposit client base class to post new deposit.
@@ -451,7 +529,9 @@
def compute_method(self, *args, **kwargs):
return "post"
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml content as string, returns a deposit dict.
"""
@@ -467,7 +547,7 @@
def compute_headers(self, info: Dict[str, Any]) -> Dict[str, Any]:
return info
- def do_execute(self, method, url, info):
+ def do_execute(self, method, url, info, **kwargs):
with open(info["filepath"], "rb") as f:
return self.do(method, url, data=f, headers=info["headers"])
@@ -559,7 +639,9 @@
"filepath": kwargs["metadata_path"],
}
- def parse_result_ok(self, xml_content: str) -> Dict[str, Any]:
+ def parse_result_ok(
+ self, xml_content: str, headers: Optional[Dict] = None
+ ) -> Dict[str, Any]:
"""Given an xml content as string, returns a deposit dict.
"""
@@ -608,7 +690,7 @@
files, headers = self._multipart_info(info, info_meta)
return {"files": files, "headers": headers}
- def do_execute(self, method, url, info):
+ def do_execute(self, method, url, info, **kwargs):
return self.do(method, url, files=info["files"], headers=info["headers"])
@@ -635,6 +717,17 @@
collection, deposit_id
)
+    def deposit_list(
+        self,
+        collection: str,
+        page: Optional[int] = None,
+        page_size: Optional[int] = None,
+    ):
+        """Retrieve the (possibly paginated) listing of the collection's deposits.
+
+        This delegates to a CollectionListDepositClient built with this client's
+        base url and credentials; page and page_size are passed along as is.
+
+        """
+        list_client = CollectionListDepositClient(url=self.base_url, auth=self.auth)
+        return list_client.execute(collection, page=page, page_size=page_size)
+
def deposit_create(
self,
collection: str,
diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-list-deposits-page1.xml
@@ -0,0 +1,18 @@
+<feed xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/terms/"
+ xmlns:dcterms="http://purl.org/dc/terms/"
+ xmlns:sd="https://www.softwareheritage.org/schema/2018/deposit">
+ <sd:count>3</sd:count>
+ <entry>
+ <sd:id>1031</sd:id>
+ <sd:status>rejected</sd:status>
+ <sd:status_detail>Deposit without archive</sd:status_detail>
+ <sd:external_id>check-deposit-2020-10-09T13:10:00.000000</sd:external_id>
+ </entry>
+ <entry>
+ <sd:id>1032</sd:id>
+ <sd:status>rejected</sd:status>
+ <sd:status_detail>Deposit without archive</sd:status_detail>
+ <sd:external_id>check-deposit-2020-10-10T13:20:00.000000</sd:external_id>
+ </entry>
+</feed>
diff --git a/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-list-deposits-page2.xml
@@ -0,0 +1,16 @@
+<feed xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/terms/"
+ xmlns:dcterms="http://purl.org/dc/terms/"
+ xmlns:sd="https://www.softwareheritage.org/schema/2018/deposit">
+ <sd:count>3</sd:count>
+ <entry>
+ <sd:id>1033</sd:id>
+ <sd:reception_date>2020-10-08T13:50:30</sd:reception_date>
+ <sd:complete_date>2020-10-08T13:52:34.509655</sd:complete_date>
+ <sd:status>done</sd:status>
+ <sd:status_detail>The deposit has been successfully loaded into the Software Heritage archive</sd:status_detail>
+ <sd:swhid>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea</sd:swhid>
+ <sd:swhid_context>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/</sd:swhid_context>
+ <sd:external_id>check-deposit-2020-10-08T13:52:34.509655</sd:external_id>
+ </entry>
+</feed>
diff --git a/swh/deposit/tests/data/atom/entry-list-deposits.xml b/swh/deposit/tests/data/atom/entry-list-deposits.xml
new file mode 100644
--- /dev/null
+++ b/swh/deposit/tests/data/atom/entry-list-deposits.xml
@@ -0,0 +1,28 @@
+<feed xmlns="http://www.w3.org/2005/Atom"
+ xmlns:sword="http://purl.org/net/sword/terms/"
+ xmlns:dcterms="http://purl.org/dc/terms/"
+ xmlns:sd="https://www.softwareheritage.org/schema/2018/deposit">
+ <sd:count>3</sd:count>
+ <entry>
+ <sd:id>1031</sd:id>
+ <sd:status>rejected</sd:status>
+ <sd:status_detail>Deposit without archive</sd:status_detail>
+ <sd:external_id>check-deposit-2020-10-09T13:10:00.000000</sd:external_id>
+ </entry>
+ <entry>
+ <sd:id>1032</sd:id>
+ <sd:status>rejected</sd:status>
+ <sd:status_detail>Deposit without archive</sd:status_detail>
+ <sd:external_id>check-deposit-2020-10-10T13:20:00.000000</sd:external_id>
+ </entry>
+ <entry>
+ <sd:id>1033</sd:id>
+ <sd:reception_date>2020-10-08T13:50:30</sd:reception_date>
+ <sd:complete_date>2020-10-08T13:52:34.509655</sd:complete_date>
+ <sd:status>done</sd:status>
+ <sd:status_detail>The deposit has been successfully loaded into the Software Heritage archive</sd:status_detail>
+ <sd:swhid>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea</sd:swhid>
+ <sd:swhid_context>swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/</sd:swhid_context>
+ <sd:external_id>check-deposit-2020-10-08T13:52:34.509655</sd:external_id>
+ </entry>
+</feed>
diff --git a/swh/deposit/tests/test_client_module.py b/swh/deposit/tests/test_client_module.py
--- a/swh/deposit/tests/test_client_module.py
+++ b/swh/deposit/tests/test_client_module.py
@@ -11,10 +11,13 @@
import pytest
from swh.deposit.client import (
+ CollectionListDepositClient,
MaintenanceError,
+ PublicApiDepositClient,
ServiceDocumentDepositClient,
StatusDepositClient,
)
+from swh.deposit.utils import to_header_link
def test_client_read_data_ok(requests_mock_datadir):
@@ -94,3 +97,119 @@
with pytest.raises(MaintenanceError, match="forbidden"):
client.execute(collection, deposit_id)
+
+
+# Expected parsed form of the deposit entries declared in the
+# entry-list-deposits*.xml atom fixtures. Only the fields present in the xml
+# entry are expected in the resulting dict.
+
+# First entry (id 1031): rejected deposit
+EXPECTED_DEPOSIT = {
+ "id": "1031",
+ "external_id": "check-deposit-2020-10-09T13:10:00.000000",
+ "status": "rejected",
+ "status_detail": "Deposit without archive",
+}
+
+# Second entry (id 1032): rejected deposit
+EXPECTED_DEPOSIT2 = {
+ "id": "1032",
+ "external_id": "check-deposit-2020-10-10T13:20:00.000000",
+ "status": "rejected",
+ "status_detail": "Deposit without archive",
+}
+
+# Third entry (id 1033): deposit whose status is done, hence the dates and swhids
+EXPECTED_DEPOSIT3 = {
+ "id": "1033",
+ "external_id": "check-deposit-2020-10-08T13:52:34.509655",
+ "status": "done",
+ "status_detail": (
+ "The deposit has been successfully loaded into the Software " "Heritage archive"
+ ),
+ "reception_date": "2020-10-08T13:50:30",
+ "complete_date": "2020-10-08T13:52:34.509655",
+ "swhid": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea",
+ "swhid_context": "swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea;origin=https://www.softwareheritage.org/check-deposit-2020-10-08T13:52:34.509655;visit=swh:1:snp:c477c6ef51833127b13a86ece7d75e5b3cc4e93d;anchor=swh:1:rev:f26f3960c175f15f6e24200171d446b86f6f7230;path=/", # noqa
+}
+
+
+def test_client_collection_list(requests_mock, atom_dataset):
+    """Listing without pagination returns the count and all the deposits."""
+    base_url = "https://deposit.test.list/1"
+    collection = "test"
+    url = f"{base_url}/{collection}/"
+    auth = ("test", "test")
+    requests_mock.get(url, status_code=200, text=atom_dataset["entry-list-deposits"])
+    expected_result = {
+        "count": "3",
+        "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2, EXPECTED_DEPOSIT3],
+    }
+
+    # use dedicated client, no pagination
+    dedicated_client = CollectionListDepositClient(url=base_url, auth=auth)
+    assert dedicated_client.execute(collection) == expected_result
+
+    # The main public client should work the same way
+    public_client = PublicApiDepositClient(url=base_url, auth=auth)
+    assert public_client.deposit_list(collection) == expected_result
+
+    # one query per client, both on the same url
+    assert requests_mock.called
+    queried_urls = [m.url for m in requests_mock.request_history]
+    assert queried_urls == [url] * 2
+
+
+def test_client_collection_list_with_pagination_headers(requests_mock, atom_dataset):
+    """Pagination links of the Link header end up in the listing result."""
+    base_url = "https://deposit.test.list/1"
+    collection = "test"
+    url = f"{base_url}/{collection}/"
+    auth = ("test", "test")
+    page1 = 1
+    page2 = 2
+    page_size = 10
+    url_page1 = f"{url}?page={page1}"
+    url_page2 = f"{url}?page={page2}&page_size={page_size}"
+
+    # Each page links to the other one through its Link header
+    requests_mock.get(
+        url_page1,
+        status_code=200,
+        text=atom_dataset["entry-list-deposits-page1"],
+        headers={"Link": to_header_link(url_page2, "next")},
+    )
+    requests_mock.get(
+        url_page2,
+        status_code=200,
+        text=atom_dataset["entry-list-deposits-page2"],
+        headers={"Link": to_header_link(url_page1, "previous")},
+    )
+
+    expected_result_page1 = {
+        "count": "3",
+        "deposits": [EXPECTED_DEPOSIT, EXPECTED_DEPOSIT2],
+        "next": url_page2,
+    }
+    expected_result_page2 = {
+        "count": "3",
+        "deposits": [EXPECTED_DEPOSIT3],
+        "previous": url_page1,
+    }
+
+    client = CollectionListDepositClient(url=base_url, auth=auth)
+    client2 = PublicApiDepositClient(url=base_url, auth=auth)
+
+    # The main public client should work the same way as the dedicated one
+    for list_deposits in (client.execute, client2.deposit_list):
+        result = list_deposits(collection, page=page1)
+        assert result == expected_result_page1
+
+        result2 = list_deposits(collection, page=page2, page_size=page_size)
+        assert result2 == expected_result_page2
+
+    assert requests_mock.called
+    queried_urls = [m.url for m in requests_mock.request_history]
+    assert queried_urls == [url_page1, url_page2] * 2
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -238,3 +238,16 @@
"""Used to get the target of a metadata object from a <swh:reference>,
as the latter uses a QualifiedSWHID."""
return ExtendedSWHID.from_string(str(swhid).split(";")[0])
+
+
+def to_header_link(link: str, link_name: str) -> str:
+    """Format one link (target url and relation name) for a Link http header.
+
+    Several links are to be joined with a comma to make the header value.
+
+    >>> link_next = to_header_link("next-url", "next")
+    >>> link_next
+    '<next-url>; rel="next"'
+    >>> ','.join([link_next, to_header_link("prev-url", "prev")])
+    '<next-url>; rel="next",<prev-url>; rel="prev"'
+
+    """
+    target = f"<{link}>"
+    relation = f'rel="{link_name}"'
+    return "; ".join((target, relation))
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 5:03 PM (6 d, 22 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218034
Attached To
D5769: client: Open a paginated list user deposits endpoint
Event Timeline
Log In to Comment