D3982.diff
diff --git a/docs/README.rst b/docs/README.rst
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -12,7 +12,7 @@
from swh.web.client.client import WebAPIClient
cli = WebAPIClient()
- # retrieve any archived object via its PID
+ # retrieve any archived object via its SWHID
cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6')
# same, but for specific object types
diff --git a/swh/web/client/client.py b/swh/web/client/client.py
--- a/swh/web/client/client.py
+++ b/swh/web/client/client.py
@@ -13,7 +13,7 @@
from swh.web.client.client import WebAPIClient
cli = WebAPIClient()
- # retrieve any archived object via its PID
+ # retrieve any archived object via its SWHID
cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6')
# same, but for specific object types
@@ -28,27 +28,33 @@
"""
-from typing import Any, Callable, Dict, Generator, List, Optional, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Union
from urllib.parse import urlparse
import dateutil.parser
import requests
-from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
-from swh.model.identifiers import PersistentId as PID
-from swh.model.identifiers import parse_persistent_identifier as parse_pid
+from swh.model.identifiers import (
+ CONTENT,
+ DIRECTORY,
+ RELEASE,
+ REVISION,
+ SNAPSHOT,
+ SWHID,
+ parse_swhid,
+)
-PIDish = Union[PID, str]
+SWHIDish = Union[SWHID, str]
ORIGIN_VISIT = "origin_visit"
-def _get_pid(pidish: PIDish) -> PID:
- """Parse string to PID if needed"""
- if isinstance(pidish, str):
- return parse_pid(pidish)
+def _get_swhid(swhidish: SWHIDish) -> SWHID:
+ """Parse string to SWHID if needed"""
+ if isinstance(swhidish, str):
+ return parse_swhid(swhidish)
else:
- return pidish
+ return swhidish
def typify(data: Any, obj_type: str) -> Any:
@@ -56,13 +62,13 @@
The following conversions are performed:
- - identifiers are converted from strings to PersistentId instances
+ - identifiers are converted from strings to SWHID instances
- timestamps are converted from strings to datetime.datetime objects
"""
- def to_pid(object_type, s):
- return PID(object_type=object_type, object_id=s)
+ def to_swhid(object_type, s):
+ return SWHID(object_type=object_type, object_id=s)
def to_date(s):
return dateutil.parser.parse(s)
@@ -80,25 +86,25 @@
if obj_type == SNAPSHOT:
for name, target in data.items():
if target["target_type"] != "alias":
- # alias targets do not point to objects via PIDs; others do
- target["target"] = to_pid(target["target_type"], target["target"])
+ # alias targets do not point to objects via SWHIDs; others do
+ target["target"] = to_swhid(target["target_type"], target["target"])
elif obj_type == REVISION:
- data["id"] = to_pid(obj_type, data["id"])
- data["directory"] = to_pid(DIRECTORY, data["directory"])
+ data["id"] = to_swhid(obj_type, data["id"])
+ data["directory"] = to_swhid(DIRECTORY, data["directory"])
for key in ("date", "committer_date"):
data[key] = to_date(data[key])
for parent in data["parents"]:
- parent["id"] = to_pid(REVISION, parent["id"])
+ parent["id"] = to_swhid(REVISION, parent["id"])
elif obj_type == RELEASE:
- data["id"] = to_pid(obj_type, data["id"])
+ data["id"] = to_swhid(obj_type, data["id"])
data["date"] = to_date(data["date"])
- data["target"] = to_pid(data["target_type"], data["target"])
+ data["target"] = to_swhid(data["target_type"], data["target"])
elif obj_type == DIRECTORY:
- dir_pid = None
+ dir_swhid = None
for entry in data:
- dir_pid = dir_pid or to_pid(obj_type, entry["dir_id"])
- entry["dir_id"] = dir_pid
- entry["target"] = to_pid(
+ dir_swhid = dir_swhid or to_swhid(obj_type, entry["dir_id"])
+ entry["dir_id"] = dir_swhid
+ entry["target"] = to_swhid(
obj_type_of_entry_type(entry["type"]), entry["target"]
)
elif obj_type == CONTENT:
@@ -106,7 +112,7 @@
elif obj_type == ORIGIN_VISIT:
data["date"] = to_date(data["date"])
if data["snapshot"] is not None:
- data["snapshot"] = to_pid(SNAPSHOT, data["snapshot"])
+ data["snapshot"] = to_swhid(SNAPSHOT, data["snapshot"])
else:
raise ValueError(f"invalid object type: {obj_type}")
@@ -141,7 +147,7 @@
self.api_path = u.path
self.bearer_token = bearer_token
- self._getters: Dict[str, Callable[[PIDish], Any]] = {
+ self._getters: Dict[str, Callable[[SWHIDish], Any]] = {
CONTENT: self.content,
DIRECTORY: self.directory,
RELEASE: self.release,
@@ -184,18 +190,18 @@
return r
- def _get_snapshot(self, pid: PIDish) -> Dict[str, Any]:
+ def _get_snapshot(self, swhid: SWHIDish) -> Dict[str, Any]:
"""Analogous to self.snapshot(), but zipping through partial snapshots,
merging them together before returning
"""
snapshot = {}
- for snp in self.snapshot(pid):
+ for snp in self.snapshot(swhid):
snapshot.update(snp)
return snapshot
- def get(self, pid: PIDish, **req_args) -> Any:
+ def get(self, swhid: SWHIDish, **req_args) -> Any:
"""Retrieve information about an object of any kind
Dispatcher method over the more specific methods content(),
@@ -207,35 +213,35 @@
"""
- pid_ = _get_pid(pid)
- return self._getters[pid_.object_type](pid_)
+ swhid_ = _get_swhid(swhid)
+ return self._getters[swhid_.object_type](swhid_)
- def iter(self, pid: PIDish, **req_args) -> Generator[Dict[str, Any], None, None]:
+ def iter(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]:
"""Stream over the information about an object of any kind
Streaming variant of get()
"""
- pid_ = _get_pid(pid)
- obj_type = pid_.object_type
+ swhid_ = _get_swhid(swhid)
+ obj_type = swhid_.object_type
if obj_type == SNAPSHOT:
- yield from self.snapshot(pid_)
+ yield from self.snapshot(swhid_)
elif obj_type == REVISION:
- yield from [self.revision(pid_)]
+ yield from [self.revision(swhid_)]
elif obj_type == RELEASE:
- yield from [self.release(pid_)]
+ yield from [self.release(swhid_)]
elif obj_type == DIRECTORY:
- yield from self.directory(pid_)
+ yield from self.directory(swhid_)
elif obj_type == CONTENT:
- yield from [self.content(pid_)]
+ yield from [self.content(swhid_)]
else:
raise ValueError(f"invalid object type: {obj_type}")
- def content(self, pid: PIDish, **req_args) -> Dict[str, Any]:
+ def content(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]:
"""Retrieve information about a content object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -244,16 +250,16 @@
"""
return typify(
self._call(
- f"content/sha1_git:{_get_pid(pid).object_id}/", **req_args
+ f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args
).json(),
CONTENT,
)
- def directory(self, pid: PIDish, **req_args) -> List[Dict[str, Any]]:
+ def directory(self, swhid: SWHIDish, **req_args) -> List[Dict[str, Any]]:
"""Retrieve information about a directory object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -261,15 +267,15 @@
"""
return typify(
- self._call(f"directory/{_get_pid(pid).object_id}/", **req_args).json(),
+ self._call(f"directory/{_get_swhid(swhid).object_id}/", **req_args).json(),
DIRECTORY,
)
- def revision(self, pid: PIDish, **req_args) -> Dict[str, Any]:
+ def revision(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]:
"""Retrieve information about a revision object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -277,15 +283,15 @@
"""
return typify(
- self._call(f"revision/{_get_pid(pid).object_id}/", **req_args).json(),
+ self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json(),
REVISION,
)
- def release(self, pid: PIDish, **req_args) -> Dict[str, Any]:
+ def release(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]:
"""Retrieve information about a release object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -293,17 +299,15 @@
"""
return typify(
- self._call(f"release/{_get_pid(pid).object_id}/", **req_args).json(),
+ self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json(),
RELEASE,
)
- def snapshot(
- self, pid: PIDish, **req_args
- ) -> Generator[Dict[str, Any], None, None]:
+ def snapshot(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]:
"""Retrieve information about a snapshot object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Returns:
@@ -317,7 +321,7 @@
"""
done = False
r = None
- query = f"snapshot/{_get_pid(pid).object_id}/"
+ query = f"snapshot/{_get_swhid(swhid).object_id}/"
while not done:
r = self._call(query, http_method="get", **req_args)
@@ -333,7 +337,7 @@
per_page: Optional[int] = None,
last_visit: Optional[int] = None,
**req_args,
- ) -> Generator[Dict[str, Any], None, None]:
+ ) -> Iterator[Dict[str, Any]]:
"""List visits of an origin
Args:
@@ -369,11 +373,11 @@
else:
done = True
- def content_exists(self, pid: PIDish, **req_args) -> bool:
+ def content_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a content object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -382,17 +386,17 @@
"""
return bool(
self._call(
- f"content/sha1_git:{_get_pid(pid).object_id}/",
+ f"content/sha1_git:{_get_swhid(swhid).object_id}/",
http_method="head",
**req_args,
)
)
- def directory_exists(self, pid: PIDish, **req_args) -> bool:
+ def directory_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a directory object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -401,15 +405,17 @@
"""
return bool(
self._call(
- f"directory/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"directory/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def revision_exists(self, pid: PIDish, **req_args) -> bool:
+ def revision_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a revision object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -418,15 +424,17 @@
"""
return bool(
self._call(
- f"revision/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"revision/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def release_exists(self, pid: PIDish, **req_args) -> bool:
+ def release_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a release object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -435,15 +443,17 @@
"""
return bool(
self._call(
- f"release/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"release/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def snapshot_exists(self, pid: PIDish, **req_args) -> bool:
+ def snapshot_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a snapshot object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -452,15 +462,17 @@
"""
return bool(
self._call(
- f"snapshot/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"snapshot/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def content_raw(self, pid: PIDish, **req_args) -> Generator[bytes, None, None]:
+ def content_raw(self, swhid: SWHIDish, **req_args) -> Iterator[bytes]:
"""Iterate over the raw content of a content object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -468,7 +480,9 @@
"""
r = self._call(
- f"content/sha1_git:{_get_pid(pid).object_id}/raw/", stream=True, **req_args
+ f"content/sha1_git:{_get_swhid(swhid).object_id}/raw/",
+ stream=True,
+ **req_args,
)
r.raise_for_status()
diff --git a/swh/web/client/tests/test_web_api_client.py b/swh/web/client/tests/test_web_api_client.py
--- a/swh/web/client/tests/test_web_api_client.py
+++ b/swh/web/client/tests/test_web_api_client.py
@@ -5,62 +5,62 @@
from dateutil.parser import parse as parse_date
-from swh.model.identifiers import parse_persistent_identifier as parse_pid
+from swh.model.identifiers import parse_swhid
def test_get_content(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1")
+ obj = web_api_client.get(swhid)
assert obj["length"] == 151810
for key in ("length", "status", "checksums", "data_url"):
assert key in obj
- assert obj["checksums"]["sha1_git"] == str(pid).split(":")[3]
+ assert obj["checksums"]["sha1_git"] == str(swhid).split(":")[3]
assert obj["checksums"]["sha1"] == "dc2830a9e72f23c1dfebef4413003221baa5fb62"
- assert obj == web_api_client.content(pid)
+ assert obj == web_api_client.content(swhid)
def test_get_directory(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6")
+ obj = web_api_client.get(swhid)
assert len(obj) == 35 # number of directory entries
- assert all(map(lambda entry: entry["dir_id"] == pid, obj))
+ assert all(map(lambda entry: entry["dir_id"] == swhid, obj))
dir_entry = obj[0]
assert dir_entry["type"] == "file"
- assert dir_entry["target"] == parse_pid(
+ assert dir_entry["target"] == parse_swhid(
"swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814"
)
assert dir_entry["name"] == ".bzrignore"
assert dir_entry["length"] == 582
- assert obj == web_api_client.directory(pid)
+ assert obj == web_api_client.directory(swhid)
def test_get_release(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342")
+ obj = web_api_client.get(swhid)
- assert obj["id"] == pid
+ assert obj["id"] == swhid
assert obj["author"]["fullname"] == "Paul Tagliamonte <tag@pault.ag>"
assert obj["author"]["name"] == "Paul Tagliamonte"
assert obj["date"] == parse_date("2013-07-06T19:34:11-04:00")
assert obj["name"] == "0.9.9"
assert obj["target_type"] == "revision"
- assert obj["target"] == parse_pid(
+ assert obj["target"] == parse_swhid(
"swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636"
)
assert not obj["synthetic"]
- assert obj == web_api_client.release(pid)
+ assert obj == web_api_client.release(swhid)
def test_get_revision(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
+ obj = web_api_client.get(swhid)
- assert obj["id"] == pid
+ assert obj["id"] == swhid
for role in ("author", "committer"):
assert (
obj[role]["fullname"] == "Nicolas Dandrimont <nicolas.dandrimont@crans.org>"
@@ -72,36 +72,36 @@
assert obj["message"].startswith("Merge branch")
assert obj["merge"]
assert len(obj["parents"]) == 2
- assert obj["parents"][0]["id"] == parse_pid(
+ assert obj["parents"][0]["id"] == parse_swhid(
"swh:1:rev:26307d261279861c2d9c9eca3bb38519f951bea4"
)
- assert obj["parents"][1]["id"] == parse_pid(
+ assert obj["parents"][1]["id"] == parse_swhid(
"swh:1:rev:37fc9e08d0c4b71807a4f1ecb06112e78d91c283"
)
- assert obj == web_api_client.revision(pid)
+ assert obj == web_api_client.revision(swhid)
def test_get_snapshot(web_api_client, web_api_mock):
# small snapshot, the one from Web API doc
- pid = parse_pid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a")
+ obj = web_api_client.get(swhid)
assert len(obj) == 4
assert obj["refs/heads/master"]["target_type"] == "revision"
- assert obj["refs/heads/master"]["target"] == parse_pid(
+ assert obj["refs/heads/master"]["target"] == parse_swhid(
"swh:1:rev:83c20a6a63a7ebc1a549d367bc07a61b926cecf3"
)
assert obj["refs/tags/dpkt-1.7"]["target_type"] == "revision"
- assert obj["refs/tags/dpkt-1.7"]["target"] == parse_pid(
+ assert obj["refs/tags/dpkt-1.7"]["target"] == parse_swhid(
"swh:1:rev:0c9dbfbc0974ec8ac1d8253aa1092366a03633a8"
)
def test_iter_snapshot(web_api_client, web_api_mock):
# large snapshot from the Linux kernel, usually spanning two pages
- pid = parse_pid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764")
- obj = web_api_client.snapshot(pid)
+ swhid = parse_swhid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764")
+ obj = web_api_client.snapshot(swhid)
snp = {}
for partial in obj:
@@ -119,8 +119,8 @@
web_api_client.bearer_token = refresh_token
- pid = parse_pid(f"swh:1:rel:{rel_id}")
- web_api_client.get(pid)
+ swhid = parse_swhid(f"swh:1:rel:{rel_id}")
+ web_api_client.get(swhid)
sent_request = web_api_mock._adapter.last_request
@@ -141,5 +141,5 @@
assert visits[0]["date"] == timestamp
assert visits[0]["snapshot"] is None
- snapshot_pid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976"
- assert visits[7]["snapshot"] == parse_pid(snapshot_pid)
+ snapshot_swhid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976"
+ assert visits[7]["snapshot"] == parse_swhid(snapshot_swhid)
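
In summary, this diff renames the deprecated PersistentId ("PID") API to the SWHID naming throughout swh.web.client: parse_persistent_identifier becomes parse_swhid, the PID class becomes SWHID, and the PIDish alias becomes SWHIDish. A minimal usage sketch of the client after this change, based only on the docstrings and tests updated above (get() dispatches on the object type encoded in the identifier and accepts either a parsed SWHID or its string form):

    from swh.model.identifiers import parse_swhid  # replaces parse_persistent_identifier
    from swh.web.client.client import WebAPIClient

    cli = WebAPIClient()

    # get() accepts either a parsed SWHID or the plain string form
    # (SWHIDish = Union[SWHID, str]) and dispatches to revision(), content(), etc.
    swhid = parse_swhid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
    revision = cli.get(swhid)
    assert revision["id"] == swhid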
Attached To
D3982: Replace deprecated PersistentID class with SWHID