Page Menu | Home | Software Heritage

D3982.diff
No One | Temporary

D3982.diff

diff --git a/docs/README.rst b/docs/README.rst
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -12,7 +12,7 @@
from swh.web.client.client import WebAPIClient
cli = WebAPIClient()
- # retrieve any archived object via its PID
+ # retrieve any archived object via its SWHID
cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6')
# same, but for specific object types
diff --git a/swh/web/client/client.py b/swh/web/client/client.py
--- a/swh/web/client/client.py
+++ b/swh/web/client/client.py
@@ -13,7 +13,7 @@
from swh.web.client.client import WebAPIClient
cli = WebAPIClient()
- # retrieve any archived object via its PID
+ # retrieve any archived object via its SWHID
cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6')
# same, but for specific object types
@@ -28,27 +28,33 @@
"""
-from typing import Any, Callable, Dict, Generator, List, Optional, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Union
from urllib.parse import urlparse
import dateutil.parser
import requests
-from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
-from swh.model.identifiers import PersistentId as PID
-from swh.model.identifiers import parse_persistent_identifier as parse_pid
+from swh.model.identifiers import (
+ CONTENT,
+ DIRECTORY,
+ RELEASE,
+ REVISION,
+ SNAPSHOT,
+ SWHID,
+ parse_swhid,
+)
-PIDish = Union[PID, str]
+SWHIDish = Union[SWHID, str]
ORIGIN_VISIT = "origin_visit"
-def _get_pid(pidish: PIDish) -> PID:
- """Parse string to PID if needed"""
- if isinstance(pidish, str):
- return parse_pid(pidish)
+def _get_swhid(swhidish: SWHIDish) -> SWHID:
+ """Parse string to SWHID if needed"""
+ if isinstance(swhidish, str):
+ return parse_swhid(swhidish)
else:
- return pidish
+ return swhidish
def typify(data: Any, obj_type: str) -> Any:
@@ -56,13 +62,13 @@
The following conversions are performed:
- - identifiers are converted from strings to PersistentId instances
+ - identifiers are converted from strings to SWHID instances
- timestamps are converted from strings to datetime.datetime objects
"""
- def to_pid(object_type, s):
- return PID(object_type=object_type, object_id=s)
+ def to_swhid(object_type, s):
+ return SWHID(object_type=object_type, object_id=s)
def to_date(s):
return dateutil.parser.parse(s)
@@ -80,25 +86,25 @@
if obj_type == SNAPSHOT:
for name, target in data.items():
if target["target_type"] != "alias":
- # alias targets do not point to objects via PIDs; others do
- target["target"] = to_pid(target["target_type"], target["target"])
+ # alias targets do not point to objects via SWHIDs; others do
+ target["target"] = to_swhid(target["target_type"], target["target"])
elif obj_type == REVISION:
- data["id"] = to_pid(obj_type, data["id"])
- data["directory"] = to_pid(DIRECTORY, data["directory"])
+ data["id"] = to_swhid(obj_type, data["id"])
+ data["directory"] = to_swhid(DIRECTORY, data["directory"])
for key in ("date", "committer_date"):
data[key] = to_date(data[key])
for parent in data["parents"]:
- parent["id"] = to_pid(REVISION, parent["id"])
+ parent["id"] = to_swhid(REVISION, parent["id"])
elif obj_type == RELEASE:
- data["id"] = to_pid(obj_type, data["id"])
+ data["id"] = to_swhid(obj_type, data["id"])
data["date"] = to_date(data["date"])
- data["target"] = to_pid(data["target_type"], data["target"])
+ data["target"] = to_swhid(data["target_type"], data["target"])
elif obj_type == DIRECTORY:
- dir_pid = None
+ dir_swhid = None
for entry in data:
- dir_pid = dir_pid or to_pid(obj_type, entry["dir_id"])
- entry["dir_id"] = dir_pid
- entry["target"] = to_pid(
+ dir_swhid = dir_swhid or to_swhid(obj_type, entry["dir_id"])
+ entry["dir_id"] = dir_swhid
+ entry["target"] = to_swhid(
obj_type_of_entry_type(entry["type"]), entry["target"]
)
elif obj_type == CONTENT:
@@ -106,7 +112,7 @@
elif obj_type == ORIGIN_VISIT:
data["date"] = to_date(data["date"])
if data["snapshot"] is not None:
- data["snapshot"] = to_pid(SNAPSHOT, data["snapshot"])
+ data["snapshot"] = to_swhid(SNAPSHOT, data["snapshot"])
else:
raise ValueError(f"invalid object type: {obj_type}")
@@ -141,7 +147,7 @@
self.api_path = u.path
self.bearer_token = bearer_token
- self._getters: Dict[str, Callable[[PIDish], Any]] = {
+ self._getters: Dict[str, Callable[[SWHIDish], Any]] = {
CONTENT: self.content,
DIRECTORY: self.directory,
RELEASE: self.release,
@@ -184,18 +190,18 @@
return r
- def _get_snapshot(self, pid: PIDish) -> Dict[str, Any]:
+ def _get_snapshot(self, swhid: SWHIDish) -> Dict[str, Any]:
"""Analogous to self.snapshot(), but zipping through partial snapshots,
merging them together before returning
"""
snapshot = {}
- for snp in self.snapshot(pid):
+ for snp in self.snapshot(swhid):
snapshot.update(snp)
return snapshot
- def get(self, pid: PIDish, **req_args) -> Any:
+ def get(self, swhid: SWHIDish, **req_args) -> Any:
"""Retrieve information about an object of any kind
Dispatcher method over the more specific methods content(),
@@ -207,35 +213,35 @@
"""
- pid_ = _get_pid(pid)
- return self._getters[pid_.object_type](pid_)
+ swhid_ = _get_swhid(swhid)
+ return self._getters[swhid_.object_type](swhid_)
- def iter(self, pid: PIDish, **req_args) -> Generator[Dict[str, Any], None, None]:
+ def iter(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]:
"""Stream over the information about an object of any kind
Streaming variant of get()
"""
- pid_ = _get_pid(pid)
- obj_type = pid_.object_type
+ swhid_ = _get_swhid(swhid)
+ obj_type = swhid_.object_type
if obj_type == SNAPSHOT:
- yield from self.snapshot(pid_)
+ yield from self.snapshot(swhid_)
elif obj_type == REVISION:
- yield from [self.revision(pid_)]
+ yield from [self.revision(swhid_)]
elif obj_type == RELEASE:
- yield from [self.release(pid_)]
+ yield from [self.release(swhid_)]
elif obj_type == DIRECTORY:
- yield from self.directory(pid_)
+ yield from self.directory(swhid_)
elif obj_type == CONTENT:
- yield from [self.content(pid_)]
+ yield from [self.content(swhid_)]
else:
raise ValueError(f"invalid object type: {obj_type}")
- def content(self, pid: PIDish, **req_args) -> Dict[str, Any]:
+ def content(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]:
"""Retrieve information about a content object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -244,16 +250,16 @@
"""
return typify(
self._call(
- f"content/sha1_git:{_get_pid(pid).object_id}/", **req_args
+ f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args
).json(),
CONTENT,
)
- def directory(self, pid: PIDish, **req_args) -> List[Dict[str, Any]]:
+ def directory(self, swhid: SWHIDish, **req_args) -> List[Dict[str, Any]]:
"""Retrieve information about a directory object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -261,15 +267,15 @@
"""
return typify(
- self._call(f"directory/{_get_pid(pid).object_id}/", **req_args).json(),
+ self._call(f"directory/{_get_swhid(swhid).object_id}/", **req_args).json(),
DIRECTORY,
)
- def revision(self, pid: PIDish, **req_args) -> Dict[str, Any]:
+ def revision(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]:
"""Retrieve information about a revision object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -277,15 +283,15 @@
"""
return typify(
- self._call(f"revision/{_get_pid(pid).object_id}/", **req_args).json(),
+ self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json(),
REVISION,
)
- def release(self, pid: PIDish, **req_args) -> Dict[str, Any]:
+ def release(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]:
"""Retrieve information about a release object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -293,17 +299,15 @@
"""
return typify(
- self._call(f"release/{_get_pid(pid).object_id}/", **req_args).json(),
+ self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json(),
RELEASE,
)
- def snapshot(
- self, pid: PIDish, **req_args
- ) -> Generator[Dict[str, Any], None, None]:
+ def snapshot(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]:
"""Retrieve information about a snapshot object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Returns:
@@ -317,7 +321,7 @@
"""
done = False
r = None
- query = f"snapshot/{_get_pid(pid).object_id}/"
+ query = f"snapshot/{_get_swhid(swhid).object_id}/"
while not done:
r = self._call(query, http_method="get", **req_args)
@@ -333,7 +337,7 @@
per_page: Optional[int] = None,
last_visit: Optional[int] = None,
**req_args,
- ) -> Generator[Dict[str, Any], None, None]:
+ ) -> Iterator[Dict[str, Any]]:
"""List visits of an origin
Args:
@@ -369,11 +373,11 @@
else:
done = True
- def content_exists(self, pid: PIDish, **req_args) -> bool:
+ def content_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a content object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -382,17 +386,17 @@
"""
return bool(
self._call(
- f"content/sha1_git:{_get_pid(pid).object_id}/",
+ f"content/sha1_git:{_get_swhid(swhid).object_id}/",
http_method="head",
**req_args,
)
)
- def directory_exists(self, pid: PIDish, **req_args) -> bool:
+ def directory_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a directory object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -401,15 +405,17 @@
"""
return bool(
self._call(
- f"directory/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"directory/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def revision_exists(self, pid: PIDish, **req_args) -> bool:
+ def revision_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a revision object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -418,15 +424,17 @@
"""
return bool(
self._call(
- f"revision/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"revision/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def release_exists(self, pid: PIDish, **req_args) -> bool:
+ def release_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a release object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -435,15 +443,17 @@
"""
return bool(
self._call(
- f"release/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"release/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def snapshot_exists(self, pid: PIDish, **req_args) -> bool:
+ def snapshot_exists(self, swhid: SWHIDish, **req_args) -> bool:
"""Check if a snapshot object exists in the archive
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.head()
Raises:
@@ -452,15 +462,17 @@
"""
return bool(
self._call(
- f"snapshot/{_get_pid(pid).object_id}/", http_method="head", **req_args
+ f"snapshot/{_get_swhid(swhid).object_id}/",
+ http_method="head",
+ **req_args,
)
)
- def content_raw(self, pid: PIDish, **req_args) -> Generator[bytes, None, None]:
+ def content_raw(self, swhid: SWHIDish, **req_args) -> Iterator[bytes]:
"""Iterate over the raw content of a content object
Args:
- pid: object identifier
+ swhid: object persistent identifier
req_args: extra keyword arguments for requests.get()
Raises:
@@ -468,7 +480,9 @@
"""
r = self._call(
- f"content/sha1_git:{_get_pid(pid).object_id}/raw/", stream=True, **req_args
+ f"content/sha1_git:{_get_swhid(swhid).object_id}/raw/",
+ stream=True,
+ **req_args,
)
r.raise_for_status()
diff --git a/swh/web/client/tests/test_web_api_client.py b/swh/web/client/tests/test_web_api_client.py
--- a/swh/web/client/tests/test_web_api_client.py
+++ b/swh/web/client/tests/test_web_api_client.py
@@ -5,62 +5,62 @@
from dateutil.parser import parse as parse_date
-from swh.model.identifiers import parse_persistent_identifier as parse_pid
+from swh.model.identifiers import parse_swhid
def test_get_content(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1")
+ obj = web_api_client.get(swhid)
assert obj["length"] == 151810
for key in ("length", "status", "checksums", "data_url"):
assert key in obj
- assert obj["checksums"]["sha1_git"] == str(pid).split(":")[3]
+ assert obj["checksums"]["sha1_git"] == str(swhid).split(":")[3]
assert obj["checksums"]["sha1"] == "dc2830a9e72f23c1dfebef4413003221baa5fb62"
- assert obj == web_api_client.content(pid)
+ assert obj == web_api_client.content(swhid)
def test_get_directory(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6")
+ obj = web_api_client.get(swhid)
assert len(obj) == 35 # number of directory entries
- assert all(map(lambda entry: entry["dir_id"] == pid, obj))
+ assert all(map(lambda entry: entry["dir_id"] == swhid, obj))
dir_entry = obj[0]
assert dir_entry["type"] == "file"
- assert dir_entry["target"] == parse_pid(
+ assert dir_entry["target"] == parse_swhid(
"swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814"
)
assert dir_entry["name"] == ".bzrignore"
assert dir_entry["length"] == 582
- assert obj == web_api_client.directory(pid)
+ assert obj == web_api_client.directory(swhid)
def test_get_release(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342")
+ obj = web_api_client.get(swhid)
- assert obj["id"] == pid
+ assert obj["id"] == swhid
assert obj["author"]["fullname"] == "Paul Tagliamonte <tag@pault.ag>"
assert obj["author"]["name"] == "Paul Tagliamonte"
assert obj["date"] == parse_date("2013-07-06T19:34:11-04:00")
assert obj["name"] == "0.9.9"
assert obj["target_type"] == "revision"
- assert obj["target"] == parse_pid(
+ assert obj["target"] == parse_swhid(
"swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636"
)
assert not obj["synthetic"]
- assert obj == web_api_client.release(pid)
+ assert obj == web_api_client.release(swhid)
def test_get_revision(web_api_client, web_api_mock):
- pid = parse_pid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6")
+ obj = web_api_client.get(swhid)
- assert obj["id"] == pid
+ assert obj["id"] == swhid
for role in ("author", "committer"):
assert (
obj[role]["fullname"] == "Nicolas Dandrimont <nicolas.dandrimont@crans.org>"
@@ -72,36 +72,36 @@
assert obj["message"].startswith("Merge branch")
assert obj["merge"]
assert len(obj["parents"]) == 2
- assert obj["parents"][0]["id"] == parse_pid(
+ assert obj["parents"][0]["id"] == parse_swhid(
"swh:1:rev:26307d261279861c2d9c9eca3bb38519f951bea4"
)
- assert obj["parents"][1]["id"] == parse_pid(
+ assert obj["parents"][1]["id"] == parse_swhid(
"swh:1:rev:37fc9e08d0c4b71807a4f1ecb06112e78d91c283"
)
- assert obj == web_api_client.revision(pid)
+ assert obj == web_api_client.revision(swhid)
def test_get_snapshot(web_api_client, web_api_mock):
# small snapshot, the one from Web API doc
- pid = parse_pid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a")
- obj = web_api_client.get(pid)
+ swhid = parse_swhid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a")
+ obj = web_api_client.get(swhid)
assert len(obj) == 4
assert obj["refs/heads/master"]["target_type"] == "revision"
- assert obj["refs/heads/master"]["target"] == parse_pid(
+ assert obj["refs/heads/master"]["target"] == parse_swhid(
"swh:1:rev:83c20a6a63a7ebc1a549d367bc07a61b926cecf3"
)
assert obj["refs/tags/dpkt-1.7"]["target_type"] == "revision"
- assert obj["refs/tags/dpkt-1.7"]["target"] == parse_pid(
+ assert obj["refs/tags/dpkt-1.7"]["target"] == parse_swhid(
"swh:1:rev:0c9dbfbc0974ec8ac1d8253aa1092366a03633a8"
)
def test_iter_snapshot(web_api_client, web_api_mock):
# large snapshot from the Linux kernel, usually spanning two pages
- pid = parse_pid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764")
- obj = web_api_client.snapshot(pid)
+ swhid = parse_swhid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764")
+ obj = web_api_client.snapshot(swhid)
snp = {}
for partial in obj:
@@ -119,8 +119,8 @@
web_api_client.bearer_token = refresh_token
- pid = parse_pid(f"swh:1:rel:{rel_id}")
- web_api_client.get(pid)
+ swhid = parse_swhid(f"swh:1:rel:{rel_id}")
+ web_api_client.get(swhid)
sent_request = web_api_mock._adapter.last_request
@@ -141,5 +141,5 @@
assert visits[0]["date"] == timestamp
assert visits[0]["snapshot"] is None
- snapshot_pid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976"
- assert visits[7]["snapshot"] == parse_pid(snapshot_pid)
+ snapshot_swhid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976"
+ assert visits[7]["snapshot"] == parse_swhid(snapshot_swhid)

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 8:13 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216048

Event Timeline