diff --git a/docs/README.rst b/docs/README.rst --- a/docs/README.rst +++ b/docs/README.rst @@ -12,7 +12,7 @@ from swh.web.client.client import WebAPIClient cli = WebAPIClient() - # retrieve any archived object via its PID + # retrieve any archived object via its SWHID cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # same, but for specific object types diff --git a/swh/web/client/client.py b/swh/web/client/client.py --- a/swh/web/client/client.py +++ b/swh/web/client/client.py @@ -13,7 +13,7 @@ from swh.web.client.client import WebAPIClient cli = WebAPIClient() - # retrieve any archived object via its PID + # retrieve any archived object via its SWHID cli.get('swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6') # same, but for specific object types @@ -28,27 +28,33 @@ """ -from typing import Any, Callable, Dict, Generator, List, Optional, Union +from typing import Any, Callable, Dict, Iterator, List, Optional, Union from urllib.parse import urlparse import dateutil.parser import requests -from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT -from swh.model.identifiers import PersistentId as PID -from swh.model.identifiers import parse_persistent_identifier as parse_pid +from swh.model.identifiers import ( + CONTENT, + DIRECTORY, + RELEASE, + REVISION, + SNAPSHOT, + SWHID, + parse_swhid, +) -PIDish = Union[PID, str] +SWHIDish = Union[SWHID, str] ORIGIN_VISIT = "origin_visit" -def _get_pid(pidish: PIDish) -> PID: - """Parse string to PID if needed""" - if isinstance(pidish, str): - return parse_pid(pidish) +def _get_swhid(swhidish: SWHIDish) -> SWHID: + """Parse string to SWHID if needed""" + if isinstance(swhidish, str): + return parse_swhid(swhidish) else: - return pidish + return swhidish def typify(data: Any, obj_type: str) -> Any: @@ -56,13 +62,13 @@ The following conversions are performed: - - identifiers are converted from strings to PersistentId instances + - identifiers are converted from strings to SWHID instances - timestamps are converted from strings to datetime.datetime objects """ - def to_pid(object_type, s): - return PID(object_type=object_type, object_id=s) + def to_swhid(object_type, s): + return SWHID(object_type=object_type, object_id=s) def to_date(s): return dateutil.parser.parse(s) @@ -80,25 +86,25 @@ if obj_type == SNAPSHOT: for name, target in data.items(): if target["target_type"] != "alias": - # alias targets do not point to objects via PIDs; others do - target["target"] = to_pid(target["target_type"], target["target"]) + # alias targets do not point to objects via SWHIDs; others do + target["target"] = to_swhid(target["target_type"], target["target"]) elif obj_type == REVISION: - data["id"] = to_pid(obj_type, data["id"]) - data["directory"] = to_pid(DIRECTORY, data["directory"]) + data["id"] = to_swhid(obj_type, data["id"]) + data["directory"] = to_swhid(DIRECTORY, data["directory"]) for key in ("date", "committer_date"): data[key] = to_date(data[key]) for parent in data["parents"]: - parent["id"] = to_pid(REVISION, parent["id"]) + parent["id"] = to_swhid(REVISION, parent["id"]) elif obj_type == RELEASE: - data["id"] = to_pid(obj_type, data["id"]) + data["id"] = to_swhid(obj_type, data["id"]) data["date"] = to_date(data["date"]) - data["target"] = to_pid(data["target_type"], data["target"]) + data["target"] = to_swhid(data["target_type"], data["target"]) elif obj_type == DIRECTORY: - dir_pid = None + dir_swhid = None for entry in data: - dir_pid = dir_pid or to_pid(obj_type, entry["dir_id"]) - entry["dir_id"] = dir_pid - entry["target"] = to_pid( + dir_swhid = dir_swhid or to_swhid(obj_type, entry["dir_id"]) + entry["dir_id"] = dir_swhid + entry["target"] = to_swhid( obj_type_of_entry_type(entry["type"]), entry["target"] ) elif obj_type == CONTENT: @@ -106,7 +112,7 @@ elif obj_type == ORIGIN_VISIT: data["date"] = to_date(data["date"]) if data["snapshot"] is not None: - data["snapshot"] = to_pid(SNAPSHOT, data["snapshot"]) + data["snapshot"] = to_swhid(SNAPSHOT, data["snapshot"]) else: raise ValueError(f"invalid object type: {obj_type}") @@ -141,7 +147,7 @@ self.api_path = u.path self.bearer_token = bearer_token - self._getters: Dict[str, Callable[[PIDish], Any]] = { + self._getters: Dict[str, Callable[[SWHIDish], Any]] = { CONTENT: self.content, DIRECTORY: self.directory, RELEASE: self.release, @@ -184,18 +190,18 @@ return r - def _get_snapshot(self, pid: PIDish) -> Dict[str, Any]: + def _get_snapshot(self, swhid: SWHIDish) -> Dict[str, Any]: """Analogous to self.snapshot(), but zipping through partial snapshots, merging them together before returning """ snapshot = {} - for snp in self.snapshot(pid): + for snp in self.snapshot(swhid): snapshot.update(snp) return snapshot - def get(self, pid: PIDish, **req_args) -> Any: + def get(self, swhid: SWHIDish, **req_args) -> Any: """Retrieve information about an object of any kind Dispatcher method over the more specific methods content(), @@ -207,35 +213,35 @@ """ - pid_ = _get_pid(pid) - return self._getters[pid_.object_type](pid_) + swhid_ = _get_swhid(swhid) + return self._getters[swhid_.object_type](swhid_) - def iter(self, pid: PIDish, **req_args) -> Generator[Dict[str, Any], None, None]: + def iter(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]: """Stream over the information about an object of any kind Streaming variant of get() """ - pid_ = _get_pid(pid) - obj_type = pid_.object_type + swhid_ = _get_swhid(swhid) + obj_type = swhid_.object_type if obj_type == SNAPSHOT: - yield from self.snapshot(pid_) + yield from self.snapshot(swhid_) elif obj_type == REVISION: - yield from [self.revision(pid_)] + yield from [self.revision(swhid_)] elif obj_type == RELEASE: - yield from [self.release(pid_)] + yield from [self.release(swhid_)] elif obj_type == DIRECTORY: - yield from self.directory(pid_) + yield from self.directory(swhid_) elif obj_type == CONTENT: - yield from [self.content(pid_)] + yield from [self.content(swhid_)] else: raise ValueError(f"invalid object type: {obj_type}") - def content(self, pid: PIDish, **req_args) -> Dict[str, Any]: + def content(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: """Retrieve information about a content object Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.get() Raises: @@ -244,16 +250,16 @@ """ return typify( self._call( - f"content/sha1_git:{_get_pid(pid).object_id}/", **req_args + f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args ).json(), CONTENT, ) - def directory(self, pid: PIDish, **req_args) -> List[Dict[str, Any]]: + def directory(self, swhid: SWHIDish, **req_args) -> List[Dict[str, Any]]: """Retrieve information about a directory object Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.get() Raises: @@ -261,15 +267,15 @@ """ return typify( - self._call(f"directory/{_get_pid(pid).object_id}/", **req_args).json(), + self._call(f"directory/{_get_swhid(swhid).object_id}/", **req_args).json(), DIRECTORY, ) - def revision(self, pid: PIDish, **req_args) -> Dict[str, Any]: + def revision(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: """Retrieve information about a revision object Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.get() Raises: @@ -277,15 +283,15 @@ """ return typify( - self._call(f"revision/{_get_pid(pid).object_id}/", **req_args).json(), + self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json(), REVISION, ) - def release(self, pid: PIDish, **req_args) -> Dict[str, Any]: + def release(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: """Retrieve information about a release object Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.get() Raises: @@ -293,17 +299,15 @@ """ return typify( - self._call(f"release/{_get_pid(pid).object_id}/", **req_args).json(), + self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json(), RELEASE, ) - def snapshot( - self, pid: PIDish, **req_args - ) -> Generator[Dict[str, Any], None, None]: + def snapshot(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]: """Retrieve information about a snapshot object Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.get() Returns: @@ -317,7 +321,7 @@ """ done = False r = None - query = f"snapshot/{_get_pid(pid).object_id}/" + query = f"snapshot/{_get_swhid(swhid).object_id}/" while not done: r = self._call(query, http_method="get", **req_args) @@ -333,7 +337,7 @@ per_page: Optional[int] = None, last_visit: Optional[int] = None, **req_args, - ) -> Generator[Dict[str, Any], None, None]: + ) -> Iterator[Dict[str, Any]]: """List visits of an origin Args: @@ -369,11 +373,11 @@ else: done = True - def content_exists(self, pid: PIDish, **req_args) -> bool: + def content_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a content object exists in the archive Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: @@ -382,17 +386,17 @@ """ return bool( self._call( - f"content/sha1_git:{_get_pid(pid).object_id}/", + f"content/sha1_git:{_get_swhid(swhid).object_id}/", http_method="head", **req_args, ) ) - def directory_exists(self, pid: PIDish, **req_args) -> bool: + def directory_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a directory object exists in the archive Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: @@ -401,15 +405,17 @@ """ return bool( self._call( - f"directory/{_get_pid(pid).object_id}/", http_method="head", **req_args + f"directory/{_get_swhid(swhid).object_id}/", + http_method="head", + **req_args, ) ) - def revision_exists(self, pid: PIDish, **req_args) -> bool: + def revision_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a revision object exists in the archive Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: @@ -418,15 +424,17 @@ """ return bool( self._call( - f"revision/{_get_pid(pid).object_id}/", http_method="head", **req_args + f"revision/{_get_swhid(swhid).object_id}/", + http_method="head", + **req_args, ) ) - def release_exists(self, pid: PIDish, **req_args) -> bool: + def release_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a release object exists in the archive Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: @@ -435,15 +443,17 @@ """ return bool( self._call( - f"release/{_get_pid(pid).object_id}/", http_method="head", **req_args + f"release/{_get_swhid(swhid).object_id}/", + http_method="head", + **req_args, ) ) - def snapshot_exists(self, pid: PIDish, **req_args) -> bool: + def snapshot_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a snapshot object exists in the archive Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.head() Raises: @@ -452,15 +462,17 @@ """ return bool( self._call( - f"snapshot/{_get_pid(pid).object_id}/", http_method="head", **req_args + f"snapshot/{_get_swhid(swhid).object_id}/", + http_method="head", + **req_args, ) ) - def content_raw(self, pid: PIDish, **req_args) -> Generator[bytes, None, None]: + def content_raw(self, swhid: SWHIDish, **req_args) -> Iterator[bytes]: """Iterate over the raw content of a content object Args: - pid: object identifier + swhid: object persistent identifier req_args: extra keyword arguments for requests.get() Raises: @@ -468,7 +480,9 @@ """ r = self._call( - f"content/sha1_git:{_get_pid(pid).object_id}/raw/", stream=True, **req_args + f"content/sha1_git:{_get_swhid(swhid).object_id}/raw/", + stream=True, + **req_args, ) r.raise_for_status() diff --git a/swh/web/client/tests/test_web_api_client.py b/swh/web/client/tests/test_web_api_client.py --- a/swh/web/client/tests/test_web_api_client.py +++ b/swh/web/client/tests/test_web_api_client.py @@ -5,62 +5,62 @@ from dateutil.parser import parse as parse_date -from swh.model.identifiers import parse_persistent_identifier as parse_pid +from swh.model.identifiers import parse_swhid def test_get_content(web_api_client, web_api_mock): - pid = parse_pid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") - obj = web_api_client.get(pid) + swhid = parse_swhid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") + obj = web_api_client.get(swhid) assert obj["length"] == 151810 for key in ("length", "status", "checksums", "data_url"): assert key in obj - assert obj["checksums"]["sha1_git"] == str(pid).split(":")[3] + assert obj["checksums"]["sha1_git"] == str(swhid).split(":")[3] assert obj["checksums"]["sha1"] == "dc2830a9e72f23c1dfebef4413003221baa5fb62" - assert obj == web_api_client.content(pid) + assert obj == web_api_client.content(swhid) def test_get_directory(web_api_client, web_api_mock): - pid = parse_pid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6") - obj = web_api_client.get(pid) + swhid = parse_swhid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6") + obj = web_api_client.get(swhid) assert len(obj) == 35 # number of directory entries - assert all(map(lambda entry: entry["dir_id"] == pid, obj)) + assert all(map(lambda entry: entry["dir_id"] == swhid, obj)) dir_entry = obj[0] assert dir_entry["type"] == "file" - assert dir_entry["target"] == parse_pid( + assert dir_entry["target"] == parse_swhid( "swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814" ) assert dir_entry["name"] == ".bzrignore" assert dir_entry["length"] == 582 - assert obj == web_api_client.directory(pid) + assert obj == web_api_client.directory(swhid) def test_get_release(web_api_client, web_api_mock): - pid = parse_pid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342") - obj = web_api_client.get(pid) + swhid = parse_swhid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342") + obj = web_api_client.get(swhid) - assert obj["id"] == pid + assert obj["id"] == swhid assert obj["author"]["fullname"] == "Paul Tagliamonte " assert obj["author"]["name"] == "Paul Tagliamonte" assert obj["date"] == parse_date("2013-07-06T19:34:11-04:00") assert obj["name"] == "0.9.9" assert obj["target_type"] == "revision" - assert obj["target"] == parse_pid( + assert obj["target"] == parse_swhid( "swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636" ) assert not obj["synthetic"] - assert obj == web_api_client.release(pid) + assert obj == web_api_client.release(swhid) def test_get_revision(web_api_client, web_api_mock): - pid = parse_pid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6") - obj = web_api_client.get(pid) + swhid = parse_swhid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6") + obj = web_api_client.get(swhid) - assert obj["id"] == pid + assert obj["id"] == swhid for role in ("author", "committer"): assert ( obj[role]["fullname"] == "Nicolas Dandrimont " @@ -72,36 +72,36 @@ assert obj["message"].startswith("Merge branch") assert obj["merge"] assert len(obj["parents"]) == 2 - assert obj["parents"][0]["id"] == parse_pid( + assert obj["parents"][0]["id"] == parse_swhid( "swh:1:rev:26307d261279861c2d9c9eca3bb38519f951bea4" ) - assert obj["parents"][1]["id"] == parse_pid( + assert obj["parents"][1]["id"] == parse_swhid( "swh:1:rev:37fc9e08d0c4b71807a4f1ecb06112e78d91c283" ) - assert obj == web_api_client.revision(pid) + assert obj == web_api_client.revision(swhid) def test_get_snapshot(web_api_client, web_api_mock): # small snapshot, the one from Web API doc - pid = parse_pid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a") - obj = web_api_client.get(pid) + swhid = parse_swhid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a") + obj = web_api_client.get(swhid) assert len(obj) == 4 assert obj["refs/heads/master"]["target_type"] == "revision" - assert obj["refs/heads/master"]["target"] == parse_pid( + assert obj["refs/heads/master"]["target"] == parse_swhid( "swh:1:rev:83c20a6a63a7ebc1a549d367bc07a61b926cecf3" ) assert obj["refs/tags/dpkt-1.7"]["target_type"] == "revision" - assert obj["refs/tags/dpkt-1.7"]["target"] == parse_pid( + assert obj["refs/tags/dpkt-1.7"]["target"] == parse_swhid( "swh:1:rev:0c9dbfbc0974ec8ac1d8253aa1092366a03633a8" ) def test_iter_snapshot(web_api_client, web_api_mock): # large snapshot from the Linux kernel, usually spanning two pages - pid = parse_pid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764") - obj = web_api_client.snapshot(pid) + swhid = parse_swhid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764") + obj = web_api_client.snapshot(swhid) snp = {} for partial in obj: @@ -119,8 +119,8 @@ web_api_client.bearer_token = refresh_token - pid = parse_pid(f"swh:1:rel:{rel_id}") - web_api_client.get(pid) + swhid = parse_swhid(f"swh:1:rel:{rel_id}") + web_api_client.get(swhid) sent_request = web_api_mock._adapter.last_request @@ -141,5 +141,5 @@ assert visits[0]["date"] == timestamp assert visits[0]["snapshot"] is None - snapshot_pid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976" - assert visits[7]["snapshot"] == parse_pid(snapshot_pid) + snapshot_swhid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976" + assert visits[7]["snapshot"] == parse_swhid(snapshot_swhid)