diff --git a/swh/web/client/client.py b/swh/web/client/client.py --- a/swh/web/client/client.py +++ b/swh/web/client/client.py @@ -57,7 +57,7 @@ return swhidish -def typify(data: Any, obj_type: str) -> Any: +def typify_json(data: Any, obj_type: str) -> Any: """Type API responses using pythonic types where appropriate The following conversions are performed: @@ -147,7 +147,7 @@ self.api_path = u.path self.bearer_token = bearer_token - self._getters: Dict[str, Callable[[SWHIDish], Any]] = { + self._getters: Dict[str, Callable[[SWHIDish, bool], Any]] = { CONTENT: self.content, DIRECTORY: self.directory, RELEASE: self.release, @@ -190,18 +190,18 @@ return r - def _get_snapshot(self, swhid: SWHIDish) -> Dict[str, Any]: + def _get_snapshot(self, swhid: SWHIDish, typify: bool = True) -> Dict[str, Any]: """Analogous to self.snapshot(), but zipping through partial snapshots, merging them together before returning """ snapshot = {} - for snp in self.snapshot(swhid): + for snp in self.snapshot(swhid, typify): snapshot.update(snp) return snapshot - def get(self, swhid: SWHIDish, **req_args) -> Any: + def get(self, swhid: SWHIDish, typify: bool = True, **req_args) -> Any: """Retrieve information about an object of any kind Dispatcher method over the more specific methods content(), @@ -214,9 +214,11 @@ """ swhid_ = _get_swhid(swhid) - return self._getters[swhid_.object_type](swhid_) + return self._getters[swhid_.object_type](swhid_, typify) - def iter(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]: + def iter( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Iterator[Dict[str, Any]]: """Stream over the information about an object of any kind Streaming variant of get() @@ -225,89 +227,103 @@ swhid_ = _get_swhid(swhid) obj_type = swhid_.object_type if obj_type == SNAPSHOT: - yield from self.snapshot(swhid_) + yield from self.snapshot(swhid_, typify) elif obj_type == REVISION: - yield from [self.revision(swhid_)] + yield from [self.revision(swhid_, typify)] elif obj_type == RELEASE: - yield from [self.release(swhid_)] + yield from [self.release(swhid_, typify)] elif obj_type == DIRECTORY: - yield from self.directory(swhid_) + yield from self.directory(swhid_, typify) elif obj_type == CONTENT: - yield from [self.content(swhid_)] + yield from [self.content(swhid_, typify)] else: raise ValueError(f"invalid object type: {obj_type}") - def content(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: + def content( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Dict[str, Any]: """Retrieve information about a content object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call( - f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args - ).json(), - CONTENT, - ) - - def directory(self, swhid: SWHIDish, **req_args) -> List[Dict[str, Any]]: + json = self._call( + f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args + ).json() + return typify_json(json, CONTENT) if typify else json + + def directory( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> List[Dict[str, Any]]: """Retrieve information about a directory object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call(f"directory/{_get_swhid(swhid).object_id}/", **req_args).json(), - DIRECTORY, - ) - - def revision(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: + json = self._call( + f"directory/{_get_swhid(swhid).object_id}/", **req_args + ).json() + return typify_json(json, DIRECTORY) if typify else json + + def revision( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Dict[str, Any]: """Retrieve information about a revision object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json(), - REVISION, - ) + json = self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json() + return typify_json(json, REVISION) if typify else json - def release(self, swhid: SWHIDish, **req_args) -> Dict[str, Any]: + def release( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Dict[str, Any]: """Retrieve information about a release object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Raises: requests.HTTPError: if HTTP request fails """ - return typify( - self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json(), - RELEASE, - ) + json = self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json() + return typify_json(json, RELEASE) if typify else json - def snapshot(self, swhid: SWHIDish, **req_args) -> Iterator[Dict[str, Any]]: + def snapshot( + self, swhid: SWHIDish, typify: bool = True, **req_args + ) -> Iterator[Dict[str, Any]]: """Retrieve information about a snapshot object Args: swhid: object persistent identifier + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Returns: @@ -325,7 +341,8 @@ while not done: r = self._call(query, http_method="get", **req_args) - yield typify(r.json()["branches"], SNAPSHOT) + json = r.json()["branches"] + yield typify_json(json, SNAPSHOT) if typify else json if "next" in r.links and "url" in r.links["next"]: query = r.links["next"]["url"] else: @@ -336,6 +353,7 @@ origin: str, per_page: Optional[int] = None, last_visit: Optional[int] = None, + typify: bool = True, **req_args, ) -> Iterator[Dict[str, Any]]: """List visits of an origin @@ -344,6 +362,8 @@ origin: the URL of a software origin per_page: the number of visits to list last_visit: visit to start listing from + typify: if True, convert return value to pythonic types wherever + possible, otherwise return raw JSON types (default: True) req_args: extra keyword arguments for requests.get() Returns: @@ -366,7 +386,7 @@ while not done: r = self._call(query, http_method="get", params=params, **req_args) - yield from [typify(v, ORIGIN_VISIT) for v in r.json()] + yield from [typify_json(v, ORIGIN_VISIT) if typify else v for v in r.json()] if "next" in r.links and "url" in r.links["next"]: params = [] query = r.links["next"]["url"] diff --git a/swh/web/client/tests/test_web_api_client.py b/swh/web/client/tests/test_web_api_client.py --- a/swh/web/client/tests/test_web_api_client.py +++ b/swh/web/client/tests/test_web_api_client.py @@ -3,10 +3,14 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import json + from dateutil.parser import parse as parse_date from swh.model.identifiers import parse_swhid +from .api_data import API_DATA + def test_get_content(web_api_client, web_api_mock): swhid = parse_swhid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") @@ -143,3 +147,28 @@ assert visits[0]["snapshot"] is None snapshot_swhid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976" assert visits[7]["snapshot"] == parse_swhid(snapshot_swhid) + + +def test_get_json(web_api_client, web_api_mock): + swhids = [ + "swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1", + "swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6", + "swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342", + "swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6", + "swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a", + ] + + for swhid in swhids: + actual = web_api_client.get(swhid, typify=False) + expected = None + # Fetch raw JSON data from the generated API_DATA + for url, data in API_DATA.items(): + object_id = swhid[len("swh:1:XXX:") :] + if object_id in url: + expected = json.loads(data) + # Special case: snapshots response differs slightly from the Web API + if swhid.startswith("swh:1:snp:"): + expected = expected["branches"] + break + + assert actual == expected