diff --git a/swh/web/client/client.py b/swh/web/client/client.py
--- a/swh/web/client/client.py
+++ b/swh/web/client/client.py
@@ -35,28 +35,30 @@
 import dateutil.parser
 import requests

-from swh.model.identifiers import (
-    CONTENT,
-    DIRECTORY,
-    RELEASE,
-    REVISION,
-    SNAPSHOT,
-    SWHID,
-    parse_swhid,
-)
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.identifiers import CoreSWHID, ObjectType
 from swh.web.client.cli import DEFAULT_CONFIG


-SWHIDish = Union[SWHID, str]
+SWHIDish = Union[CoreSWHID, str]
+
+CONTENT = "content"
+DIRECTORY = "directory"
+REVISION = "revision"
+RELEASE = "release"
+SNAPSHOT = "snapshot"
 ORIGIN_VISIT = "origin_visit"
+ORIGIN = "origin"


-def _get_swhid(swhidish: SWHIDish) -> SWHID:
-    """Parse string to SWHID if needed"""
+def _get_object_id_hex(swhidish: SWHIDish) -> str:
+    """Parse string or SWHID and return the hex value of the object_id"""
     if isinstance(swhidish, str):
-        return parse_swhid(swhidish)
+        swhid = CoreSWHID.from_string(swhidish)
     else:
-        return swhidish
+        swhid = swhidish
+
+    return hash_to_hex(swhid.object_id)


 def typify_json(data: Any, obj_type: str) -> Any:
@@ -69,8 +71,12 @@

     """

-    def to_swhid(object_type: str, s: Any) -> SWHID:
-        return SWHID(object_type=object_type, object_id=s)
+    def to_swhid(object_type: Union[str, ObjectType], s: Any) -> CoreSWHID:
+        if isinstance(object_type, str):
+            parsed_object_type = ObjectType[object_type.upper()]
+        else:
+            parsed_object_type = object_type
+        return CoreSWHID(object_type=parsed_object_type, object_id=hash_to_bytes(s))

     def to_date(date: str) -> datetime:
         return dateutil.parser.parse(date)
@@ -82,11 +88,11 @@

         def obj_type_of_entry_type(s):
             if s == "file":
-                return CONTENT
+                return ObjectType.CONTENT
             elif s == "dir":
-                return DIRECTORY
+                return ObjectType.DIRECTORY
             elif s == "rev":
-                return REVISION
+                return ObjectType.REVISION
             else:
                 raise ValueError(f"invalid directory entry type: {s}")

@@ -119,7 +125,7 @@
     elif obj_type == ORIGIN_VISIT:
         data["date"] = to_date(data["date"])
         if data["snapshot"] is not None:
-            data["snapshot"] = to_swhid(SNAPSHOT, data["snapshot"])
+            data["snapshot"] = to_swhid("snapshot", data["snapshot"])
     else:
         raise ValueError(f"invalid object type: {obj_type}")

@@ -153,12 +159,12 @@
         self.api_path = u.path
         self.bearer_token = bearer_token

-        self._getters: Dict[str, Callable[[SWHIDish, bool], Any]] = {
-            CONTENT: self.content,
-            DIRECTORY: self.directory,
-            RELEASE: self.release,
-            REVISION: self.revision,
-            SNAPSHOT: self._get_snapshot,
+        self._getters: Dict[ObjectType, Callable[[SWHIDish, bool], Any]] = {
+            ObjectType.CONTENT: self.content,
+            ObjectType.DIRECTORY: self.directory,
+            ObjectType.RELEASE: self.release,
+            ObjectType.REVISION: self.revision,
+            ObjectType.SNAPSHOT: self._get_snapshot,
         }

     def _call(
@@ -221,9 +227,11 @@
             streaming.

""" - - swhid_ = _get_swhid(swhid) - return self._getters[swhid_.object_type](swhid_, typify) + if isinstance(swhid, str): + obj_type = CoreSWHID.from_string(swhid).object_type + else: + obj_type = swhid.object_type + return self._getters[obj_type](swhid, typify) def iter( self, swhid: SWHIDish, typify: bool = True, **req_args @@ -233,18 +241,20 @@ Streaming variant of get() """ - swhid_ = _get_swhid(swhid) - obj_type = swhid_.object_type + if isinstance(swhid, str): + obj_type = CoreSWHID.from_string(swhid).object_type + else: + obj_type = swhid.object_type if obj_type == SNAPSHOT: - yield from self.snapshot(swhid_, typify) + yield from self.snapshot(swhid, typify) elif obj_type == REVISION: - yield from [self.revision(swhid_, typify)] + yield from [self.revision(swhid, typify)] elif obj_type == RELEASE: - yield from [self.release(swhid_, typify)] + yield from [self.release(swhid, typify)] elif obj_type == DIRECTORY: - yield from self.directory(swhid_, typify) + yield from self.directory(swhid, typify) elif obj_type == CONTENT: - yield from [self.content(swhid_, typify)] + yield from [self.content(swhid, typify)] else: raise ValueError(f"invalid object type: {obj_type}") @@ -264,7 +274,7 @@ """ json = self._call( - f"content/sha1_git:{_get_swhid(swhid).object_id}/", **req_args + f"content/sha1_git:{_get_object_id_hex(swhid)}/", **req_args ).json() return typify_json(json, CONTENT) if typify else json @@ -283,9 +293,7 @@ requests.HTTPError: if HTTP request fails """ - json = self._call( - f"directory/{_get_swhid(swhid).object_id}/", **req_args - ).json() + json = self._call(f"directory/{_get_object_id_hex(swhid)}/", **req_args).json() return typify_json(json, DIRECTORY) if typify else json def revision( @@ -303,7 +311,7 @@ requests.HTTPError: if HTTP request fails """ - json = self._call(f"revision/{_get_swhid(swhid).object_id}/", **req_args).json() + json = self._call(f"revision/{_get_object_id_hex(swhid)}/", **req_args).json() return typify_json(json, REVISION) if typify else json def release( @@ -321,7 +329,7 @@ requests.HTTPError: if HTTP request fails """ - json = self._call(f"release/{_get_swhid(swhid).object_id}/", **req_args).json() + json = self._call(f"release/{_get_object_id_hex(swhid)}/", **req_args).json() return typify_json(json, RELEASE) if typify else json def snapshot( @@ -346,7 +354,7 @@ """ done = False r = None - query = f"snapshot/{_get_swhid(swhid).object_id}/" + query = f"snapshot/{_get_object_id_hex(swhid)}/" while not done: r = self._call(query, http_method="get", **req_args) @@ -404,7 +412,7 @@ def known( self, swhids: Iterator[SWHIDish], **req_args - ) -> Dict[SWHID, Dict[Any, Any]]: + ) -> Dict[CoreSWHID, Dict[Any, Any]]: """Verify the presence in the archive of several objects at once Args: @@ -422,7 +430,7 @@ r = self._call( "known/", http_method="post", json=list(map(str, swhids)), **req_args ) - return {parse_swhid(k): v for k, v in r.json().items()} + return {CoreSWHID.from_string(k): v for k, v in r.json().items()} def content_exists(self, swhid: SWHIDish, **req_args) -> bool: """Check if a content object exists in the archive @@ -437,7 +445,7 @@ """ return bool( self._call( - f"content/sha1_git:{_get_swhid(swhid).object_id}/", + f"content/sha1_git:{_get_object_id_hex(swhid)}/", http_method="head", **req_args, ) @@ -456,7 +464,7 @@ """ return bool( self._call( - f"directory/{_get_swhid(swhid).object_id}/", + f"directory/{_get_object_id_hex(swhid)}/", http_method="head", **req_args, ) @@ -475,7 +483,7 @@ """ return bool( self._call( - 
f"revision/{_get_swhid(swhid).object_id}/", + f"revision/{_get_object_id_hex(swhid)}/", http_method="head", **req_args, ) @@ -494,9 +502,7 @@ """ return bool( self._call( - f"release/{_get_swhid(swhid).object_id}/", - http_method="head", - **req_args, + f"release/{_get_object_id_hex(swhid)}/", http_method="head", **req_args, ) ) @@ -513,7 +519,7 @@ """ return bool( self._call( - f"snapshot/{_get_swhid(swhid).object_id}/", + f"snapshot/{_get_object_id_hex(swhid)}/", http_method="head", **req_args, ) @@ -546,7 +552,7 @@ """ r = self._call( - f"content/sha1_git:{_get_swhid(swhid).object_id}/raw/", + f"content/sha1_git:{_get_object_id_hex(swhid)}/raw/", stream=True, **req_args, ) diff --git a/swh/web/client/tests/test_web_api_client.py b/swh/web/client/tests/test_web_api_client.py --- a/swh/web/client/tests/test_web_api_client.py +++ b/swh/web/client/tests/test_web_api_client.py @@ -7,14 +7,14 @@ from dateutil.parser import parse as parse_date -from swh.model.identifiers import REVISION, parse_swhid +from swh.model.identifiers import REVISION, CoreSWHID from swh.web.client.client import typify_json from .api_data import API_DATA def test_get_content(web_api_client, web_api_mock): - swhid = parse_swhid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") + swhid = CoreSWHID.from_string("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") obj = web_api_client.get(swhid) assert obj["length"] == 151810 @@ -27,14 +27,14 @@ def test_get_directory(web_api_client, web_api_mock): - swhid = parse_swhid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6") + swhid = CoreSWHID.from_string("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6") obj = web_api_client.get(swhid) assert len(obj) == 35 # number of directory entries assert all(map(lambda entry: entry["dir_id"] == swhid, obj)) dir_entry = obj[0] assert dir_entry["type"] == "file" - assert dir_entry["target"] == parse_swhid( + assert dir_entry["target"] == CoreSWHID.from_string( "swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814" ) assert dir_entry["name"] == ".bzrignore" @@ -44,7 +44,7 @@ def test_get_release(web_api_client, web_api_mock): - swhid = parse_swhid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342") + swhid = CoreSWHID.from_string("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342") obj = web_api_client.get(swhid) assert obj["id"] == swhid @@ -53,7 +53,7 @@ assert obj["date"] == parse_date("2013-07-06T19:34:11-04:00") assert obj["name"] == "0.9.9" assert obj["target_type"] == "revision" - assert obj["target"] == parse_swhid( + assert obj["target"] == CoreSWHID.from_string( "swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636" ) assert not obj["synthetic"] @@ -62,7 +62,7 @@ def test_get_revision(web_api_client, web_api_mock): - swhid = parse_swhid("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6") + swhid = CoreSWHID.from_string("swh:1:rev:aafb16d69fd30ff58afdd69036a26047f3aebdc6") obj = web_api_client.get(swhid) assert obj["id"] == swhid @@ -77,10 +77,10 @@ assert obj["message"].startswith("Merge branch") assert obj["merge"] assert len(obj["parents"]) == 2 - assert obj["parents"][0]["id"] == parse_swhid( + assert obj["parents"][0]["id"] == CoreSWHID.from_string( "swh:1:rev:26307d261279861c2d9c9eca3bb38519f951bea4" ) - assert obj["parents"][1]["id"] == parse_swhid( + assert obj["parents"][1]["id"] == CoreSWHID.from_string( "swh:1:rev:37fc9e08d0c4b71807a4f1ecb06112e78d91c283" ) @@ -89,23 +89,23 @@ def test_get_snapshot(web_api_client, web_api_mock): # small snapshot, the one from Web API doc - swhid = 
parse_swhid("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a") + swhid = CoreSWHID.from_string("swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a") obj = web_api_client.get(swhid) assert len(obj) == 4 assert obj["refs/heads/master"]["target_type"] == "revision" - assert obj["refs/heads/master"]["target"] == parse_swhid( + assert obj["refs/heads/master"]["target"] == CoreSWHID.from_string( "swh:1:rev:83c20a6a63a7ebc1a549d367bc07a61b926cecf3" ) assert obj["refs/tags/dpkt-1.7"]["target_type"] == "revision" - assert obj["refs/tags/dpkt-1.7"]["target"] == parse_swhid( + assert obj["refs/tags/dpkt-1.7"]["target"] == CoreSWHID.from_string( "swh:1:rev:0c9dbfbc0974ec8ac1d8253aa1092366a03633a8" ) def test_iter_snapshot(web_api_client, web_api_mock): # large snapshot from the Linux kernel, usually spanning two pages - swhid = parse_swhid("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764") + swhid = CoreSWHID.from_string("swh:1:snp:cabcc7d7bf639bbe1cc3b41989e1806618dd5764") obj = web_api_client.snapshot(swhid) snp = {} @@ -124,7 +124,7 @@ web_api_client.bearer_token = refresh_token - swhid = parse_swhid(f"swh:1:rel:{rel_id}") + swhid = CoreSWHID.from_string(f"swh:1:rel:{rel_id}") web_api_client.get(swhid) sent_request = web_api_mock._adapter.last_request @@ -147,7 +147,7 @@ assert visits[0]["snapshot"] is None snapshot_swhid = "swh:1:snp:456550ea74af4e2eecaa406629efaaf0b9b5f976" - assert visits[7]["snapshot"] == parse_swhid(snapshot_swhid) + assert visits[7]["snapshot"] == CoreSWHID.from_string(snapshot_swhid) def test_origin_search(web_api_client, web_api_mock): @@ -231,5 +231,5 @@ } revision_typed = typify_json(revision_data, REVISION) pid = "swh:1:rev:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - assert revision_typed["id"] == parse_swhid(pid) + assert revision_typed["id"] == CoreSWHID.from_string(pid) assert revision_typed["date"] is None