diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py --- a/swh/web/api/views/identifiers.py +++ b/swh/web/api/views/identifiers.py @@ -109,12 +109,10 @@ # group swhids by their type swhids_by_type = group_swhids(swhids) # search for hashes not present in the storage - missing_hashes = set( - map(hash_to_bytes, archive.lookup_missing_hashes(swhids_by_type)) - ) + missing_hashes = set(archive.lookup_missing_hashes(swhids_by_type)) for swhid in swhids: - if swhid.object_id not in missing_hashes: + if hash_to_bytes(swhid.object_id) not in missing_hashes: response[str(swhid)]["known"] = True return response diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py --- a/swh/web/common/archive.py +++ b/swh/web/common/archive.py @@ -1406,7 +1406,7 @@ raise ValueError(f"Unexpected object type variant: {object_type}") -def lookup_missing_hashes(grouped_swhids: Dict[str, List[bytes]]) -> Set[str]: +def lookup_missing_hashes(grouped_swhids: Dict[str, List[bytes]]) -> Set[bytes]: """Lookup missing Software Heritage persistent identifier hash, using batch processing. @@ -1415,7 +1415,7 @@ keys: object types values: object hashes Returns: - A set(hexadecimal) of the hashes not found in the storage + A set(bytes) of the hashes not found in the storage """ missing_hashes = [] @@ -1431,9 +1431,7 @@ elif obj_type == ObjectType.SNAPSHOT: missing_hashes.append(storage.snapshot_missing(obj_ids)) - missing = set( - map(lambda x: hashutil.hash_to_hex(x), itertools.chain(*missing_hashes)) - ) + missing = set(itertools.chain(*missing_hashes)) return missing diff --git a/swh/web/tests/common/test_archive.py b/swh/web/tests/common/test_archive.py --- a/swh/web/tests/common/test_archive.py +++ b/swh/web/tests/common/test_archive.py @@ -31,7 +31,7 @@ from swh.web.common.exc import BadInputExc, NotFoundExc from swh.web.common.typing import OriginInfo, PagedResult from swh.web.tests.conftest import ctags_json_missing, fossology_missing -from swh.web.tests.data import random_content, random_sha1 +from swh.web.tests.data import random_content, random_sha1, random_sha1_bytes from swh.web.tests.strategies import new_origin, new_revision, visit_dates @@ -941,18 +941,18 @@ def test_lookup_missing_hashes_non_present(): - missing_cnt = random_sha1() - missing_dir = random_sha1() - missing_rev = random_sha1() - missing_rel = random_sha1() - missing_snp = random_sha1() + missing_cnt = random_sha1_bytes() + missing_dir = random_sha1_bytes() + missing_rev = random_sha1_bytes() + missing_rel = random_sha1_bytes() + missing_snp = random_sha1_bytes() grouped_swhids = { - ObjectType.CONTENT: [hash_to_bytes(missing_cnt)], - ObjectType.DIRECTORY: [hash_to_bytes(missing_dir)], - ObjectType.REVISION: [hash_to_bytes(missing_rev)], - ObjectType.RELEASE: [hash_to_bytes(missing_rel)], - ObjectType.SNAPSHOT: [hash_to_bytes(missing_snp)], + ObjectType.CONTENT: [missing_cnt], + ObjectType.DIRECTORY: [missing_dir], + ObjectType.REVISION: [missing_rev], + ObjectType.RELEASE: [missing_rel], + ObjectType.SNAPSHOT: [missing_snp], } actual_result = archive.lookup_missing_hashes(grouped_swhids) @@ -967,16 +967,16 @@ def test_lookup_missing_hashes_some_present(content, directory): - missing_rev = random_sha1() - missing_rel = random_sha1() - missing_snp = random_sha1() + missing_rev = random_sha1_bytes() + missing_rel = random_sha1_bytes() + missing_snp = random_sha1_bytes() grouped_swhids = { ObjectType.CONTENT: [hash_to_bytes(content["sha1_git"])], - ObjectType.DIRECTORY: [hash_to_bytes(directory)], - ObjectType.REVISION: [hash_to_bytes(missing_rev)], - ObjectType.RELEASE: [hash_to_bytes(missing_rel)], - ObjectType.SNAPSHOT: [hash_to_bytes(missing_snp)], + ObjectType.DIRECTORY: [directory], + ObjectType.REVISION: [missing_rev], + ObjectType.RELEASE: [missing_rel], + ObjectType.SNAPSHOT: [missing_snp], } actual_result = archive.lookup_missing_hashes(grouped_swhids)