diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py --- a/swh/loader/package/archive/loader.py +++ b/swh/loader/package/archive/loader.py @@ -41,7 +41,11 @@ manifest = manifest_format.substitute( {k: str(v) for (k, v) in self.raw_info.items()} ) - return (self.EXTID_TYPE, hashlib.sha256(manifest.encode()).digest()) + return ( + self.EXTID_TYPE, + self.EXTID_VERSION, + hashlib.sha256(manifest.encode()).digest(), + ) @classmethod def from_metadata(cls, a_metadata: Dict[str, Any]) -> "ArchivePackageInfo": diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py --- a/swh/loader/package/archive/tests/test_archive.py +++ b/swh/loader/package/archive/tests/test_archive.py @@ -365,6 +365,7 @@ actual_id = p_info.extid(manifest_format=manifest_format) assert actual_id == ( "package-manifest-sha256", + 0, hashlib.sha256(expected_manifest).digest(), ) diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -25,6 +25,7 @@ UPLOADERS_SPLIT = re.compile(r"(?<=\>)\s*,\s*") EXTID_TYPE = "dsc-sha256" +EXTID_VERSION = 1 class DscCountError(ValueError): @@ -107,7 +108,7 @@ f"got {len(dsc_files)}" ) - return (EXTID_TYPE, hash_to_bytes(dsc_files[0].sha256)) + return (EXTID_TYPE, EXTID_VERSION, hash_to_bytes(dsc_files[0].sha256)) @attr.s diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -72,7 +72,7 @@ of downloaded archive files.""" -PartialExtID = Tuple[str, bytes] +PartialExtID = Tuple[str, int, bytes] -"""The ``extid_type`` and ``extid`` fields of an :class:`ExtID` object.""" +"""The ``extid_type``, ``extid_version`` and ``extid`` fields of an :class:`ExtID`.""" @@ -112,6 +112,7 @@ before hashing it to produce an ExtID.""" EXTID_TYPE: str = "package-manifest-sha256" + EXTID_VERSION: int = 0 # The following attribute has kw_only=True in order to allow subclasses # to add 
attributes. Without kw_only, attributes without default values cannot @@ -137,7 +138,11 @@ manifest = self.MANIFEST_FORMAT.substitute( {k: str(v) for (k, v) in attr.asdict(self).items()} ) - return (self.EXTID_TYPE, hashlib.sha256(manifest.encode()).digest()) + return ( + self.EXTID_TYPE, + self.EXTID_VERSION, + hashlib.sha256(manifest.encode()).digest(), + ) TPackageInfo = TypeVar("TPackageInfo", bound=BasePackageInfo) @@ -229,20 +234,24 @@ loaded in the archive, and returns them if any.""" - # Compute the ExtIDs of all the new packages, grouped by extid type - new_extids: Dict[str, List[bytes]] = {} + # Compute the ExtIDs of all the new packages, grouped by extid type and version + new_extids: Dict[Tuple[str, int], List[bytes]] = {} for p_info in packages_info: res = p_info.extid() if res is not None: - (extid_type, extid_extid) = res - new_extids.setdefault(extid_type, []).append(extid_extid) + (extid_type, extid_version, extid_extid) = res + new_extids.setdefault((extid_type, extid_version), []).append( + extid_extid + ) - # For each extid type, call extid_get_from_extid() with all the extids of - # that type, and store them in the '(type, extid) -> target' map. + # For each (extid type, version) pair, call extid_get_from_extid() with all + # its extids, and store them in the '(type, version, extid) -> target' map. 
known_extids: Dict[PartialExtID, List[CoreSWHID]] = {} - for (extid_type, extids) in new_extids.items(): - for extid in self.storage.extid_get_from_extid(extid_type, extids): + for ((extid_type, extid_version), extids) in new_extids.items(): + for extid in self.storage.extid_get_from_extid( + extid_type, extids, version=extid_version + ): if extid is not None: - key = (extid.extid_type, extid.extid) + key = (extid.extid_type, extid_version, extid.extid) known_extids.setdefault(key, []).append(extid.target) return known_extids @@ -647,12 +656,17 @@ if add_extid: partial_extid = p_info.extid() if partial_extid is not None: - (extid_type, extid) = partial_extid + (extid_type, extid_version, extid) = partial_extid release_swhid = CoreSWHID( object_type=ObjectType.RELEASE, object_id=release_id ) new_extids.add( - ExtID(extid_type=extid_type, extid=extid, target=release_swhid) + ExtID( + extid_type=extid_type, + extid_version=extid_version, + extid=extid, + target=release_swhid, + ) ) tmp_releases[p_info.version].append((branch_name, release_id)) diff --git a/swh/loader/package/maven/loader.py b/swh/loader/package/maven/loader.py --- a/swh/loader/package/maven/loader.py +++ b/swh/loader/package/maven/loader.py @@ -46,6 +46,9 @@ logger = logging.getLogger(__name__) +EXTID_TYPE = "maven-jar" +EXTID_VERSION = 0 + @attr.s class MavenPackageInfo(BasePackageInfo): @@ -75,7 +78,7 @@ "time": str(self.time), } ) - return ("maven-jar", hashlib.sha256(manifest.encode()).digest()) + return (EXTID_TYPE, EXTID_VERSION, hashlib.sha256(manifest.encode()).digest()) @classmethod def from_metadata(cls, a_metadata: Dict[str, Any]) -> "MavenPackageInfo": diff --git a/swh/loader/package/maven/tests/test_maven.py b/swh/loader/package/maven/tests/test_maven.py --- a/swh/loader/package/maven/tests/test_maven.py +++ b/swh/loader/package/maven/tests/test_maven.py @@ -558,7 +558,11 @@ (None, "{gid} {aid} {version} {url} {time}".format(**metadata).encode()), ]: actual_id = 
p_info.extid(manifest_format=manifest_format) - assert actual_id == ("maven-jar", hashlib.sha256(expected_manifest).digest(),) + assert actual_id == ( + "maven-jar", + 0, + hashlib.sha256(expected_manifest).digest(), + ) with pytest.raises(KeyError): p_info.extid(manifest_format=string.Template("$a $unknown_key")) diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py --- a/swh/loader/package/nixguix/loader.py +++ b/swh/loader/package/nixguix/loader.py @@ -34,6 +34,8 @@ """The ExtID is an ASCII string, as defined by https://w3c.github.io/webappsec-subresource-integrity/""" +EXTID_VERSION = 0 + @attr.s class NixGuixPackageInfo(BasePackageInfo): @@ -56,7 +58,7 @@ ) def extid(self) -> PartialExtID: - return (EXTID_TYPE, self.integrity.encode("ascii")) + return (EXTID_TYPE, EXTID_VERSION, self.integrity.encode("ascii")) class NixGuixLoader(PackageLoader[NixGuixPackageInfo]): diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py --- a/swh/loader/package/npm/loader.py +++ b/swh/loader/package/npm/loader.py @@ -39,6 +39,7 @@ EXTID_TYPE = "npm-archive-sha1" +EXTID_VERSION = 0 @attr.s @@ -84,7 +85,7 @@ ) def extid(self) -> PartialExtID: - return (EXTID_TYPE, hash_to_bytes(self.shasum)) + return (EXTID_TYPE, EXTID_VERSION, hash_to_bytes(self.shasum)) class NpmLoader(PackageLoader[NpmPackageInfo]): diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -35,6 +35,7 @@ EXTID_TYPE = "pypi-archive-sha256" +EXTID_VERSION = 0 @attr.s @@ -67,7 +68,7 @@ ) def extid(self) -> PartialExtID: - return (EXTID_TYPE, hash_to_bytes(self.sha256)) + return (EXTID_TYPE, EXTID_VERSION, hash_to_bytes(self.sha256)) class PyPILoader(PackageLoader[PyPIPackageInfo]): diff --git a/swh/loader/package/tests/test_loader.py b/swh/loader/package/tests/test_loader.py --- a/swh/loader/package/tests/test_loader.py +++ 
b/swh/loader/package/tests/test_loader.py @@ -67,7 +67,7 @@ patch.object( p_info, "extid", - return_value=(extid_type, f"extid-of-{version}".encode()), + return_value=(extid_type, 0, f"extid-of-{version}".encode()), autospec=True, ).start() yield (f"branch-{version}", p_info) @@ -100,7 +100,7 @@ # The PackageInfo does not support extids p_info.extid.return_value = None known_extids = { - ("extid-type", b"extid-of-aaaa"): [ + ("extid-type", 0, b"extid-of-aaaa"): [ CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20), ] } @@ -108,12 +108,12 @@ assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None # Some known extid, and the PackageInfo is not one of them (ie. cache miss) - p_info.extid.return_value = ("extid-type", b"extid-of-cccc") + p_info.extid.return_value = ("extid-type", 0, b"extid-of-cccc") assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None # Some known extid, and the PackageInfo is one of them (ie. cache hit), # but the target release was not in the previous snapshot - p_info.extid.return_value = ("extid-type", b"extid-of-aaaa") + p_info.extid.return_value = ("extid-type", 0, b"extid-of-aaaa") assert loader.resolve_object_from_extids(known_extids, p_info, whitelist) is None # Some known extid, and the PackageInfo is one of them (ie. 
cache hit), @@ -127,7 +127,7 @@ # release whitelist = {b"a" * 20} known_extids = { - ("extid-type", b"extid-of-aaaa"): [ + ("extid-type", 0, b"extid-of-aaaa"): [ CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"b" * 20), CoreSWHID(object_type=ObjectType.RELEASE, object_id=b"a" * 20), ] @@ -148,8 +148,8 @@ # Calls should be grouped by extid type storage.extid_get_from_extid.assert_has_calls( [ - call("extid-type1", [b"extid-of-v1.0", b"extid-of-v2.0"]), - call("extid-type2", [b"extid-of-v3.0", b"extid-of-v4.0"]), + call("extid-type1", [b"extid-of-v1.0", b"extid-of-v2.0"], version=0), + call("extid-type2", [b"extid-of-v3.0", b"extid-of-v4.0"], version=0), ], any_order=True, ) @@ -319,8 +319,8 @@ # Results of a previous load storage.extid_add( [ - ExtID("extid-type1", b"extid-of-v1.0", rev1_swhid), - ExtID("extid-type1", b"extid-of-v2.0", rev2_swhid), + ExtID("extid-type1", b"extid-of-v1.0", rev1_swhid, 0), + ExtID("extid-type1", b"extid-of-v2.0", rev2_swhid, 0), ] ) storage.revision_add([rev1]) @@ -431,7 +431,7 @@ ) actual_id = p_info.extid() - assert actual_id == ("package-manifest-sha256", hashlib.sha256(b"1 2").digest()) + assert actual_id == ("package-manifest-sha256", 0, hashlib.sha256(b"1 2").digest()) def test_no_env_swh_config_filename_raise(monkeypatch):