diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -326,9 +326,9 @@ for extid_target in extid_targets if extid_target.object_type == ObjectType.RELEASE } - if release_extid_targets: - assert len(release_extid_targets) == 1, release_extid_targets - return list(release_extid_targets)[0] + extid_target2 = self.select_extid_target(p_info, release_extid_targets) + if extid_target2: + return extid_target2 # If there is no release extid (ie. if the package was only loaded with # older versions of this loader, which produced revision objects instead @@ -341,6 +341,23 @@ # No target found (this is probably a new package version) return None + def select_extid_target( + self, p_info: TPackageInfo, extid_targets: Set[CoreSWHID] + ) -> Optional[CoreSWHID]: + """Given a list of release extid targets, choses one appropriate for the + given package info. + + Package loaders shyould implement this if their ExtIDs may map to multiple + releases, so they can fetch releases from the storage and inspect their fields + to select the right one for this ``p_info``. + """ + if extid_targets: + # The base package loader does not have the domain-specific knowledge + # to select the right release -> crash if there is more than one. + assert len(extid_targets) == 1, extid_targets + return list(extid_targets)[0] + return None + def download_package( self, p_info: TPackageInfo, tmpdir: str ) -> List[Tuple[str, Mapping]]: diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py --- a/swh/loader/package/nixguix/loader.py +++ b/swh/loader/package/nixguix/loader.py @@ -7,7 +7,7 @@ import json import logging import re -from typing import Any, Dict, Iterator, List, Mapping, Optional, Tuple +from typing import Any, Dict, Iterator, List, Mapping, Optional, Set, Tuple import attr @@ -26,6 +26,7 @@ Release, Sha1Git, ) +from swh.model.swhids import CoreSWHID from swh.storage.interface import StorageInterface logger = logging.getLogger(__name__) @@ -130,6 +131,24 @@ ) yield url, p_info + def select_extid_target( + self, p_info: NixGuixPackageInfo, extid_targets: Set[CoreSWHID] + ) -> Optional[CoreSWHID]: + if extid_targets: + # The archive URL is part of the release name. As that URL is not + # intrinsic metadata, it means different releases may be created for + # the same SRI so they have the same extid. + # Therefore, we need to pick the one with the right URL. + releases = self.storage.release_get( + [target.object_id for target in extid_targets] + ) + extid_targets = { + release.swhid() + for release in releases + if release is not None and release.name == p_info.version.encode() + } + return super().select_extid_target(p_info, extid_targets) + def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]: """We add a branch to the snapshot called 'evaluation' pointing to the revision used to generate the sources.json file. This revision diff --git a/swh/loader/package/nixguix/tests/data/https_github.com/owner-3_repository-1_revision-1.tgz b/swh/loader/package/nixguix/tests/data/https_github.com/owner-3_repository-1_revision-1.tgz new file mode 120000 --- /dev/null +++ b/swh/loader/package/nixguix/tests/data/https_github.com/owner-3_repository-1_revision-1.tgz @@ -0,0 +1 @@ +owner-1_repository-1_revision-1.tgz \ No newline at end of file diff --git a/swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json b/swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json --- a/swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json +++ b/swh/loader/package/nixguix/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json @@ -5,6 +5,11 @@ "urls": [ "https://github.com/owner-1/repository-1/revision-1.tgz" ], "integrity": "sha256-3vm2Nt+O4zHf3Ovd/qsv1gKTEUwodX9FLxlrQdry0zs=" }, + { + "type": "url", + "urls": [ "https://github.com/owner-3/repository-1/revision-1.tgz" ], + "integrity": "sha256-3vm2Nt+O4zHf3Ovd/qsv1gKTEUwodX9FLxlrQdry0zs=" + }, { "type": "url", "urls": [ "https://example.com/file.txt" ], diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -57,7 +57,7 @@ SNAPSHOT1 = Snapshot( - id=hash_to_bytes("efe5145f85af3fc87f34102d8b8481cd5198f4f8"), + id=hash_to_bytes("fafcfe32016d018bd892114fce211f37a36a092a"), branches={ b"evaluation": SnapshotBranch( target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), @@ -67,6 +67,10 @@ target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"), target_type=TargetType.RELEASE, ), + b"https://github.com/owner-3/repository-1/revision-1.tgz": SnapshotBranch( + target=hash_to_bytes("dc7dc10a664396d5c88adc56352904db231bde14"), + target_type=TargetType.RELEASE, + ), }, ) @@ -99,7 +103,7 @@ def test_retrieve_sources(swh_storage, requests_mock_datadir): j = parse_sources(retrieve_sources(sources_url)) assert "sources" in j.keys() - assert len(j["sources"]) == 2 + assert len(j["sources"]) == 3 def test_nixguix_url_not_found(swh_storage, requests_mock_datadir): @@ -306,7 +310,7 @@ "directory": 3, "origin": 1, "origin_visit": 1, - "release": 1, + "release": 2, "revision": 0, "skipped_content": 0, "snapshot": 1, @@ -429,7 +433,7 @@ "directory": 3, "origin": 1, "origin_visit": 1, - "release": 1, + "release": 2, "revision": 0, "skipped_content": 0, "snapshot": 1, @@ -480,7 +484,7 @@ "directory": 5, "origin": 1, "origin_visit": 2, - "release": 2, + "release": 3, "revision": 0, "skipped_content": 0, "snapshot": 2, @@ -571,7 +575,7 @@ check_snapshot(SNAPSHOT1, storage=swh_storage) - assert len(mock_download.mock_calls) == 2 + assert len(mock_download.mock_calls) == 3 def test_load_nixguix_one_common_artifact_from_other_loader(