diff --git a/swh/loader/package/functional/loader.py b/swh/loader/package/functional/loader.py --- a/swh/loader/package/functional/loader.py +++ b/swh/loader/package/functional/loader.py @@ -6,7 +6,9 @@ import json import requests -from typing import Dict, Optional, Any +from typing import Dict, Optional, Any, Mapping + +from swh.model import hashutil from swh.model.model import ( Sha1Git, Revision, RevisionType @@ -36,8 +38,13 @@ def __init__(self, url): super().__init__(url=url) - self.sources = retrieve_sources(url)['sources'] + s = retrieve_sources(url) + self.sources = s['sources'] self.provider_url = url + # The revision used to create the sources.json file. For Nix, + # this revision belongs to the github.com/nixos/nixpkgs + # repository. + self.revision = s['revision'] # Note: this could be renamed get_artifacts in the PackageLoader # base class. @@ -63,6 +70,29 @@ return rev_id return None + def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]: + """We add a branch to the snapshot called 'evaluation' pointing to the + revision used to generate the sources.json file. This revision + is specified in the sources.json file itself. For the nixpkgs + origin, this revision comes from the + github.com/nixos/nixpkgs repository. + + Note this repository is not loaded explicitly. So, this + pointer can target a nonexistent revision for a time. However, + the github and gnu loaders are supposed to load this revision + and should create the revision pointed to by this branch. + + This branch can be used to identify the snapshot associated with + a Nix/Guix evaluation. 
+ + """ + return { + b'evaluation': { + 'target_type': 'revision', + 'target': hashutil.hash_to_bytes(self.revision) + } + } + def build_revision(self, a_metadata: Dict, uncompressed_path: str, directory: Sha1Git) -> Optional[Revision]: return Revision( diff --git a/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json b/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json --- a/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json +++ b/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json @@ -9,5 +9,6 @@ "url": [ "https://example.com/file.txt" ] } ], - "version": 1 + "version": 1, + "revision": "cc4e04c26672dd74e5fd0fecb78b435fb55368f7" } diff --git a/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json_visit1 b/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json_visit1 --- a/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json_visit1 +++ b/swh/loader/package/functional/tests/data/https_nix-community.github.io/nixpkgs-swh_sources.json_visit1 @@ -13,5 +13,6 @@ "url": [ "https://example.com/file.txt" ] } ], - "version": 1 + "version": 1, + "revision": "602140776b2ce6c9159bcf52ada73a297c063d5e" } diff --git a/swh/loader/package/functional/tests/test_functional.py b/swh/loader/package/functional/tests/test_functional.py --- a/swh/loader/package/functional/tests/test_functional.py +++ b/swh/loader/package/functional/tests/test_functional.py @@ -90,12 +90,16 @@ loader = FunctionalLoader(sources_url) loader.load() - expected_snapshot_id = '2c7f01ef3115f7999a013979fa27bfa12dcb63eb' + expected_snapshot_id = '0c5881c74283793ebe9a09a105a9381e41380383' assert load_status == { 'status': 'eventful', 'snapshot_id': expected_snapshot_id } expected_branches = { + 'evaluation': { + 
'target': 'cc4e04c26672dd74e5fd0fecb78b435fb55368f7', + 'target_type': 'revision' + }, 'https://github.com/owner-1/repository-1/revision-1.tgz': { 'target': '488ad4e7b8e2511258725063cf43a2b897c503b4', 'target_type': 'revision' @@ -128,13 +132,17 @@ """ loader = FunctionalLoader(sources_url) load_status = loader.load() - expected_snapshot_id = '2c7f01ef3115f7999a013979fa27bfa12dcb63eb' + expected_snapshot_id = '0c5881c74283793ebe9a09a105a9381e41380383' assert load_status == { 'status': 'eventful', 'snapshot_id': expected_snapshot_id } expected_branches = { + 'evaluation': { + 'target': 'cc4e04c26672dd74e5fd0fecb78b435fb55368f7', + 'target_type': 'revision' + }, 'https://github.com/owner-1/repository-1/revision-1.tgz': { 'target': '488ad4e7b8e2511258725063cf43a2b897c503b4', 'target_type': 'revision' @@ -162,7 +170,7 @@ loader = FunctionalLoader(sources_url) load_status = loader.load() - expected_snapshot_id = '9c4fbfd991b35c7de876cd66bcda2967a8f476ac' + expected_snapshot_id = 'b0bfa75cbd0cc90aac3b9e95fb0f59c731176d97' assert load_status == { 'status': 'eventful', 'snapshot_id': expected_snapshot_id @@ -172,6 +180,10 @@ # second time an url, because of the requests_mock_datadir_visits # fixture, the file has to end with `_visit1`. 
expected_branches = { + 'evaluation': { + 'target': '602140776b2ce6c9159bcf52ada73a297c063d5e', + 'target_type': 'revision' + }, 'https://github.com/owner-1/repository-1/revision-1.tgz': { 'target': '488ad4e7b8e2511258725063cf43a2b897c503b4', 'target_type': 'revision' @@ -214,3 +226,27 @@ assert loader.resolve_revision_from(known_artifacts, metadata) == 'id1' metadata = {'url': 'url3'} assert loader.resolve_revision_from(known_artifacts, metadata) == None # noqa + + +def test_evaluation_branch(swh_config, requests_mock_datadir): + loader = FunctionalLoader(sources_url) + res = loader.load() + assert res['status'] == 'eventful' + + expected_branches = { + 'https://github.com/owner-1/repository-1/revision-1.tgz': { + 'target': '488ad4e7b8e2511258725063cf43a2b897c503b4', + 'target_type': 'revision', + }, + 'evaluation': { + 'target': 'cc4e04c26672dd74e5fd0fecb78b435fb55368f7', + 'target_type': 'revision', + }, + } + + expected_snapshot = { + 'id': '0c5881c74283793ebe9a09a105a9381e41380383', + 'branches': expected_branches, + } + + check_snapshot(expected_snapshot, storage=loader.storage) diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -210,6 +210,13 @@ uncompress(a_path, dest=uncompressed_path) return uncompressed_path + def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]: + """Return extra branches to add to the snapshot. A branch whose name + is already taken is ignored (an error is logged). + + """ + return {} + def load(self) -> Dict: """Load for a specific origin the associated contents. @@ -319,6 +326,13 @@ 'target': target, } + for name, target in self.extra_branches().items(): + if name in branches: + logger.error("Extra branch '%s' has been ignored", + name) + else: + branches[name] = target + snapshot_data = { 'branches': branches }