diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py --- a/swh/vault/cookers/git_bare.py +++ b/swh/vault/cookers/git_bare.py @@ -303,7 +303,41 @@ loaded_from_graph = False if self.graph: - pass # TODO + revision_ids = [] + release_ids = [] + + from swh.graph.client import GraphArgumentException + + # First, try to cook using swh-graph, as it is more efficient than + # swh-storage for querying the history + obj_swhid = identifiers.CoreSWHID( + object_type=identifiers.ObjectType.SNAPSHOT, object_id=obj_id, + ) + try: + swhids = map( + identifiers.CoreSWHID.from_string, + self.graph.visit_nodes(str(obj_swhid), edges="snp:*,rel:*,rev:rev"), + ) + for swhid in swhids: + if swhid.object_type == identifiers.ObjectType.REVISION: + revision_ids.append(swhid.object_id) + elif swhid.object_type == identifiers.ObjectType.RELEASE: + release_ids.append(swhid.object_id) + elif swhid.object_type == identifiers.ObjectType.SNAPSHOT: + assert ( + swhid.object_id == obj_id + ), f"Snapshot {obj_id.hex()} references a different snapshot" + else: + raise NotImplementedError( + f"{swhid.object_type} objects in snapshot subgraphs." + ) + except GraphArgumentException: + # Snapshot not found in the graph + pass + else: + self._push(self._rev_stack, revision_ids) + self._push(self._rel_stack, release_ids) + loaded_from_graph = True # TODO: when self.graph is available and supports edge labels, use it # directly to get branch names. 
diff --git a/swh/vault/tests/test_git_bare_cooker.py b/swh/vault/tests/test_git_bare_cooker.py --- a/swh/vault/tests/test_git_bare_cooker.py +++ b/swh/vault/tests/test_git_bare_cooker.py @@ -10,7 +10,9 @@ """ import datetime +import glob import io +import itertools import subprocess import tarfile import tempfile @@ -23,19 +25,27 @@ Content, Directory, DirectoryEntry, + ObjectType, Person, + Release, Revision, RevisionType, + Snapshot, + SnapshotBranch, + TargetType, TimestampWithTimezone, ) from swh.vault.cookers.git_bare import GitBareCooker from swh.vault.in_memory_backend import InMemoryVaultBackend -def get_objects(last_revision_in_graph): +def get_objects(up_to_date_graph, release): """ Build objects:: + rel2 + | + v rev1 <------ rev2 | | v v @@ -43,6 +53,9 @@ | / | v / v cnt1 <----° cnt2 + + If up_to_date_graph is true, then swh-graph contains all objects. + Else, rev2, rel2, and the snapshot are missing from the graph. """ date = TimestampWithTimezone.from_datetime( datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) @@ -98,42 +111,75 @@ synthetic=True, ) - if last_revision_in_graph: - nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2]] + rel2 = Release( + name=b"1.0.0", + message=b"tag2", + target_type=ObjectType.REVISION, + target=rev2.id, + synthetic=True, + ) + + branches = { + b"refs/heads/master": SnapshotBranch( + target=rev2.id, target_type=TargetType.REVISION + ) + } + if release: + branches[b"refs/tags/1.0.0"] = SnapshotBranch( + target=rel2.id, target_type=TargetType.RELEASE + ) + snp = Snapshot(branches=branches) + + if up_to_date_graph: + nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2, snp]] edges = [ - (str(s.swhid()), str(d.swhid())) - for (s, d) in [ - (dir1, cnt1), - (dir2, cnt1), - (dir2, cnt2), - (rev1, dir1), - (rev2, dir2), - (rev2, rev1), - ] + (dir1, cnt1), + (dir2, cnt1), + (dir2, cnt2), + (rev1, dir1), + (rev2, dir2), + (rev2, rev1), + (snp, rev2), ] + if release: + 
nodes.append(str(rel2.swhid())) + edges.append((rel2, rev2)) + edges.append((snp, rel2)) else: nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1]] + if release: + nodes.append(str(rel2.swhid())) edges = [ - (str(s.swhid()), str(d.swhid())) - for (s, d) in [(dir1, cnt1), (dir2, cnt1), (dir2, cnt2), (rev1, dir1),] + (dir1, cnt1), + (dir2, cnt1), + (dir2, cnt2), + (rev1, dir1), ] - return (cnt1, cnt2, dir1, dir2, rev1, rev2, nodes, edges) + edges = [(str(s.swhid()), str(d.swhid())) for (s, d) in edges] + + return (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) @pytest.mark.graph -@pytest.mark.parametrize("last_revision_in_graph", [True, False]) -def test_graph_revisions(swh_storage, last_revision_in_graph): +@pytest.mark.parametrize( + "snapshot,up_to_date_graph,release", + list(itertools.product([False], [True, False], [False])) # no snp implies no rel + + list(itertools.product([True], [True, False], [True, False])), +) +def test_graph_revisions(swh_storage, up_to_date_graph, snapshot, release): from swh.graph.naive_client import NaiveClient as GraphClient - (cnt1, cnt2, dir1, dir2, rev1, rev2, nodes, edges) = get_objects( - last_revision_in_graph + (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) = get_objects( + up_to_date_graph, release=release, ) # Add all objects to storage swh_storage.content_add([cnt1, cnt2]) swh_storage.directory_add([dir1, dir2]) swh_storage.revision_add([rev1, rev2]) + swh_storage.release_add([rel2]) + swh_storage.snapshot_add([snp]) # Add spy on swh_storage, to make sure revision_log is not called # (the graph must be used instead) @@ -144,17 +190,19 @@ # Cook backend = InMemoryVaultBackend() + if snapshot: + cooker_name = "snapshot_gitbare" + cooked_id = snp.id + else: + cooker_name = "revision_gitbare" + cooked_id = rev2.id cooker = GitBareCooker( - "revision_gitbare", - rev2.id, - backend=backend, - storage=swh_storage, - graph=swh_graph, + cooker_name, cooked_id, backend=backend, 
storage=swh_storage, graph=swh_graph, ) cooker.cook() # Get bundle - bundle = backend.fetch("revision_gitbare", rev2.id) + bundle = backend.fetch(cooker_name, cooked_id) # Extract bundle and make sure both revisions are in it with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir: @@ -165,7 +213,7 @@ [ "git", "-C", - f"{tempdir}/{rev2.swhid()}.git", + glob.glob(f"{tempdir}/*{cooked_id.hex()}.git")[0], "log", "--format=oneline", "--decorate=", @@ -175,8 +223,28 @@ assert output.decode() == f"{rev2.id.hex()} msg2\n{rev1.id.hex()} msg1\n" # Make sure the graph was used instead of swh_storage.revision_log - swh_graph.visit_nodes.assert_called_once_with(str(rev2.swhid()), edges="rev:rev") - if last_revision_in_graph: + if snapshot: + if up_to_date_graph: + # The graph has everything, so the first call succeeds and returns + # all objects transitively pointed by the snapshot + swh_graph.visit_nodes.assert_has_calls( + [unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"),] + ) + else: + # The graph does not have everything, so the first call returns nothing. + # However, the second call (on the top rev) succeeds and returns + # all objects but the rev and the rel + swh_graph.visit_nodes.assert_has_calls( + [ + unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"), + unittest.mock.call(str(rev2.swhid()), edges="rev:rev"), + ] + ) + else: + swh_graph.visit_nodes.assert_has_calls( + [unittest.mock.call(str(rev2.swhid()), edges="rev:rev")] + ) + if up_to_date_graph: swh_storage.revision_log.assert_not_called() swh_storage.revision_shortlog.assert_not_called() else: