diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py --- a/swh/vault/cookers/git_bare.py +++ b/swh/vault/cookers/git_bare.py @@ -303,7 +303,41 @@ loaded_from_graph = False if self.graph: - pass # TODO + revision_ids = [] + release_ids = [] + + from swh.graph.client import GraphArgumentException + + # First, try to cook using swh-graph, as it is more efficient than + # swh-storage for querying the history + obj_swhid = identifiers.CoreSWHID( + object_type=identifiers.ObjectType.SNAPSHOT, object_id=obj_id, + ) + try: + swhids = map( + identifiers.CoreSWHID.from_string, + self.graph.visit_nodes(str(obj_swhid), edges="snp:*,rel:*,rev:rev"), + ) + for swhid in swhids: + if swhid.object_type == identifiers.ObjectType.REVISION: + revision_ids.append(swhid.object_id) + elif swhid.object_type == identifiers.ObjectType.RELEASE: + release_ids.append(swhid.object_id) + elif swhid.object_type == identifiers.ObjectType.SNAPSHOT: + assert ( + swhid.object_id == obj_id + ), f"Snapshot {obj_id.hex()} references a different snapshot" + else: + raise NotImplementedError( + f"{swhid.object_type} objects in snapshot subgraphs." + ) + except GraphArgumentException: + # Snapshot not found in the graph + pass + else: + self._push(self._rev_stack, revision_ids) + self._push(self._rel_stack, release_ids) + loaded_from_graph = True # TODO: when self.graph is available and supports edge labels, use it # directly to get branch names. 
diff --git a/swh/vault/tests/test_git_bare_cooker.py b/swh/vault/tests/test_git_bare_cooker.py --- a/swh/vault/tests/test_git_bare_cooker.py +++ b/swh/vault/tests/test_git_bare_cooker.py @@ -10,7 +10,9 @@ """ import datetime +import glob import io +import itertools import subprocess import tarfile import tempfile @@ -23,19 +25,27 @@ Content, Directory, DirectoryEntry, + ObjectType, Person, + Release, Revision, RevisionType, + Snapshot, + SnapshotBranch, + TargetType, TimestampWithTimezone, ) from swh.vault.cookers.git_bare import GitBareCooker from swh.vault.in_memory_backend import InMemoryVaultBackend -def get_objects(last_revision_in_graph): +def get_objects(last_revision_in_graph, release): """ Build objects:: + rel2 + | + v rev1 <------ rev2 | | v v @@ -98,42 +108,71 @@ synthetic=True, ) + rel2 = Release( + name=b"1.0.0", + message=b"tag2", + target_type=ObjectType.REVISION, + target=rev2.id, + synthetic=True, + ) + + branches = {} + if release: + branches[b"refs/tags/1.0.0"] = SnapshotBranch( + target=rel2.id, target_type=TargetType.RELEASE + ) + else: + branches[b"refs/heads/master"] = SnapshotBranch( + target=rev2.id, target_type=TargetType.REVISION + ) + snp = Snapshot(branches=branches) + if last_revision_in_graph: - nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2]] + nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2, snp]] edges = [ - (str(s.swhid()), str(d.swhid())) - for (s, d) in [ - (dir1, cnt1), - (dir2, cnt1), - (dir2, cnt2), - (rev1, dir1), - (rev2, dir2), - (rev2, rev1), - ] + (dir1, cnt1), + (dir2, cnt1), + (dir2, cnt2), + (rev1, dir1), + (rev2, dir2), + (rev2, rev1), + (snp, rev2), ] + if release: + nodes.append(str(rel2.swhid())) + edges.append((rel2, rev2)) + edges.append((snp, rel2)) else: nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1]] edges = [ - (str(s.swhid()), str(d.swhid())) - for (s, d) in [(dir1, cnt1), (dir2, cnt1), (dir2, cnt2), (rev1, dir1),] + (dir1, cnt1), + (dir2, cnt1), + (dir2, 
cnt2), + (rev1, dir1), ] - return (cnt1, cnt2, dir1, dir2, rev1, rev2, nodes, edges) + edges = [(str(s.swhid()), str(d.swhid())) for (s, d) in edges] + + return (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) @pytest.mark.graph -@pytest.mark.parametrize("last_revision_in_graph", [True, False]) -def test_graph_revisions(swh_storage, last_revision_in_graph): +@pytest.mark.parametrize( + "last_revision_in_graph,snapshot", itertools.product([True, False], [True, False]) +) +def test_graph_revisions(swh_storage, last_revision_in_graph, snapshot): from swh.graph.naive_client import NaiveClient as GraphClient - (cnt1, cnt2, dir1, dir2, rev1, rev2, nodes, edges) = get_objects( - last_revision_in_graph + (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) = get_objects( + last_revision_in_graph, release=False ) # Add all objects to storage swh_storage.content_add([cnt1, cnt2]) swh_storage.directory_add([dir1, dir2]) swh_storage.revision_add([rev1, rev2]) + swh_storage.release_add([rel2]) + swh_storage.snapshot_add([snp]) # Add spy on swh_storage, to make sure revision_log is not called # (the graph must be used instead) @@ -144,17 +183,19 @@ # Cook backend = InMemoryVaultBackend() + if snapshot: + cooker_name = "snapshot_gitbare" + cooked_id = snp.id + else: + cooker_name = "revision_gitbare" + cooked_id = rev2.id cooker = GitBareCooker( - "revision_gitbare", - rev2.id, - backend=backend, - storage=swh_storage, - graph=swh_graph, + cooker_name, cooked_id, backend=backend, storage=swh_storage, graph=swh_graph, ) cooker.cook() # Get bundle - bundle = backend.fetch("revision_gitbare", rev2.id) + bundle = backend.fetch(cooker_name, cooked_id) # Extract bundle and make sure both revisions are in it with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir: @@ -165,7 +206,7 @@ [ "git", "-C", - f"{tempdir}/{rev2.swhid()}.git", + glob.glob(f"{tempdir}/*{cooked_id.hex()}.git")[0], "log", "--format=oneline", "--decorate=", @@ -175,7 +216,27 @@ assert 
output.decode() == f"{rev2.id.hex()} msg2\n{rev1.id.hex()} msg1\n" # Make sure the graph was used instead of swh_storage.revision_log - swh_graph.visit_nodes.assert_called_once_with(str(rev2.swhid()), edges="rev:rev") + if snapshot: + if last_revision_in_graph: + # The graph has everything, so the first call succeeds and returns + # all objects transitively pointed by the snapshot + swh_graph.visit_nodes.assert_has_calls( + [unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"),] + ) + else: + # The graph does not have everything, so the first call returns nothing. + # However, the second call (on the top rev) succeeds and returns + # all objects but the rev and the rel + swh_graph.visit_nodes.assert_has_calls( + [ + unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"), + unittest.mock.call(str(rev2.swhid()), edges="rev:rev"), + ] + ) + else: + swh_graph.visit_nodes.assert_has_calls( + [unittest.mock.call(str(rev2.swhid()), edges="rev:rev")] + ) if last_revision_in_graph: swh_storage.revision_log.assert_not_called() swh_storage.revision_shortlog.assert_not_called()