diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py --- a/swh/vault/cookers/git_bare.py +++ b/swh/vault/cookers/git_bare.py @@ -328,6 +328,7 @@ if self.graph: revision_ids = [] release_ids = [] + directory_ids = [] from swh.graph.client import GraphArgumentException @@ -346,6 +347,8 @@ revision_ids.append(swhid.object_id) elif swhid.object_type == identifiers.ObjectType.RELEASE: release_ids.append(swhid.object_id) + elif swhid.object_type == identifiers.ObjectType.DIRECTORY: + directory_ids.append(swhid.object_id) elif swhid.object_type == identifiers.ObjectType.SNAPSHOT: assert ( swhid.object_id == obj_id @@ -364,6 +367,7 @@ else: self._push(self._rev_stack, revision_ids) self._push(self._rel_stack, release_ids) + self._push(self._dir_stack, directory_ids) loaded_from_graph = True # TODO: when self.graph is available and supports edge labels, use it @@ -382,6 +386,8 @@ # Nothing to do, this for loop also iterates on the target branch # (if it exists) pass + elif branch.target_type == TargetType.DIRECTORY: + self._push(self._dir_stack, [branch.target]) else: raise NotImplementedError(f"{branch.target_type} branches") diff --git a/swh/vault/tests/test_git_bare_cooker.py b/swh/vault/tests/test_git_bare_cooker.py --- a/swh/vault/tests/test_git_bare_cooker.py +++ b/swh/vault/tests/test_git_bare_cooker.py @@ -12,13 +12,13 @@ import datetime import glob import io -import itertools import subprocess import tarfile import tempfile import unittest.mock import pytest +from pytest import param from swh.model.from_disk import DentryPerms from swh.model.model import ( @@ -39,23 +39,23 @@ from swh.vault.in_memory_backend import InMemoryVaultBackend -def get_objects(up_to_date_graph, release): +def get_objects(up_to_date_graph, release, tree_ref): """ Build objects:: rel2 <------ snp - | / - v / - rev1 <------ rev2 <----° - | | - v v - dir1 dir2 - | / | - v / v - cnt1 <----° cnt2 + | / | + v / v + rev1 <------ rev2 <----° dir4 + | | | + v v v + dir1 dir2 
dir3 + | / | | + v / v v + cnt1 <----° cnt2 cnt3 If up_to_date_graph is true, then swh-graph contains all objects. - Else, rev2, rel2, and the snapshot are missing from the graph. + Else, dir4, rev2, rel2, and snp are missing from the graph. """ date = TimestampWithTimezone.from_datetime( datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) @@ -63,6 +63,7 @@ author = Person.from_fullname(b"Foo ") cnt1 = Content.from_data(b"hello") cnt2 = Content.from_data(b"world") + cnt3 = Content.from_data(b"!") dir1 = Directory( entries=( DirectoryEntry( @@ -89,6 +90,26 @@ ), ) ) + dir3 = Directory( + entries=( + DirectoryEntry( + name=b"file3", + type="file", + perms=DentryPerms.content, + target=cnt3.sha1_git, + ), + ) + ) + dir4 = Directory( + entries=( + DirectoryEntry( + name=b"directory3", + type="dir", + perms=DentryPerms.directory, + target=dir3.id, + ), + ) + ) rev1 = Revision( message=b"msg1", date=date, @@ -122,16 +143,20 @@ branches = { b"refs/heads/master": SnapshotBranch( target=rev2.id, target_type=TargetType.REVISION - ) + ), } if release: branches[b"refs/tags/1.0.0"] = SnapshotBranch( target=rel2.id, target_type=TargetType.RELEASE ) + if tree_ref: + branches[b"refs/heads/tree-ref"] = SnapshotBranch( + target=dir4.id, target_type=TargetType.DIRECTORY + ) snp = Snapshot(branches=branches) if up_to_date_graph: - nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2, snp]] + nodes = [cnt1, cnt2, dir1, dir2, rev1, rev2, snp] edges = [ (dir1, cnt1), (dir2, cnt1), @@ -142,41 +167,60 @@ (snp, rev2), ] if release: - nodes.append(str(rel2.swhid())) + nodes.append(rel2) edges.append((rel2, rev2)) edges.append((snp, rel2)) + if tree_ref: + nodes.extend([cnt3, dir3, dir4]) + edges.extend( + [(dir3, cnt3), (dir4, dir3), (snp, dir4),] + ) else: - nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1]] - if release: - nodes.append(str(rel2.swhid())) + nodes = [cnt1, cnt2, cnt3, dir1, dir2, dir3, rev1] edges = [ (dir1, cnt1), (dir2, 
cnt1), (dir2, cnt2), + (dir3, cnt3), (rev1, dir1), ] + if release: + nodes.append(rel2) + if tree_ref: + nodes.extend([cnt3, dir3]) + edges.extend([(dir3, cnt3)]) + nodes = [str(n.swhid()) for n in nodes] edges = [(str(s.swhid()), str(d.swhid())) for (s, d) in edges] - return (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) + r = (cnt1, cnt2, cnt3, dir1, dir2, dir3, dir4, rev1, rev2, rel2, snp, nodes, edges) + return r @pytest.mark.graph @pytest.mark.parametrize( - "snapshot,up_to_date_graph,release", - list(itertools.product([False], [True, False], [False])) # no snp implies no rel - + list(itertools.product([True], [True, False], [True, False])), + "snapshot,up_to_date_graph,release,tree_ref", + [ + # 'no snp' implies no release or tree, because there can only be one root object. + param(False, False, False, False, id="no snp, outdated graph, no release/tree"), + param(False, True, False, False, id="no snp, updated graph, no release/tree"), + param(True, False, False, False, id="snp, outdated graph, no release/tree"), + param(True, True, False, False, id="snp, updated graph, no release/tree"), + param(True, False, True, False, id="snp, outdated graph, w/ release, no tree"), + param(True, True, True, False, id="snp, updated graph, w/ release, no tree"), + param(True, False, True, True, id="snp, outdated graph, w/ release and tree"), + param(True, True, True, True, id="snp, updated graph, w/ release and tree"), + ], ) -def test_graph_revisions(swh_storage, up_to_date_graph, snapshot, release): +def test_graph_revisions(swh_storage, up_to_date_graph, snapshot, release, tree_ref): from swh.graph.naive_client import NaiveClient as GraphClient - (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) = get_objects( - up_to_date_graph, release=release, - ) + r = get_objects(up_to_date_graph, release=release, tree_ref=tree_ref) + (cnt1, cnt2, cnt3, dir1, dir2, dir3, dir4, rev1, rev2, rel2, snp, nodes, edges) = r # Add all objects to storage - 
swh_storage.content_add([cnt1, cnt2]) - swh_storage.directory_add([dir1, dir2]) + swh_storage.content_add([cnt1, cnt2, cnt3]) + swh_storage.directory_add([dir1, dir2, dir3, dir4]) swh_storage.revision_add([rev1, rev2]) swh_storage.release_add([rel2]) swh_storage.snapshot_add([snp]) @@ -199,6 +243,11 @@ cooker = GitBareCooker( cooker_name, cooked_id, backend=backend, storage=swh_storage, graph=swh_graph, ) + + if tree_ref: + # git-fsck now rejects refs pointing to trees, but some old git repos have them. + cooker.use_fsck = False + cooker.cook() # Get bundle