diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -30,6 +30,7 @@
 from swh.model import identifiers
 from swh.model.hashutil import hash_to_bytehex, hash_to_hex
 from swh.model.model import (
+    ObjectType,
     Person,
     Revision,
     RevisionType,
@@ -42,6 +43,7 @@
 from swh.vault.cookers.base import BaseVaultCooker
 from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE
 
+RELEASE_BATCH_SIZE = 10000
 REVISION_BATCH_SIZE = 10000
 DIRECTORY_BATCH_SIZE = 10000
 CONTENT_BATCH_SIZE = 100
@@ -83,6 +85,7 @@
 
     def prepare_bundle(self):
         # Objects we will visit soon:
+        self._rel_stack: List[Sha1Git] = []
         self._rev_stack: List[Sha1Git] = []
         self._dir_stack: List[Sha1Git] = []
         self._cnt_stack: List[Sha1Git] = []
@@ -241,8 +244,14 @@
                 f"GitBareCooker.queue_subgraph({obj_type!r}, ...)"
             )
 
+    ########################
+    # Orchestration
+
     def load_objects(self) -> None:
-        while self._rev_stack or self._dir_stack or self._cnt_stack:
+        while self._rel_stack or self._rev_stack or self._dir_stack or self._cnt_stack:
+            release_ids = self._pop(self._rel_stack, RELEASE_BATCH_SIZE)
+            self.push_releases_subgraphs(release_ids)
+
             revision_ids = self._pop(self._rev_stack, REVISION_BATCH_SIZE)
             self.load_revisions(revision_ids)
 
@@ -307,6 +316,8 @@
             if not loaded_from_graph:
                 if branch.target_type == TargetType.REVISION:
                     self.push_revision_subgraph(branch.target)
+                elif branch.target_type == TargetType.RELEASE:
+                    self.push_releases_subgraphs([branch.target])
                 elif branch.target_type == TargetType.ALIAS:
                     # Nothing to do, this for loop also iterates on the target branch
                     # (if it exists)
@@ -329,6 +340,31 @@
         git_object = identifiers.revision_git_object(revision)
         return self.write_object(revision["id"], git_object)
 
+    def push_releases_subgraphs(self, obj_ids: List[Sha1Git]) -> None:
+        """Given a list of release ids, loads these releases, writes them to
+        disk, and pushes the subgraph of the revision each of them targets.
+
+        Raises NotImplementedError for releases that do not target a revision."""
+        if not obj_ids:
+            # load_objects() calls this on every iteration, possibly with an
+            # empty batch; there is nothing to ask the storage for then.
+            return
+        # NOTE(review): assumes release_get() returns a release object for every
+        # requested id -- confirm how ids unknown to the storage are reported.
+        for release in self.storage.release_get(obj_ids):
+            self.write_release_node(release.to_dict())
+            if release.target_type == ObjectType.REVISION:
+                # The target goes straight to push_revision_subgraph(); this
+                # method queues nothing on self._rev_stack itself.
+                self.push_revision_subgraph(release.target)
+            else:
+                raise NotImplementedError(f"{release.target_type} release targets")
+
+    def write_release_node(self, release: Dict[str, Any]) -> bool:
+        """Writes a release object to disk"""
+        git_object = identifiers.release_git_object(release)
+        return self.write_object(release["id"], git_object)
+
     def load_directories(self, obj_ids: List[Sha1Git]) -> None:
         for obj_id in obj_ids:
             self.load_directory(obj_id)
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -127,6 +127,17 @@
             self.git_shell("reset", "--hard", "HEAD")
         return ret
 
+    def tag(self, name, target=b"HEAD", message=None):
+        """Create tag ``name`` on ``target``; the tag is annotated if and only
+        if a ``message`` is given."""
+        dulwich.porcelain.tag_create(
+            self.repo,
+            name,
+            message=message,
+            annotated=message is not None,
+            objectish=target,
+        )
+
     def merge(self, parent_sha_list, message="Merge branches."):
         self.git_shell(
             "merge",
@@ -875,6 +886,76 @@
         ert.checkout(b"HEAD")
         assert (p / "file").stat().st_mode == 0o100644
 
+    def load_repo_tags(self, git_loader):
+        """Load a repository with lightweight (t2, t5) and annotated (t4a, t5a)
+        tags; return the loader and the id of the commit HEAD points to."""
+        #        v-- t2
+        #
+        #    1---2----5      <-- master, t5, and t5a (annotated)
+        #         \
+        #          ----3----4  <-- t4a (annotated)
+        #
+        repo = TestRepo()
+        with repo as rp:
+            (rp / "file1").write_text(TEST_CONTENT)
+            repo.commit("Add file1")
+
+            (rp / "file2").write_text(TEST_CONTENT)
+            repo.commit("Add file2")  # create c2
+
+            repo.tag(b"t2")
+
+            (rp / "file3").write_text(TEST_CONTENT)
+            repo.commit("add file3")
+
+            (rp / "file4").write_text(TEST_CONTENT)
+            repo.commit("add file4")
+
+            repo.tag(b"t4a", message=b"tag 4")
+
+            # Go back to c2
+            repo.git_shell("reset", "--hard", "HEAD^^")
+
+            (rp / "file5").write_text(TEST_CONTENT)
+            repo.commit("add file5")  # create c5
+
+            repo.tag(b"t5")
+            repo.tag(b"t5a", message=b"tag 5")
+
+            obj_id_hex = repo.repo.refs[b"HEAD"].decode()
+            obj_id = hashutil.hash_to_bytes(obj_id_hex)
+            loader = git_loader(str(rp))
+            loader.load()
+        return (loader, obj_id)
+
+    def check_snapshot_tags(self, ert, p, obj_id):
+        """Check that the cooked repository ``ert`` has the refs, tag objects and
+        history built by load_repo_tags(); ``obj_id`` is the id of commit 5."""
+        assert (
+            hashutil.hash_to_bytehex(obj_id)
+            == ert.repo.refs[b"HEAD"]
+            == ert.repo.refs[b"refs/heads/master"]
+            == ert.repo.refs[b"refs/remotes/origin/HEAD"]
+            == ert.repo.refs[b"refs/remotes/origin/master"]
+            == ert.repo.refs[b"refs/tags/t5"]
+        )
+
+        c2_id = ert.repo.refs[b"refs/tags/t2"]
+        c5_id = hashutil.hash_to_bytehex(obj_id)
+
+        assert ert.repo[c5_id].parents == [c2_id]
+
+        t5a = ert.repo[ert.repo.refs[b"refs/tags/t5a"]]
+        assert t5a.message == b"tag 5"
+        assert t5a.object == (dulwich.objects.Commit, c5_id)
+
+        t4a = ert.repo[ert.repo.refs[b"refs/tags/t4a"]]
+        (_, c4_id) = t4a.object
+        assert ert.repo[c4_id].message == b"add file4\n"
+        (c3_id,) = ert.repo[c4_id].parents
+        assert ert.repo[c3_id].message == b"add file3\n"
+        assert ert.repo[c3_id].parents == [c2_id]
+
 
 class TestRevisionCooker(RepoFixtures):
     def test_revision_simple(self, git_loader, cook_extract_revision):
@@ -979,3 +1060,10 @@
         snp_id = loader.loaded_snapshot_id
         with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
             self.check_revision_filtered_objects(ert, p, main_rev_id)
+
+    def test_snapshot_tags(self, git_loader, cook_extract_snapshot):
+        """Cook a snapshot whose branches include tags and check the result."""
+        (loader, main_rev_id) = self.load_repo_tags(git_loader)
+        snp_id = loader.loaded_snapshot_id
+        with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+            self.check_snapshot_tags(ert, p, main_rev_id)