Page MenuHomeSoftware Heritage

D6000.id21659.diff
No OneTemporary

D6000.id21659.diff

diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -30,6 +30,7 @@
from swh.model import identifiers
from swh.model.hashutil import hash_to_bytehex, hash_to_hex
from swh.model.model import (
+ ObjectType,
Person,
Revision,
RevisionType,
@@ -42,6 +43,7 @@
from swh.vault.cookers.base import BaseVaultCooker
from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE
+RELEASE_BATCH_SIZE = 10000
REVISION_BATCH_SIZE = 10000
DIRECTORY_BATCH_SIZE = 10000
CONTENT_BATCH_SIZE = 100
@@ -83,6 +85,7 @@
def prepare_bundle(self):
# Objects we will visit soon:
+ self._rel_stack: List[Sha1Git] = []
self._rev_stack: List[Sha1Git] = []
self._dir_stack: List[Sha1Git] = []
self._cnt_stack: List[Sha1Git] = []
@@ -241,8 +244,14 @@
f"GitBareCooker.queue_subgraph({obj_type!r}, ...)"
)
+ ########################
+ # Orchestration
+
def load_objects(self) -> None:
- while self._rev_stack or self._dir_stack or self._cnt_stack:
+ while self._rel_stack or self._rev_stack or self._dir_stack or self._cnt_stack:
+ release_ids = self._pop(self._rel_stack, RELEASE_BATCH_SIZE)
+ self.push_releases_subgraphs(release_ids)
+
revision_ids = self._pop(self._rev_stack, REVISION_BATCH_SIZE)
self.load_revisions(revision_ids)
@@ -307,6 +316,8 @@
if not loaded_from_graph:
if branch.target_type == TargetType.REVISION:
self.push_revision_subgraph(branch.target)
+ elif branch.target_type == TargetType.RELEASE:
+ self.push_releases_subgraphs([branch.target])
elif branch.target_type == TargetType.ALIAS:
# Nothing to do, this for loop also iterates on the target branch
# (if it exists)
@@ -329,6 +340,24 @@
git_object = identifiers.revision_git_object(revision)
return self.write_object(revision["id"], git_object)
+    def push_releases_subgraphs(self, obj_ids: List[Sha1Git]) -> None:
+        """Load the given releases, write them out, and queue their targets.
+
+        Each release returned by ``self.storage.release_get(obj_ids)`` is
+        written with ``write_release_node``; its target is then handed to
+        ``push_revision_subgraph``.
+
+        Raises:
+            NotImplementedError: if a release targets anything but a revision.
+        """
+        # NOTE(review): presumably release_get() can yield None for unknown
+        # ids, which would fail on .to_dict() below -- confirm against the
+        # storage API before relying on this for arbitrary input.
+        for release in self.storage.release_get(obj_ids):
+            self.write_release_node(release.to_dict())
+            if release.target_type == ObjectType.REVISION:
+                # Same helper as the one used for snapshot branches that
+                # point directly at a revision.
+                self.push_revision_subgraph(release.target)
+            else:
+                raise NotImplementedError(f"{release.target_type} release targets")
+
+    def write_release_node(self, release: Dict[str, Any]) -> bool:
+        """Serialize a release dict as a git object and write it to disk.
+
+        The object is stored under ``release["id"]``; the return value is
+        whatever ``write_object`` returns.
+        """
+        serialized = identifiers.release_git_object(release)
+        release_id = release["id"]
+        return self.write_object(release_id, serialized)
+
def load_directories(self, obj_ids: List[Sha1Git]) -> None:
for obj_id in obj_ids:
self.load_directory(obj_id)
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -127,6 +127,15 @@
self.git_shell("reset", "--hard", "HEAD")
return ret
+    def tag(self, name, target=b"HEAD", message=None):
+        """Create the tag ``name`` pointing at ``target``.
+
+        The tag is requested as annotated only when a ``message`` is given.
+        """
+        is_annotated = message is not None
+        dulwich.porcelain.tag_create(
+            self.repo, name, message=message, annotated=is_annotated, objectish=target
+        )
+
def merge(self, parent_sha_list, message="Merge branches."):
self.git_shell(
"merge",
@@ -875,6 +884,72 @@
ert.checkout(b"HEAD")
assert (p / "file").stat().st_mode == 0o100644
+ def load_repo_tags(self, git_loader):
+ """Build a small repository with tagged commits and load it.
+
+ Returns a ``(loader, obj_id)`` pair, where ``obj_id`` is the id (as
+ bytes) that HEAD points to once commit 5 has been created.
+ """
+ #     v-- t2
+ #
+ # 1---2----5        <-- master, t5, and t5a (annotated)
+ #      \
+ #       ----3----4  <-- t4a (annotated)
+ #
+ repo = TestRepo()
+ with repo as rp:
+ (rp / "file1").write_text(TEST_CONTENT)
+ repo.commit("Add file1")
+
+ (rp / "file2").write_text(TEST_CONTENT)
+ repo.commit("Add file2") # create c2
+
+ # No message is given, so tag() passes annotated=False for t2.
+ repo.tag(b"t2")
+
+ (rp / "file3").write_text(TEST_CONTENT)
+ repo.commit("add file3")
+
+ (rp / "file4").write_text(TEST_CONTENT)
+ repo.commit("add file4")
+
+ # A message is given, so t4a is requested as an annotated tag (on c4).
+ repo.tag(b"t4a", message=b"tag 4")
+
+ # Go back to c2
+ repo.git_shell("reset", "--hard", "HEAD^^")
+
+ (rp / "file5").write_text(TEST_CONTENT)
+ repo.commit("add file5") # create c5
+
+ # Two tags on c5: one without a message and one with.
+ repo.tag(b"t5")
+ repo.tag(b"t5a", message=b"tag 5")
+
+ # HEAD is c5 at this point; its hex id is converted with hash_to_bytes.
+ obj_id_hex = repo.repo.refs[b"HEAD"].decode()
+ obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ loader = git_loader(str(rp))
+ loader.load()
+ return (loader, obj_id)
+
+    def check_snapshot_tags(self, ert, p, obj_id):
+        """Verify the branches and tags of a cooked ``load_repo_tags`` repository.
+
+        ``obj_id`` is the id of commit 5 as returned by ``load_repo_tags``.
+        ``p`` is not used here.
+        """
+        cooked = ert.repo
+        c5_id = hashutil.hash_to_bytehex(obj_id)
+
+        # Every ref expected to sit on commit 5 must resolve to it.
+        for ref_name in (
+            b"HEAD",
+            b"refs/heads/master",
+            b"refs/remotes/origin/HEAD",
+            b"refs/remotes/origin/master",
+            b"refs/tags/t5",
+        ):
+            assert cooked.refs[ref_name] == c5_id
+
+        # t2 resolves to commit 2, which must be the only parent of commit 5.
+        c2_id = cooked.refs[b"refs/tags/t2"]
+        assert cooked[c5_id].parents == [c2_id]
+
+        # t5a is a tag object with its own message, pointing at commit 5.
+        t5a = cooked[cooked.refs[b"refs/tags/t5a"]]
+        assert t5a.message == b"tag 5"
+        assert t5a.object == (dulwich.objects.Commit, c5_id)
+
+        # t4a points at commit 4, whose ancestry must be 4 -> 3 -> 2.
+        _, c4_id = cooked[cooked.refs[b"refs/tags/t4a"]].object
+        c4 = cooked[c4_id]
+        assert c4.message == b"add file4\n"
+        (c3_id,) = c4.parents
+        c3 = cooked[c3_id]
+        assert c3.message == b"add file3\n"
+        assert c3.parents == [c2_id]
+
class TestRevisionCooker(RepoFixtures):
def test_revision_simple(self, git_loader, cook_extract_revision):
@@ -979,3 +1054,9 @@
snp_id = loader.loaded_snapshot_id
with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
self.check_revision_filtered_objects(ert, p, main_rev_id)
+
+    def test_snapshot_tags(self, git_loader, cook_extract_snapshot):
+        """Cook the snapshot of the tagged test repository and check its refs."""
+        loader, head_rev_id = self.load_repo_tags(git_loader)
+        snapshot_id = loader.loaded_snapshot_id
+        with cook_extract_snapshot(loader.storage, snapshot_id) as (ert, p):
+            self.check_snapshot_tags(ert, p, head_rev_id)

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 12:22 PM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231973

Event Timeline