Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9346384
D5999.id21775.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Subscribers
None
D5999.id21775.diff
View Options
diff --git a/swh/vault/cookers/__init__.py b/swh/vault/cookers/__init__.py
--- a/swh/vault/cookers/__init__.py
+++ b/swh/vault/cookers/__init__.py
@@ -22,6 +22,7 @@
"directory": DirectoryCooker,
"revision_flat": RevisionFlatCooker,
"revision_gitfast": RevisionGitfastCooker,
+ "snapshot_git_bare": GitBareCooker,
"revision_git_bare": GitBareCooker,
"directory_git_bare": GitBareCooker,
}
diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -23,7 +23,7 @@
import subprocess
import tarfile
import tempfile
-from typing import Any, Dict, Iterable, List, Set
+from typing import Any, Dict, Iterable, List, Optional, Set
import zlib
from swh.core.api.classes import stream_results
@@ -34,9 +34,11 @@
Revision,
RevisionType,
Sha1Git,
+ TargetType,
TimestampWithTimezone,
)
from swh.storage.algos.revisions_walker import DFSRevisionsWalker
+from swh.storage.algos.snapshot import snapshot_get_all_branches
from swh.vault.cookers.base import BaseVaultCooker
from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE
@@ -57,6 +59,8 @@
return not list(self.storage.revision_missing([self.obj_id]))
elif obj_type == "directory":
return not list(self.storage.directory_missing([self.obj_id]))
+ elif obj_type == "snapshot":
+ return not list(self.storage.snapshot_missing([self.obj_id]))
else:
raise NotImplementedError(f"GitBareCooker for {obj_type}")
@@ -85,6 +89,7 @@
# Set of objects already in any of the stacks:
self._seen: Set[Sha1Git] = set()
+ self._walker_state: Optional[Any] = None
# Set of errors we expect git-fsck to raise at the end:
self._expected_fsck_errors = set()
@@ -102,7 +107,7 @@
# Load and write all the objects to disk
self.load_objects()
- # Write the root object as a ref.
+ # Write the root object as a ref (this step is skipped if it's a snapshot).
# This must be done before repacking; git-repack ignores orphan objects.
self.write_refs()
@@ -150,7 +155,8 @@
)
)
- def write_refs(self):
+ def write_refs(self, snapshot=None):
+ refs: Dict[bytes, bytes] # ref name -> target
obj_type = self.obj_type.split("_")[0]
if obj_type == "directory":
# We need a synthetic revision pointing to the directory
@@ -171,14 +177,29 @@
synthetic=True,
)
self.write_revision_node(revision.to_dict())
- head = revision.id
+ refs = {b"refs/heads/master": hash_to_bytehex(revision.id)}
elif obj_type == "revision":
- head = self.obj_id
+ refs = {b"refs/heads/master": hash_to_bytehex(self.obj_id)}
+ elif obj_type == "snapshot":
+ if snapshot is None:
+ # refs were already written in a previous step
+ return
+ refs = {
+ branch_name: (
+ b"ref: " + branch.target
+ if branch.target_type == TargetType.ALIAS
+ else hash_to_bytehex(branch.target)
+ )
+ for (branch_name, branch) in snapshot.branches.items()
+ }
else:
assert False, obj_type
- with open(os.path.join(self.gitdir, "refs", "heads", "master"), "wb") as fd:
- fd.write(hash_to_bytehex(head))
+ for (ref_name, ref_target) in refs.items():
+ path = os.path.join(self.gitdir.encode(), ref_name)
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ with open(path, "wb") as fd:
+ fd.write(ref_target)
def write_archive(self):
with tarfile.TarFile(mode="w", fileobj=self.fileobj) as tf:
@@ -213,6 +234,8 @@
self.push_revision_subgraph(obj_id)
elif obj_type == "directory":
self._push(self._dir_stack, [obj_id])
+ elif obj_type == "snapshot":
+ self.push_snapshot_subgraph(obj_id)
else:
raise NotImplementedError(
f"GitBareCooker.queue_subgraph({obj_type!r}, ...)"
@@ -261,10 +284,37 @@
# swh-graph, fall back to self.storage.revision_log.
# self.storage.revision_log also gives us the full revisions,
# so we load them right now instead of just pushing them on the stack.
- walker = DFSRevisionsWalker(self.storage, obj_id)
+ walker = DFSRevisionsWalker(self.storage, obj_id, state=self._walker_state)
for revision in walker:
self.write_revision_node(revision)
self._push(self._dir_stack, [revision["directory"]])
+ # Save the state, so the next call to the walker won't return the same
+ # revisions
+ self._walker_state = walker.export_state()
+
+ def push_snapshot_subgraph(self, obj_id: Sha1Git) -> None:
+ """Fetches a snapshot and all its children, and writes them to disk"""
+ loaded_from_graph = False
+
+ if self.graph:
+ pass # TODO
+
+ # TODO: when self.graph is available and supports edge labels, use it
+ # directly to get branch names.
+ snapshot = snapshot_get_all_branches(self.storage, obj_id)
+ assert snapshot, "Unknown snapshot" # should have been caught by check_exists()
+ for branch in snapshot.branches.values():
+ if not loaded_from_graph:
+ if branch.target_type == TargetType.REVISION:
+ self.push_revision_subgraph(branch.target)
+ elif branch.target_type == TargetType.ALIAS:
+ # Nothing to do, this for loop also iterates on the target branch
+ # (if it exists)
+ pass
+ else:
+ raise NotImplementedError(f"{branch.target_type} branches")
+
+ self.write_refs(snapshot=snapshot)
def load_revisions(self, obj_ids: List[Sha1Git]) -> None:
"""Given a list of revision ids, loads these revisions and their directories;
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -25,7 +25,7 @@
import pytest
from swh.loader.git.from_disk import GitLoaderFromDisk
-from swh.model import from_disk, hashutil
+from swh.model import from_disk, hashutil, identifiers
from swh.model.model import (
Directory,
DirectoryEntry,
@@ -294,14 +294,19 @@
@contextlib.contextmanager
-def cook_extract_revision_git_bare(storage, obj_id, fsck=True):
+def cook_extract_git_bare(storage, swhid, fsck=True):
"""Context manager that cooks a revision and extract it,
using GitBareCooker"""
backend = unittest.mock.MagicMock()
backend.storage = storage
# Cook the object
- cooker = GitBareCooker("revision", obj_id, backend=backend, storage=storage)
+ cooker = GitBareCooker(
+ swhid.object_type.name.lower(),
+ swhid.object_id,
+ backend=backend,
+ storage=storage,
+ )
cooker.use_fsck = fsck # Some tests try edge-cases that git-fsck rejects
cooker.fileobj = io.BytesIO()
assert cooker.check_exists()
@@ -317,18 +322,25 @@
with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir:
clone_dir = pathlib.Path(clone_dir)
subprocess.check_call(
- [
- "git",
- "clone",
- os.path.join(td, f"swh:1:rev:{obj_id.hex()}.git"),
- clone_dir,
- ]
+ ["git", "clone", os.path.join(td, f"{swhid}.git"), clone_dir,]
)
test_repo = TestRepo(clone_dir)
with test_repo:
yield test_repo, clone_dir
+@contextlib.contextmanager
+def cook_extract_revision_git_bare(storage, obj_id, fsck=True):
+ with cook_extract_git_bare(
+ storage,
+ identifiers.CoreSWHID(
+ object_type=identifiers.ObjectType.REVISION, object_id=obj_id
+ ),
+ fsck=fsck,
+ ) as res:
+ yield res
+
+
@pytest.fixture(
scope="module",
params=[cook_extract_revision_gitfast, cook_extract_revision_git_bare],
@@ -339,6 +351,27 @@
return request.param
+@contextlib.contextmanager
+def cook_extract_snapshot_git_bare(storage, obj_id, fsck=True):
+ with cook_extract_git_bare(
+ storage,
+ identifiers.CoreSWHID(
+ object_type=identifiers.ObjectType.SNAPSHOT, object_id=obj_id
+ ),
+ fsck=fsck,
+ ) as res:
+ yield res
+
+
+@pytest.fixture(
+ scope="module", params=[cook_extract_snapshot_git_bare],
+)
+def cook_extract_snapshot(request):
+ """Equivalent to cook_extract_snapshot_git_bare; but analogous to
+ cook_extract_revision in case we ever have more cookers supporting snapshots"""
+ return request.param
+
+
TEST_CONTENT = (
" test content\n" "and unicode \N{BLACK HEART SUIT}\n" " and trailing spaces "
)
@@ -612,6 +645,53 @@
def check_revision_two_roots(self, ert, p, obj_id):
assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex()
+ (c3,) = ert.repo[hashutil.hash_to_bytehex(obj_id)].parents
+ assert len(ert.repo[c3].parents) == 2
+
+ def load_repo_two_heads(self, git_loader):
+ #
+ # 1---2----4 <-- master and b1
+ # \
+ # ----3 <-- b2
+ #
+ repo = TestRepo()
+ with repo as rp:
+ (rp / "file1").write_text(TEST_CONTENT)
+ repo.commit("Add file1")
+
+ (rp / "file2").write_text(TEST_CONTENT)
+ c2 = repo.commit("Add file2")
+
+ repo.repo.refs[b"refs/heads/b2"] = c2 # branch b2 from master
+
+ (rp / "file3").write_text(TEST_CONTENT)
+ repo.commit("add file3", ref=b"refs/heads/b2")
+
+ (rp / "file4").write_text(TEST_CONTENT)
+ c4 = repo.commit("add file4", ref=b"refs/heads/master")
+ repo.repo.refs[b"refs/heads/b1"] = c4 # branch b1 from master
+
+ obj_id_hex = repo.repo.refs[b"HEAD"].decode()
+ obj_id = hashutil.hash_to_bytes(obj_id_hex)
+ loader = git_loader(str(rp))
+ loader.load()
+ return (loader, obj_id)
+
+ def check_snapshot_two_heads(self, ert, p, obj_id):
+ assert (
+ hashutil.hash_to_bytehex(obj_id)
+ == ert.repo.refs[b"HEAD"]
+ == ert.repo.refs[b"refs/heads/master"]
+ == ert.repo.refs[b"refs/remotes/origin/HEAD"]
+ == ert.repo.refs[b"refs/remotes/origin/master"]
+ == ert.repo.refs[b"refs/remotes/origin/b1"]
+ )
+
+ c4_id = hashutil.hash_to_bytehex(obj_id)
+ c3_id = ert.repo.refs[b"refs/remotes/origin/b2"]
+
+ assert ert.repo[c3_id].parents == ert.repo[c4_id].parents
+
def load_repo_two_double_fork_merge(self, git_loader):
#
# 2---4---6
@@ -621,22 +701,22 @@
repo = TestRepo()
with repo as rp:
(rp / "file1").write_text(TEST_CONTENT)
- c1 = repo.commit("Add file1")
- repo.repo.refs[b"refs/heads/c1"] = c1
+ c1 = repo.commit("Add file1") # create commit 1
+ repo.repo.refs[b"refs/heads/c1"] = c1 # branch c1 from master
(rp / "file2").write_text(TEST_CONTENT)
- repo.commit("Add file2")
+ repo.commit("Add file2") # create commit 2
(rp / "file3").write_text(TEST_CONTENT)
- c3 = repo.commit("Add file3", ref=b"refs/heads/c1")
- repo.repo.refs[b"refs/heads/c3"] = c3
+ c3 = repo.commit("Add file3", ref=b"refs/heads/c1") # create commit 3 on c1
+ repo.repo.refs[b"refs/heads/c3"] = c3 # branch c3 from c1
- repo.merge([c3])
+ repo.merge([c3]) # create commit 4
(rp / "file5").write_text(TEST_CONTENT)
- c5 = repo.commit("Add file3", ref=b"refs/heads/c3")
+ c5 = repo.commit("Add file3", ref=b"refs/heads/c3") # create commit 5 on c3
- repo.merge([c5])
+ repo.merge([c5]) # create commit 6
obj_id_hex = repo.repo.refs[b"HEAD"].decode()
obj_id = hashutil.hash_to_bytes(obj_id_hex)
@@ -647,6 +727,21 @@
def check_revision_two_double_fork_merge(self, ert, p, obj_id):
assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex()
+ def check_snapshot_two_double_fork_merge(self, ert, p, obj_id):
+ assert (
+ hashutil.hash_to_bytehex(obj_id)
+ == ert.repo.refs[b"HEAD"]
+ == ert.repo.refs[b"refs/heads/master"]
+ == ert.repo.refs[b"refs/remotes/origin/HEAD"]
+ == ert.repo.refs[b"refs/remotes/origin/master"]
+ )
+
+ (c4_id, c5_id) = ert.repo[obj_id.hex().encode()].parents
+ assert c5_id == ert.repo.refs[b"refs/remotes/origin/c3"]
+
+ (c2_id, c3_id) = ert.repo[c4_id].parents
+ assert c3_id == ert.repo.refs[b"refs/remotes/origin/c1"]
+
def load_repo_triple_merge(self, git_loader):
#
# .---.---5
@@ -676,6 +771,25 @@
def check_revision_triple_merge(self, ert, p, obj_id):
assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex()
+ def check_snapshot_triple_merge(self, ert, p, obj_id):
+ assert (
+ hashutil.hash_to_bytehex(obj_id)
+ == ert.repo.refs[b"HEAD"]
+ == ert.repo.refs[b"refs/heads/master"]
+ == ert.repo.refs[b"refs/remotes/origin/HEAD"]
+ == ert.repo.refs[b"refs/remotes/origin/master"]
+ )
+
+ (c2_id, c3_id, c4_id) = ert.repo[obj_id.hex().encode()].parents
+ assert c3_id == ert.repo.refs[b"refs/remotes/origin/b1"]
+ assert c4_id == ert.repo.refs[b"refs/remotes/origin/b2"]
+
+ assert (
+ ert.repo[c2_id].parents
+ == ert.repo[c3_id].parents
+ == ert.repo[c4_id].parents
+ )
+
def load_repo_filtered_objects(self, git_loader):
repo = TestRepo()
with repo as rp:
@@ -825,3 +939,43 @@
with cook_stream_revision_gitfast(swh_storage, rev.id) as stream:
pattern = "M 160000 {} submodule".format(target_rev).encode()
assert pattern in stream.read()
+
+
+class TestSnapshotCooker(RepoFixtures):
+ def test_snapshot_simple(self, git_loader, cook_extract_snapshot):
+ (loader, main_rev_id) = self.load_repo_simple(git_loader)
+ snp_id = loader.loaded_snapshot_id
+ with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ self.check_revision_simple(ert, p, main_rev_id)
+
+ def test_snapshot_two_roots(self, git_loader, cook_extract_snapshot):
+ (loader, main_rev_id) = self.load_repo_two_roots(git_loader)
+ snp_id = loader.loaded_snapshot_id
+ with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ self.check_revision_two_roots(ert, p, main_rev_id)
+
+ def test_snapshot_two_heads(self, git_loader, cook_extract_snapshot):
+ (loader, main_rev_id) = self.load_repo_two_heads(git_loader)
+ snp_id = loader.loaded_snapshot_id
+ with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ self.check_snapshot_two_heads(ert, p, main_rev_id)
+
+ def test_snapshot_two_double_fork_merge(self, git_loader, cook_extract_snapshot):
+ (loader, main_rev_id) = self.load_repo_two_double_fork_merge(git_loader)
+ snp_id = loader.loaded_snapshot_id
+ with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ self.check_revision_two_double_fork_merge(ert, p, main_rev_id)
+ self.check_snapshot_two_double_fork_merge(ert, p, main_rev_id)
+
+ def test_snapshot_triple_merge(self, git_loader, cook_extract_snapshot):
+ (loader, main_rev_id) = self.load_repo_triple_merge(git_loader)
+ snp_id = loader.loaded_snapshot_id
+ with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ self.check_revision_triple_merge(ert, p, main_rev_id)
+ self.check_snapshot_triple_merge(ert, p, main_rev_id)
+
+ def test_snapshot_filtered_objects(self, git_loader, cook_extract_snapshot):
+ (loader, main_rev_id) = self.load_repo_filtered_objects(git_loader)
+ snp_id = loader.loaded_snapshot_id
+ with cook_extract_snapshot(loader.storage, snp_id) as (ert, p):
+ self.check_revision_filtered_objects(ert, p, main_rev_id)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:58 PM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219266
Attached To
D5999: git_bare: Add partial support for snapshots (no release or swh-graph support yet)
Event Timeline
Log In to Comment