diff --git a/swh/vault/cookers/__init__.py b/swh/vault/cookers/__init__.py
index 9bbbf5d..1ab2a8d 100644
--- a/swh/vault/cookers/__init__.py
+++ b/swh/vault/cookers/__init__.py
@@ -1,112 +1,113 @@
 # Copyright (C) 2017-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from __future__ import annotations

 import os
 from typing import Any, Dict

 from swh.core.config import load_named_config
 from swh.core.config import read as read_config
 from swh.storage import get_storage
 from swh.vault import get_vault
 from swh.vault.cookers.base import DEFAULT_CONFIG, DEFAULT_CONFIG_PATH
 from swh.vault.cookers.directory import DirectoryCooker
 from swh.vault.cookers.git_bare import GitBareCooker
 from swh.vault.cookers.revision_flat import RevisionFlatCooker
 from swh.vault.cookers.revision_gitfast import RevisionGitfastCooker

 COOKER_TYPES = {
     "directory": DirectoryCooker,
     "revision_flat": RevisionFlatCooker,
     "revision_gitfast": RevisionGitfastCooker,
+    "snapshot_git_bare": GitBareCooker,
     "revision_git_bare": GitBareCooker,
     "directory_git_bare": GitBareCooker,
 }


 def get_cooker_cls(obj_type):
     return COOKER_TYPES[obj_type]


 def check_config(cfg: Dict[str, Any]) -> Dict[str, Any]:
     """Ensure the configuration is valid to run a vault worker, and propagate
     defaults.

     Raises:
         EnvironmentError if the configuration is not for a remote instance
         ValueError if one of the following keys is missing: vault, storage

     Returns:
         New configuration dict to instantiate a vault worker instance

     """
     cfg = cfg.copy()

     if "vault" not in cfg:
         raise ValueError("missing 'vault' configuration")

     vcfg = cfg["vault"]
     if vcfg["cls"] != "remote":
         raise EnvironmentError(
             "This vault backend can only be a 'remote' configuration"
         )

     # TODO: Soft-deprecation of args key. Remove when ready.
     vcfg.update(vcfg.get("args", {}))

     # Default to top-level value if any
     if "storage" not in vcfg:
         vcfg["storage"] = cfg.get("storage")

     if not vcfg.get("storage"):
         raise ValueError("invalid configuration: missing 'storage' config entry.")

     return cfg


 def get_cooker(obj_type: str, obj_id: str):
     """Instantiate a cooker of type obj_type.

     Returns:
         Cooker instance in charge of cooking the obj_type with id obj_id.

     Raises:
         ValueError in case of a missing top-level vault key configuration or a
         storage key.
         EnvironmentError in case the vault configuration references a non-remote
         class.

     """
     if "SWH_CONFIG_FILENAME" in os.environ:
         cfg = read_config(os.environ["SWH_CONFIG_FILENAME"], DEFAULT_CONFIG)
     else:
         cfg = load_named_config(DEFAULT_CONFIG_PATH, DEFAULT_CONFIG)
     cooker_cls = get_cooker_cls(obj_type)
     cfg = check_config(cfg)
     vcfg = cfg["vault"]

     storage = get_storage(**vcfg.pop("storage"))
     backend = get_vault(**vcfg)

     try:
         from swh.graph.client import RemoteGraphClient  # optional dependency

         graph = RemoteGraphClient(**vcfg["graph"]) if vcfg.get("graph") else None
     except ModuleNotFoundError:
         if vcfg.get("graph"):
             raise EnvironmentError(
                 "Graph configuration required but module is not installed."
             )
         else:
             graph = None

     return cooker_cls(
         obj_type,
         obj_id,
         backend=backend,
         storage=storage,
         graph=graph,
         max_bundle_size=cfg["max_bundle_size"],
     )
diff --git a/swh/vault/cookers/git_bare.py b/swh/vault/cookers/git_bare.py
index f6e18c8..13e8a49 100644
--- a/swh/vault/cookers/git_bare.py
+++ b/swh/vault/cookers/git_bare.py
@@ -1,358 +1,408 @@
 # Copyright (C) 2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 """
 This cooker creates tarballs containing a bare .git directory,
 that can be unpacked and cloned like any git repository.

 It works in three steps:

 1. Writes objects one by one in :file:`.git/objects/`
 2. Calls ``git repack`` to pack all these objects into git packfiles.
 3. Creates a tarball of the resulting repository

 It keeps a set of all written (or about-to-be-written) object hashes in memory
 to avoid downloading and writing the same objects twice.
 """

 import datetime
 import os.path
 import re
 import subprocess
 import tarfile
 import tempfile
-from typing import Any, Dict, Iterable, List, Set
+from typing import Any, Dict, Iterable, List, Optional, Set
 import zlib

 from swh.core.api.classes import stream_results
 from swh.model import identifiers
 from swh.model.hashutil import hash_to_bytehex, hash_to_hex
 from swh.model.model import (
     Person,
     Revision,
     RevisionType,
     Sha1Git,
+    TargetType,
     TimestampWithTimezone,
 )
 from swh.storage.algos.revisions_walker import DFSRevisionsWalker
+from swh.storage.algos.snapshot import snapshot_get_all_branches
 from swh.vault.cookers.base import BaseVaultCooker
 from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE

 REVISION_BATCH_SIZE = 10000
 DIRECTORY_BATCH_SIZE = 10000
 CONTENT_BATCH_SIZE = 100


 class GitBareCooker(BaseVaultCooker):
     use_fsck = True

     def cache_type_key(self) -> str:
         return self.obj_type

     def check_exists(self):
         obj_type = self.obj_type.split("_")[0]
         if obj_type == "revision":
             return not list(self.storage.revision_missing([self.obj_id]))
         elif obj_type == "directory":
             return not list(self.storage.directory_missing([self.obj_id]))
+        elif obj_type == "snapshot":
+            return not list(self.storage.snapshot_missing([self.obj_id]))
         else:
             raise NotImplementedError(f"GitBareCooker for {obj_type}")

     def obj_swhid(self) -> identifiers.CoreSWHID:
         obj_type = self.obj_type.split("_")[0]
         return identifiers.CoreSWHID(
             object_type=identifiers.ObjectType[obj_type.upper()],
             object_id=self.obj_id,
         )

     def _push(self, stack: List[Sha1Git], obj_ids: Iterable[Sha1Git]) -> None:
         assert not isinstance(obj_ids, bytes)
         new_ids = [id_ for id_ in obj_ids if id_ not in self._seen]
         self._seen.update(new_ids)
         stack.extend(new_ids)

     def _pop(self, stack: List[Sha1Git], n: int) -> List[Sha1Git]:
         obj_ids = stack[-n:]
         stack[-n:] = []
         return obj_ids

     def prepare_bundle(self):
         # Objects we will visit soon:
         self._rev_stack: List[Sha1Git] = []
         self._dir_stack: List[Sha1Git] = []
         self._cnt_stack: List[Sha1Git] = []

         # Set of objects already in any of the stacks:
         self._seen: Set[Sha1Git] = set()
+        self._walker_state: Optional[Any] = None

         # Set of errors we expect git-fsck to raise at the end:
         self._expected_fsck_errors = set()

         with tempfile.TemporaryDirectory(prefix="swh-vault-gitbare-") as workdir:
             # Initialize a Git directory
             self.workdir = workdir
             self.gitdir = os.path.join(workdir, "clone.git")
             os.mkdir(self.gitdir)
             self.init_git()

             # Add the root object to the stack of objects to visit
             self.push_subgraph(self.obj_type.split("_")[0], self.obj_id)

             # Load and write all the objects to disk
             self.load_objects()

-            # Write the root object as a ref.
+            # Write the root object as a ref (this step is skipped if it's a snapshot)
             # This must be done before repacking; git-repack ignores orphan objects.
             self.write_refs()

             self.repack()
             self.write_archive()

     def init_git(self) -> None:
         subprocess.run(["git", "-C", self.gitdir, "init", "--bare"], check=True)

         # Create all possible dirs ahead of time, so we don't have to check for
         # existence every time.
         for byte in range(256):
             os.mkdir(os.path.join(self.gitdir, "objects", f"{byte:02x}"))

     def repack(self) -> None:
         if self.use_fsck:
             self.git_fsck()

         # Add objects we wrote in a pack
         subprocess.run(["git", "-C", self.gitdir, "repack"], check=True)

         # Remove their non-packed originals
         subprocess.run(["git", "-C", self.gitdir, "prune-packed"], check=True)

     def git_fsck(self) -> None:
         proc = subprocess.run(
             ["git", "-C", self.gitdir, "fsck"],
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
             env={"LANG": "C.utf8"},
         )

         if not self._expected_fsck_errors:
             # All went well, there should not be any error
             proc.check_returncode()
             return

         # Split on newlines not followed by a space
         errors = re.split("\n(?! )", proc.stdout.decode())

         unexpected_errors = set(filter(bool, errors)) - self._expected_fsck_errors
         if unexpected_errors:
             raise Exception(
                 "\n".join(
                     ["Unexpected errors from git-fsck:"] + sorted(unexpected_errors)
                 )
             )

-    def write_refs(self):
+    def write_refs(self, snapshot=None):
+        refs: Dict[bytes, bytes]  # ref name -> target
         obj_type = self.obj_type.split("_")[0]
         if obj_type == "directory":
             # We need a synthetic revision pointing to the directory
             author = Person.from_fullname(
                 b"swh-vault, git-bare cooker <robot@softwareheritage.org>"
             )
             dt = datetime.datetime.now(tz=datetime.timezone.utc)
             dt = dt.replace(microsecond=0)  # not supported by git
             date = TimestampWithTimezone.from_datetime(dt)
             revision = Revision(
                 author=author,
                 committer=author,
                 date=date,
                 committer_date=date,
                 message=b"Initial commit",
                 type=RevisionType.GIT,
                 directory=self.obj_id,
                 synthetic=True,
             )
             self.write_revision_node(revision.to_dict())
-            head = revision.id
+            refs = {b"refs/heads/master": hash_to_bytehex(revision.id)}
         elif obj_type == "revision":
-            head = self.obj_id
+            refs = {b"refs/heads/master": hash_to_bytehex(self.obj_id)}
+        elif obj_type == "snapshot":
+            if snapshot is None:
+                # refs were already written in a previous step
+                return
+            refs = {
+                branch_name: (
+                    b"ref: " + branch.target
+                    if branch.target_type == TargetType.ALIAS
+                    else hash_to_bytehex(branch.target)
+                )
+                for (branch_name, branch) in snapshot.branches.items()
+            }
         else:
             assert False, obj_type

-        with open(os.path.join(self.gitdir, "refs", "heads", "master"), "wb") as fd:
-            fd.write(hash_to_bytehex(head))
+        for (ref_name, ref_target) in refs.items():
+            path = os.path.join(self.gitdir.encode(), ref_name)
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            with open(path, "wb") as fd:
+                fd.write(ref_target)

     def write_archive(self):
         with tarfile.TarFile(mode="w", fileobj=self.fileobj) as tf:
             tf.add(self.gitdir, arcname=f"{self.obj_swhid()}.git", recursive=True)

     def _obj_path(self, obj_id: Sha1Git):
         return os.path.join(self.gitdir, self._obj_relative_path(obj_id))

     def _obj_relative_path(self, obj_id: Sha1Git):
         obj_id_hex = hash_to_hex(obj_id)
         directory = obj_id_hex[0:2]
         filename = obj_id_hex[2:]
         return os.path.join("objects", directory, filename)

     def object_exists(self, obj_id: Sha1Git) -> bool:
         return os.path.exists(self._obj_path(obj_id))
     def write_object(self, obj_id: Sha1Git, obj: bytes) -> bool:
         """Writes a git object on disk.

         Returns whether the object was written."""
         # Git requires objects to be zlib-compressed; but repacking decompresses and
         # removes them, so we don't need to compress them too much.
         data = zlib.compress(obj, level=1)

         with open(self._obj_path(obj_id), "wb") as fd:
             fd.write(data)
         return True

     def push_subgraph(self, obj_type, obj_id) -> None:
         if obj_type == "revision":
             self.push_revision_subgraph(obj_id)
         elif obj_type == "directory":
             self._push(self._dir_stack, [obj_id])
+        elif obj_type == "snapshot":
+            self.push_snapshot_subgraph(obj_id)
         else:
             raise NotImplementedError(
                 f"GitBareCooker.push_subgraph({obj_type!r}, ...)"
             )

     def load_objects(self) -> None:
         while self._rev_stack or self._dir_stack or self._cnt_stack:
             revision_ids = self._pop(self._rev_stack, REVISION_BATCH_SIZE)
             self.load_revisions(revision_ids)

             directory_ids = self._pop(self._dir_stack, DIRECTORY_BATCH_SIZE)
             self.load_directories(directory_ids)

             content_ids = self._pop(self._cnt_stack, CONTENT_BATCH_SIZE)
             self.load_contents(content_ids)

     def push_revision_subgraph(self, obj_id: Sha1Git) -> None:
         """Fetches a revision and all its children, and writes them to disk"""
         loaded_from_graph = False

         if self.graph:
             from swh.graph.client import GraphArgumentException

             # First, try to cook using swh-graph, as it is more efficient than
             # swh-storage for querying the history
             obj_swhid = identifiers.CoreSWHID(
                 object_type=identifiers.ObjectType.REVISION,
                 object_id=obj_id,
             )
             try:
                 revision_ids = (
                     swhid.object_id
                     for swhid in map(
                         identifiers.CoreSWHID.from_string,
                         self.graph.visit_nodes(str(obj_swhid), edges="rev:rev"),
                     )
                 )
                 self._push(self._rev_stack, revision_ids)
             except GraphArgumentException:
                 # Revision not found in the graph
                 pass
             else:
                 loaded_from_graph = True

         if not loaded_from_graph:
             # If swh-graph is not available, or the revision is not yet in
             # swh-graph, fall back to self.storage.revision_log.
             # self.storage.revision_log also gives us the full revisions,
             # so we load them right now instead of just pushing them on the stack.
-            walker = DFSRevisionsWalker(self.storage, obj_id)
+            walker = DFSRevisionsWalker(self.storage, obj_id, state=self._walker_state)
             for revision in walker:
                 self.write_revision_node(revision)
                 self._push(self._dir_stack, [revision["directory"]])
+            # Save the state, so the next call to the walker won't return the same
+            # revisions
+            self._walker_state = walker.export_state()
+
+    def push_snapshot_subgraph(self, obj_id: Sha1Git) -> None:
+        """Fetches a snapshot and all its children, and writes them to disk"""
+        loaded_from_graph = False
+
+        if self.graph:
+            pass  # TODO
+
+        # TODO: when self.graph is available and supports edge labels, use it
+        # directly to get branch names.
+        snapshot = snapshot_get_all_branches(self.storage, obj_id)
+        assert snapshot, "Unknown snapshot"  # should have been caught by check_exists()
+        for branch in snapshot.branches.values():
+            if not loaded_from_graph:
+                if branch.target_type == TargetType.REVISION:
+                    self.push_revision_subgraph(branch.target)
+                elif branch.target_type == TargetType.ALIAS:
+                    # Nothing to do, this for loop also iterates on the target branch
+                    # (if it exists)
+                    pass
+                else:
+                    raise NotImplementedError(f"{branch.target_type} branches")
+
+        self.write_refs(snapshot=snapshot)

     def load_revisions(self, obj_ids: List[Sha1Git]) -> None:
         """Given a list of revision ids, loads these revisions and their
         directories; but not their parent revisions."""
         revisions = self.storage.revision_get(obj_ids)
         for revision in revisions:
             self.write_revision_node(revision.to_dict())
         self._push(self._dir_stack, (rev.directory for rev in revisions))

     def write_revision_node(self, revision: Dict[str, Any]) -> bool:
         """Writes a revision object to disk"""
         git_object = identifiers.revision_git_object(revision)
         return self.write_object(revision["id"], git_object)

     def load_directories(self, obj_ids: List[Sha1Git]) -> None:
         for obj_id in obj_ids:
             self.load_directory(obj_id)

     def load_directory(self, obj_id: Sha1Git) -> None:
         # Load the directory
         entries = [
             entry.to_dict()
             for entry in stream_results(self.storage.directory_get_entries, obj_id)
         ]
         directory = {"id": obj_id, "entries": entries}
         git_object = identifiers.directory_git_object(directory)
         self.write_object(obj_id, git_object)

         # Add children to the stack
         entry_loaders: Dict[str, List[Sha1Git]] = {
             "file": self._cnt_stack,
             "dir": self._dir_stack,
             "rev": self._rev_stack,
         }
         for entry in directory["entries"]:
             stack = entry_loaders[entry["type"]]
             self._push(stack, [entry["target"]])

     def load_contents(self, obj_ids: List[Sha1Git]) -> None:
         # TODO: add support of filtered objects, somehow?
         # It's tricky, because, by definition, we can't write a git object with
         # the expected hash, so git-fsck *will* choke on it.
         contents = self.storage.content_get(obj_ids, "sha1_git")

         visible_contents = []
         for (obj_id, content) in zip(obj_ids, contents):
             if content is None:
                 # FIXME: this may also happen for missing content
                 self.write_content(obj_id, SKIPPED_MESSAGE)
                 self._expect_mismatched_object_error(obj_id)
             elif content.status == "visible":
                 visible_contents.append(content)
             elif content.status == "hidden":
                 self.write_content(obj_id, HIDDEN_MESSAGE)
                 self._expect_mismatched_object_error(obj_id)
             else:
                 assert False, (
                     f"unexpected status {content.status!r} "
                     f"for content {hash_to_hex(content.sha1_git)}"
                 )

         if self.objstorage is None:
             for content in visible_contents:
                 data = self.storage.content_get_data(content.sha1)
                 self.write_content(content.sha1_git, data)
         else:
             content_data = self.objstorage.get_batch(c.sha1 for c in visible_contents)
             for (content, data) in zip(visible_contents, content_data):
                 self.write_content(content.sha1_git, data)

     def write_content(self, obj_id: Sha1Git, content: bytes) -> None:
         header = identifiers.git_object_header("blob", len(content))
         self.write_object(obj_id, header + content)

     def _expect_mismatched_object_error(self, obj_id):
         obj_id_hex = hash_to_hex(obj_id)
         obj_path = self._obj_relative_path(obj_id)

         # For Git < 2.21:
         self._expected_fsck_errors.add(
             f"error: sha1 mismatch for ./{obj_path} (expected {obj_id_hex})"
         )
         # For Git >= 2.21:
         self._expected_fsck_errors.add(
             f"error: hash mismatch for ./{obj_path} (expected {obj_id_hex})"
         )

         self._expected_fsck_errors.add(
             f"error: {obj_id_hex}: object corrupt or missing: ./{obj_path}"
         )
         self._expected_fsck_errors.add(f"missing blob {obj_id_hex}")
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
index d48d86c..b105806 100644
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -1,827 +1,981 @@
 # Copyright (C) 2017-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import contextlib
 import datetime
 import glob
 import gzip
 import io
 import os
 import pathlib
 import shutil
 import subprocess
 import tarfile
 import tempfile
 import unittest
 import unittest.mock

 import dulwich.fastexport
 import dulwich.index
 import dulwich.objects
 import dulwich.porcelain
 import dulwich.repo
 import pytest

 from swh.loader.git.from_disk import GitLoaderFromDisk
-from swh.model import from_disk, hashutil
+from swh.model import from_disk, hashutil, identifiers
 from swh.model.model import (
     Directory,
     DirectoryEntry,
     Person,
     Revision,
     RevisionType,
     TimestampWithTimezone,
 )
 from swh.vault.cookers import DirectoryCooker, GitBareCooker, RevisionGitfastCooker
 from swh.vault.tests.vault_testing import hash_content
 from swh.vault.to_disk import HIDDEN_MESSAGE, SKIPPED_MESSAGE


 class TestRepo:
     """A tiny context manager for a test git repository, with some utility
     functions to perform basic git stuff.
""" def __init__(self, repo_dir=None): self.repo_dir = repo_dir def __enter__(self): if self.repo_dir: self.tmp_dir = None self.repo = dulwich.repo.Repo(self.repo_dir) else: self.tmp_dir = tempfile.TemporaryDirectory(prefix="tmp-vault-repo-") self.repo_dir = self.tmp_dir.__enter__() self.repo = dulwich.repo.Repo.init(self.repo_dir) self.author_name = b"Test Author" self.author_email = b"test@softwareheritage.org" self.author = b"%s <%s>" % (self.author_name, self.author_email) self.base_date = 258244200 self.counter = 0 return pathlib.Path(self.repo_dir) def __exit__(self, exc, value, tb): if self.tmp_dir is not None: self.tmp_dir.__exit__(exc, value, tb) self.repo_dir = None def checkout(self, rev_sha): rev = self.repo[rev_sha] dulwich.index.build_index_from_tree( str(self.repo_dir), self.repo.index_path(), self.repo.object_store, rev.tree ) def git_shell(self, *cmd, stdout=subprocess.DEVNULL, **kwargs): name = self.author_name email = self.author_email date = "%d +0000" % (self.base_date + self.counter) env = { # Set git commit format "GIT_AUTHOR_NAME": name, "GIT_AUTHOR_EMAIL": email, "GIT_AUTHOR_DATE": date, "GIT_COMMITTER_NAME": name, "GIT_COMMITTER_EMAIL": email, "GIT_COMMITTER_DATE": date, # Ignore all the system-wide and user configurations "GIT_CONFIG_NOSYSTEM": "1", "HOME": str(self.tmp_dir), "XDG_CONFIG_HOME": str(self.tmp_dir), } kwargs.setdefault("env", {}).update(env) subprocess.check_call( ("git", "-C", self.repo_dir) + cmd, stdout=stdout, **kwargs ) def commit(self, message="Commit test\n", ref=b"HEAD"): """Commit the current working tree in a new commit with message on the branch 'ref'. At the end of the commit, the reference should stay the same and the index should be clean. """ paths = [ os.path.relpath(path, self.repo_dir) for path in glob.glob(self.repo_dir + "/**/*", recursive=True) ] self.repo.stage(paths) message = message.encode() + b"\n" ret = self.repo.do_commit( message=message, committer=self.author, commit_timestamp=self.base_date + self.counter, commit_timezone=0, ref=ref, ) self.counter += 1 # committing on another branch leaves # dangling files in index if ref != b"HEAD": # XXX this should work (but does not) # dulwich.porcelain.reset(self.repo, 'hard') self.git_shell("reset", "--hard", "HEAD") return ret def merge(self, parent_sha_list, message="Merge branches."): self.git_shell( "merge", "--allow-unrelated-histories", "-m", message, *[p.decode() for p in parent_sha_list], ) self.counter += 1 return self.repo.refs[b"HEAD"] def print_debug_graph(self, reflog=False): args = ["log", "--all", "--graph", "--decorate"] if reflog: args.append("--reflog") self.git_shell(*args, stdout=None) @pytest.fixture def git_loader(swh_storage,): """Instantiate a Git Loader using the storage instance as storage. 
""" def _create_loader(directory): return GitLoaderFromDisk( swh_storage, "fake_origin", directory=directory, visit_date=datetime.datetime.now(datetime.timezone.utc), ) return _create_loader @contextlib.contextmanager def cook_extract_directory_dircooker(storage, obj_id, fsck=True): """Context manager that cooks a directory and extract it.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = DirectoryCooker("directory", obj_id, backend=backend, storage=storage) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) cooker.storage = None @contextlib.contextmanager def cook_extract_directory_gitfast(storage, obj_id, fsck=True): """Context manager that cooks a revision containing a directory and extract it, using RevisionGitfastCooker""" test_repo = TestRepo() with test_repo as p: date = TimestampWithTimezone.from_datetime( datetime.datetime.now(datetime.timezone.utc) ) revision = Revision( directory=obj_id, message=b"dummy message", author=Person.from_fullname(b"someone"), committer=Person.from_fullname(b"someone"), date=date, committer_date=date, type=RevisionType.GIT, synthetic=False, ) storage.revision_add([revision]) with cook_stream_revision_gitfast(storage, revision.id) as stream, test_repo as p: processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) processor.import_stream(stream) test_repo.checkout(b"HEAD") shutil.rmtree(p / ".git") yield p @contextlib.contextmanager def cook_extract_directory_git_bare( storage, obj_id, fsck=True, direct_objstorage=False ): """Context manager that cooks a revision and extract it, using GitBareCooker""" backend = unittest.mock.MagicMock() backend.storage = storage # Cook the object cooker = GitBareCooker( "directory", obj_id, backend=backend, storage=storage, objstorage=storage.objstorage if direct_objstorage else None, ) cooker.use_fsck = fsck # Some tests try edge-cases that git-fsck rejects cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) # Extract it with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td: with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar: tar.extractall(td) # Clone it with Dulwich with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir: clone_dir = pathlib.Path(clone_dir) subprocess.check_call( [ "git", "clone", os.path.join(td, f"swh:1:dir:{obj_id.hex()}.git"), clone_dir, ] ) shutil.rmtree(clone_dir / ".git") yield clone_dir @pytest.fixture( scope="module", params=[ cook_extract_directory_dircooker, cook_extract_directory_gitfast, cook_extract_directory_git_bare, ], ) def cook_extract_directory(request): """A fixture that is instantiated as either cook_extract_directory_dircooker or cook_extract_directory_git_bare.""" return request.param @contextlib.contextmanager def cook_stream_revision_gitfast(storage, obj_id): """Context manager that cooks a revision and stream its fastexport.""" backend = unittest.mock.MagicMock() backend.storage = storage cooker = RevisionGitfastCooker( "revision_gitfast", obj_id, backend=backend, storage=storage ) cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) yield fastexport_stream cooker.storage = None 
 @contextlib.contextmanager
 def cook_extract_revision_gitfast(storage, obj_id, fsck=True):
     """Context manager that cooks a revision and extracts it,
     using RevisionGitfastCooker"""
     test_repo = TestRepo()
     with cook_stream_revision_gitfast(storage, obj_id) as stream, test_repo as p:
         processor = dulwich.fastexport.GitImportProcessor(test_repo.repo)
         processor.import_stream(stream)
         yield test_repo, p


 @contextlib.contextmanager
-def cook_extract_revision_git_bare(storage, obj_id, fsck=True):
+def cook_extract_git_bare(storage, swhid, fsck=True):
     """Context manager that cooks the object addressed by swhid and extracts it,
     using GitBareCooker"""
     backend = unittest.mock.MagicMock()
     backend.storage = storage

     # Cook the object
-    cooker = GitBareCooker("revision", obj_id, backend=backend, storage=storage)
+    cooker = GitBareCooker(
+        swhid.object_type.name.lower(),
+        swhid.object_id,
+        backend=backend,
+        storage=storage,
+    )
     cooker.use_fsck = fsck  # Some tests try edge-cases that git-fsck rejects
     cooker.fileobj = io.BytesIO()
     assert cooker.check_exists()
     cooker.prepare_bundle()
     cooker.fileobj.seek(0)

     # Extract it
     with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td:
         with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar:
             tar.extractall(td)

         # Clone it with Dulwich
         with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir:
             clone_dir = pathlib.Path(clone_dir)
             subprocess.check_call(
-                [
-                    "git",
-                    "clone",
-                    os.path.join(td, f"swh:1:rev:{obj_id.hex()}.git"),
-                    clone_dir,
-                ]
+                ["git", "clone", os.path.join(td, f"{swhid}.git"), clone_dir]
             )
             test_repo = TestRepo(clone_dir)
             with test_repo:
                 yield test_repo, clone_dir


+@contextlib.contextmanager
+def cook_extract_revision_git_bare(storage, obj_id, fsck=True):
+    with cook_extract_git_bare(
+        storage,
+        identifiers.CoreSWHID(
+            object_type=identifiers.ObjectType.REVISION, object_id=obj_id
+        ),
+        fsck=fsck,
+    ) as res:
+        yield res
+
+
 @pytest.fixture(
     scope="module",
     params=[cook_extract_revision_gitfast, cook_extract_revision_git_bare],
 )
 def cook_extract_revision(request):
     """A fixture that is instantiated as either cook_extract_revision_gitfast or
     cook_extract_revision_git_bare."""
     return request.param


+@contextlib.contextmanager
+def cook_extract_snapshot_git_bare(storage, obj_id, fsck=True):
+    with cook_extract_git_bare(
+        storage,
+        identifiers.CoreSWHID(
+            object_type=identifiers.ObjectType.SNAPSHOT, object_id=obj_id
+        ),
+        fsck=fsck,
+    ) as res:
+        yield res
+
+
+@pytest.fixture(
+    scope="module", params=[cook_extract_snapshot_git_bare],
+)
+def cook_extract_snapshot(request):
+    """Currently equivalent to using cook_extract_snapshot_git_bare directly;
+    kept analogous to cook_extract_revision in case more cookers support
+    snapshots later."""
+    return request.param
+
+
 TEST_CONTENT = (
     " test content\n" "and unicode \N{BLACK HEART SUIT}\n" " and trailing spaces "
 )
 TEST_EXECUTABLE = b"\x42\x40\x00\x00\x05"


 class TestDirectoryCooker:
     def test_directory_simple(self, git_loader, cook_extract_directory):
         repo = TestRepo()
         with repo as rp:
             (rp / "file").write_text(TEST_CONTENT)
             (rp / "executable").write_bytes(TEST_EXECUTABLE)
             (rp / "executable").chmod(0o755)
             (rp / "link").symlink_to("file")
             (rp / "dir1/dir2").mkdir(parents=True)
             (rp / "dir1/dir2/file").write_text(TEST_CONTENT)
             c = repo.commit()
         loader = git_loader(str(rp))
         loader.load()

         obj_id_hex = repo.repo[c].tree.decode()
         obj_id = hashutil.hash_to_bytes(obj_id_hex)

         with cook_extract_directory(loader.storage, obj_id) as p:
             assert (p / "file").stat().st_mode == 0o100644
             assert (p / "file").read_text() == TEST_CONTENT
assert (p / "executable").stat().st_mode == 0o100755 assert (p / "executable").read_bytes() == TEST_EXECUTABLE assert (p / "link").is_symlink() assert os.readlink(str(p / "link")) == "file" assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT directory = from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) def test_directory_filtered_objects(self, git_loader, cook_extract_directory): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) # FIXME: storage.content_update() should be changed to allow things # like that with loader.storage.get_db().transaction() as cur: cur.execute( """update content set status = 'visible' where sha1 = %s""", (id_1,), ) cur.execute( """update content set status = 'hidden' where sha1 = %s""", (id_2,), ) cur.execute( """ insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, reason) select sha1, sha1_git, sha256, blake2s256, length, 'no reason' from content where sha1 = %s """, (id_3,), ) cur.execute("delete from content where sha1 = %s", (id_3,)) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def test_directory_bogus_perms(self, git_loader, cook_extract_directory): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the directory # cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "file").chmod(0o664) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o775) (rp / "wat").write_text(TEST_CONTENT) (rp / "wat").chmod(0o604) # Disable mode cleanup with unittest.mock.patch("dulwich.index.cleanup_mode", lambda mode: mode): c = repo.commit() # Make sure Dulwich didn't normalize the permissions itself. 
# (if it did, then the test can't check the cooker normalized them) tree_id = repo.repo[c].tree assert {entry.mode for entry in repo.repo[tree_id].items()} == { 0o100775, 0o100664, 0o100604, } # Disable mode checks with unittest.mock.patch("dulwich.objects.Tree.check", lambda self: None): loader = git_loader(str(rp)) loader.load() # Make sure swh-loader didn't normalize them either dir_entries = loader.storage.directory_ls(hashutil.bytehex_to_hash(tree_id)) assert {entry["perms"] for entry in dir_entries} == { 0o100664, 0o100775, 0o100604, } obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with cook_extract_directory(loader.storage, obj_id) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "executable").stat().st_mode == 0o100755 assert (p / "wat").stat().st_mode == 0o100644 @pytest.mark.parametrize("direct_objstorage", [True, False]) def test_directory_objstorage( self, swh_storage, git_loader, mocker, direct_objstorage ): """Like test_directory_simple, but using swh_objstorage directly, without going through swh_storage.content_get_data()""" repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) (rp / "executable").write_bytes(TEST_EXECUTABLE) (rp / "executable").chmod(0o755) (rp / "link").symlink_to("file") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) c = repo.commit() loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) # Set-up spies storage_content_get_data = mocker.patch.object( swh_storage, "content_get_data", wraps=swh_storage.content_get_data ) objstorage_content_batch = mocker.patch.object( swh_storage.objstorage, "get_batch", wraps=swh_storage.objstorage.get_batch ) with cook_extract_directory_git_bare( loader.storage, obj_id, direct_objstorage=direct_objstorage ) as p: assert (p / "file").stat().st_mode == 0o100644 assert (p / "file").read_text() == TEST_CONTENT assert (p / "executable").stat().st_mode == 0o100755 assert (p / "executable").read_bytes() == TEST_EXECUTABLE assert (p / "link").is_symlink() assert os.readlink(str(p / "link")) == "file" assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT directory = from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) if direct_objstorage: storage_content_get_data.assert_not_called() objstorage_content_batch.assert_called() else: storage_content_get_data.assert_called() objstorage_content_batch.assert_not_called() def test_directory_revision_data(self, swh_storage): target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd" dir = Directory( entries=( DirectoryEntry( name=b"submodule", type="rev", target=hashutil.hash_to_bytes(target_rev), perms=0o100644, ), ), ) swh_storage.directory_add([dir]) with cook_extract_directory_dircooker(swh_storage, dir.id, fsck=False) as p: assert (p / "submodule").is_symlink() assert os.readlink(str(p / "submodule")) == target_rev class RepoFixtures: """Shared loading and checking methods that can be reused by different types of tests.""" def load_repo_simple(self, git_loader): # # 1--2--3--4--5--6--7 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) repo.commit("add file1") (rp / "file2").write_text(TEST_CONTENT) repo.commit("add file2") (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) repo.commit("add dir1/dir2/file") (rp / 
"bin1").write_bytes(TEST_EXECUTABLE) (rp / "bin1").chmod(0o755) repo.commit("add bin1") (rp / "link1").symlink_to("file1") repo.commit("link link1 to file1") (rp / "file2").unlink() repo.commit("remove file2") (rp / "bin1").rename(rp / "bin") repo.commit("rename bin1 to bin") loader = git_loader(str(rp)) loader.load() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) return (loader, obj_id) def check_revision_simple(self, ert, p, obj_id): ert.checkout(b"HEAD") assert (p / "file1").stat().st_mode == 0o100644 assert (p / "file1").read_text() == TEST_CONTENT assert (p / "link1").is_symlink() assert os.readlink(str(p / "link1")) == "file1" assert (p / "bin").stat().st_mode == 0o100755 assert (p / "bin").read_bytes() == TEST_EXECUTABLE assert (p / "dir1/dir2/file").read_text() == TEST_CONTENT assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() def load_repo_two_roots(self, git_loader): # # 1----3---4 # / # 2---- # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Add file1") del repo.repo.refs[b"refs/heads/master"] # git update-ref -d HEAD (rp / "file2").write_text(TEST_CONTENT) repo.commit("Add file2") repo.merge([c1]) (rp / "file3").write_text(TEST_CONTENT) repo.commit("add file3") obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() return (loader, obj_id) def check_revision_two_roots(self, ert, p, obj_id): assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() + (c3,) = ert.repo[hashutil.hash_to_bytehex(obj_id)].parents + assert len(ert.repo[c3].parents) == 2 + + def load_repo_two_heads(self, git_loader): + # + # 1---2----4 <-- master and b1 + # \ + # ----3 <-- b2 + # + repo = TestRepo() + with repo as rp: + (rp / "file1").write_text(TEST_CONTENT) + repo.commit("Add file1") + + (rp / "file2").write_text(TEST_CONTENT) + c2 = repo.commit("Add file2") + + repo.repo.refs[b"refs/heads/b2"] = c2 # branch b2 from master + + (rp / "file3").write_text(TEST_CONTENT) + repo.commit("add file3", ref=b"refs/heads/b2") + + (rp / "file4").write_text(TEST_CONTENT) + c4 = repo.commit("add file4", ref=b"refs/heads/master") + repo.repo.refs[b"refs/heads/b1"] = c4 # branch b1 from master + + obj_id_hex = repo.repo.refs[b"HEAD"].decode() + obj_id = hashutil.hash_to_bytes(obj_id_hex) + loader = git_loader(str(rp)) + loader.load() + return (loader, obj_id) + + def check_snapshot_two_heads(self, ert, p, obj_id): + assert ( + hashutil.hash_to_bytehex(obj_id) + == ert.repo.refs[b"HEAD"] + == ert.repo.refs[b"refs/heads/master"] + == ert.repo.refs[b"refs/remotes/origin/HEAD"] + == ert.repo.refs[b"refs/remotes/origin/master"] + == ert.repo.refs[b"refs/remotes/origin/b1"] + ) + + c4_id = hashutil.hash_to_bytehex(obj_id) + c3_id = ert.repo.refs[b"refs/remotes/origin/b2"] + + assert ert.repo[c3_id].parents == ert.repo[c4_id].parents + def load_repo_two_double_fork_merge(self, git_loader): # # 2---4---6 # / / / # 1---3---5 # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) - c1 = repo.commit("Add file1") - repo.repo.refs[b"refs/heads/c1"] = c1 + c1 = repo.commit("Add file1") # create commit 1 + repo.repo.refs[b"refs/heads/c1"] = c1 # branch c1 from master (rp / "file2").write_text(TEST_CONTENT) - repo.commit("Add file2") + repo.commit("Add file2") # create commit 2 (rp / "file3").write_text(TEST_CONTENT) - c3 = repo.commit("Add file3", ref=b"refs/heads/c1") - 
repo.repo.refs[b"refs/heads/c3"] = c3 + c3 = repo.commit("Add file3", ref=b"refs/heads/c1") # create commit 3 on c1 + repo.repo.refs[b"refs/heads/c3"] = c3 # branch c3 from c1 - repo.merge([c3]) + repo.merge([c3]) # create commit 4 (rp / "file5").write_text(TEST_CONTENT) - c5 = repo.commit("Add file3", ref=b"refs/heads/c3") + c5 = repo.commit("Add file3", ref=b"refs/heads/c3") # create commit 5 on c3 - repo.merge([c5]) + repo.merge([c5]) # create commit 6 obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() return (loader, obj_id) def check_revision_two_double_fork_merge(self, ert, p, obj_id): assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() + def check_snapshot_two_double_fork_merge(self, ert, p, obj_id): + assert ( + hashutil.hash_to_bytehex(obj_id) + == ert.repo.refs[b"HEAD"] + == ert.repo.refs[b"refs/heads/master"] + == ert.repo.refs[b"refs/remotes/origin/HEAD"] + == ert.repo.refs[b"refs/remotes/origin/master"] + ) + + (c4_id, c5_id) = ert.repo[obj_id.hex().encode()].parents + assert c5_id == ert.repo.refs[b"refs/remotes/origin/c3"] + + (c2_id, c3_id) = ert.repo[c4_id].parents + assert c3_id == ert.repo.refs[b"refs/remotes/origin/c1"] + def load_repo_triple_merge(self, git_loader): # # .---.---5 # / / / # 2 3 4 # / / / # 1---.---. # repo = TestRepo() with repo as rp: (rp / "file1").write_text(TEST_CONTENT) c1 = repo.commit("Commit 1") repo.repo.refs[b"refs/heads/b1"] = c1 repo.repo.refs[b"refs/heads/b2"] = c1 repo.commit("Commit 2") c3 = repo.commit("Commit 3", ref=b"refs/heads/b1") c4 = repo.commit("Commit 4", ref=b"refs/heads/b2") repo.merge([c3, c4]) obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() return (loader, obj_id) def check_revision_triple_merge(self, ert, p, obj_id): assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() + def check_snapshot_triple_merge(self, ert, p, obj_id): + assert ( + hashutil.hash_to_bytehex(obj_id) + == ert.repo.refs[b"HEAD"] + == ert.repo.refs[b"refs/heads/master"] + == ert.repo.refs[b"refs/remotes/origin/HEAD"] + == ert.repo.refs[b"refs/remotes/origin/master"] + ) + + (c2_id, c3_id, c4_id) = ert.repo[obj_id.hex().encode()].parents + assert c3_id == ert.repo.refs[b"refs/remotes/origin/b1"] + assert c4_id == ert.repo.refs[b"refs/remotes/origin/b2"] + + assert ( + ert.repo[c2_id].parents + == ert.repo[c3_id].parents + == ert.repo[c4_id].parents + ) + def load_repo_filtered_objects(self, git_loader): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") file_2, id_2 = hash_content(b"test2") file_3, id_3 = hash_content(b"test3") (rp / "file").write_bytes(file_1) (rp / "hidden_file").write_bytes(file_2) (rp / "absent_file").write_bytes(file_3) repo.commit() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) loader = git_loader(str(rp)) loader.load() # FIXME: storage.content_update() should be changed to allow things # like that with loader.storage.get_db().transaction() as cur: cur.execute( """update content set status = 'visible' where sha1 = %s""", (id_1,), ) cur.execute( """update content set status = 'hidden' where sha1 = %s""", (id_2,), ) cur.execute( """ insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, reason) select sha1, sha1_git, sha256, blake2s256, length, 'no reason' from content where sha1 = %s """, (id_3,), ) cur.execute("delete from content where sha1 = %s", (id_3,)) return (loader, obj_id) 
def check_revision_filtered_objects(self, ert, p, obj_id): ert.checkout(b"HEAD") assert (p / "file").read_bytes() == b"test1" assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE def load_repo_null_fields(self, git_loader): # Our schema doesn't enforce a lot of non-null revision fields. We need # to check these cases don't break the cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) c = repo.commit("initial commit") loader = git_loader(str(rp)) loader.load() repo.repo.refs[b"HEAD"].decode() dir_id_hex = repo.repo[c].tree.decode() dir_id = hashutil.hash_to_bytes(dir_id_hex) test_revision = Revision( message=b"", author=Person(name=None, email=None, fullname=b""), date=None, committer=Person(name=None, email=None, fullname=b""), committer_date=None, parents=(), type=RevisionType.GIT, directory=dir_id, metadata={}, synthetic=True, ) storage = loader.storage storage.revision_add([test_revision]) return (loader, test_revision.id) def check_revision_null_fields(self, ert, p, obj_id): ert.checkout(b"HEAD") assert (p / "file").stat().st_mode == 0o100644 class TestRevisionCooker(RepoFixtures): def test_revision_simple(self, git_loader, cook_extract_revision): (loader, obj_id) = self.load_repo_simple(git_loader) with cook_extract_revision(loader.storage, obj_id) as (ert, p): self.check_revision_simple(ert, p, obj_id) def test_revision_two_roots(self, git_loader, cook_extract_revision): (loader, obj_id) = self.load_repo_two_roots(git_loader) with cook_extract_revision(loader.storage, obj_id) as (ert, p): self.check_revision_two_roots(ert, p, obj_id) def test_revision_two_double_fork_merge(self, git_loader, cook_extract_revision): (loader, obj_id) = self.load_repo_two_double_fork_merge(git_loader) with cook_extract_revision(loader.storage, obj_id) as (ert, p): self.check_revision_two_double_fork_merge(ert, p, obj_id) def test_revision_triple_merge(self, git_loader, cook_extract_revision): (loader, obj_id) = self.load_repo_triple_merge(git_loader) with cook_extract_revision(loader.storage, obj_id) as (ert, p): self.check_revision_triple_merge(ert, p, obj_id) def test_revision_filtered_objects(self, git_loader, cook_extract_revision): (loader, obj_id) = self.load_repo_filtered_objects(git_loader) with cook_extract_revision(loader.storage, obj_id) as (ert, p): self.check_revision_filtered_objects(ert, p, obj_id) def test_revision_null_fields(self, git_loader, cook_extract_revision): (loader, obj_id) = self.load_repo_null_fields(git_loader) with cook_extract_revision(loader.storage, obj_id, fsck=False) as (ert, p): self.check_revision_null_fields(ert, p, obj_id) def test_revision_revision_data(self, swh_storage): target_rev = "0e8a3ad980ec179856012b7eecf4327e99cd44cd" dir = Directory( entries=( DirectoryEntry( name=b"submodule", type="rev", target=hashutil.hash_to_bytes(target_rev), perms=0o100644, ), ), ) swh_storage.directory_add([dir]) rev = Revision( message=b"", author=Person(name=None, email=None, fullname=b""), date=None, committer=Person(name=None, email=None, fullname=b""), committer_date=None, parents=(), type=RevisionType.GIT, directory=dir.id, metadata={}, synthetic=True, ) swh_storage.revision_add([rev]) with cook_stream_revision_gitfast(swh_storage, rev.id) as stream: pattern = "M 160000 {} submodule".format(target_rev).encode() assert pattern in stream.read() + + +class TestSnapshotCooker(RepoFixtures): + def test_snapshot_simple(self, git_loader, cook_extract_snapshot): + (loader, main_rev_id) = 
self.load_repo_simple(git_loader) + snp_id = loader.loaded_snapshot_id + with cook_extract_snapshot(loader.storage, snp_id) as (ert, p): + self.check_revision_simple(ert, p, main_rev_id) + + def test_snapshot_two_roots(self, git_loader, cook_extract_snapshot): + (loader, main_rev_id) = self.load_repo_two_roots(git_loader) + snp_id = loader.loaded_snapshot_id + with cook_extract_snapshot(loader.storage, snp_id) as (ert, p): + self.check_revision_two_roots(ert, p, main_rev_id) + + def test_snapshot_two_heads(self, git_loader, cook_extract_snapshot): + (loader, main_rev_id) = self.load_repo_two_heads(git_loader) + snp_id = loader.loaded_snapshot_id + with cook_extract_snapshot(loader.storage, snp_id) as (ert, p): + self.check_snapshot_two_heads(ert, p, main_rev_id) + + def test_snapshot_two_double_fork_merge(self, git_loader, cook_extract_snapshot): + (loader, main_rev_id) = self.load_repo_two_double_fork_merge(git_loader) + snp_id = loader.loaded_snapshot_id + with cook_extract_snapshot(loader.storage, snp_id) as (ert, p): + self.check_revision_two_double_fork_merge(ert, p, main_rev_id) + self.check_snapshot_two_double_fork_merge(ert, p, main_rev_id) + + def test_snapshot_triple_merge(self, git_loader, cook_extract_snapshot): + (loader, main_rev_id) = self.load_repo_triple_merge(git_loader) + snp_id = loader.loaded_snapshot_id + with cook_extract_snapshot(loader.storage, snp_id) as (ert, p): + self.check_revision_triple_merge(ert, p, main_rev_id) + self.check_snapshot_triple_merge(ert, p, main_rev_id) + + def test_snapshot_filtered_objects(self, git_loader, cook_extract_snapshot): + (loader, main_rev_id) = self.load_repo_filtered_objects(git_loader) + snp_id = loader.loaded_snapshot_id + with cook_extract_snapshot(loader.storage, snp_id) as (ert, p): + self.check_revision_filtered_objects(ert, p, main_rev_id)
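
Outside the test suite, the new `snapshot_git_bare` cooker type is reached through `swh.vault.cookers.get_cooker`, which reads its configuration from the file named by `SWH_CONFIG_FILENAME` (see the first hunk of this patch). Below is a hedged end-to-end sketch; the URLs and the all-zeros id are placeholders, and a real deployment needs reachable remote vault and storage services, plus an optional `graph` entry if swh-graph is installed:

```python
# Hypothetical configuration and entry point for the new cooker type.
import os
import tempfile

from swh.vault.cookers import get_cooker

CONFIG = """\
vault:
  cls: remote
  url: http://localhost:5005/
storage:
  cls: remote
  url: http://localhost:5002/
max_bundle_size: 536870912  # 512 MiB
"""

with tempfile.NamedTemporaryFile("w", suffix=".yml", delete=False) as f:
    f.write(CONFIG)

os.environ["SWH_CONFIG_FILENAME"] = f.name
cooker = get_cooker("snapshot_git_bare", "0" * 40)  # placeholder snapshot id
```
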