Changeset View
Changeset View
Standalone View
Standalone View
swh/vault/tests/test_cookers.py
Show All 19 Lines | |||||
import dulwich.fastexport | import dulwich.fastexport | ||||
import dulwich.index | import dulwich.index | ||||
import dulwich.objects | import dulwich.objects | ||||
import dulwich.porcelain | import dulwich.porcelain | ||||
import dulwich.repo | import dulwich.repo | ||||
import pytest | import pytest | ||||
from swh.loader.git.from_disk import GitLoaderFromDisk | from swh.loader.git.from_disk import GitLoaderFromDisk | ||||
from swh.model import from_disk, hashutil | from swh.model import from_disk, hashutil, identifiers | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Directory, | Directory, | ||||
DirectoryEntry, | DirectoryEntry, | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 252 Lines • ▼ Show 20 Lines | def cook_extract_revision_gitfast(storage, obj_id, fsck=True): | ||||
test_repo = TestRepo() | test_repo = TestRepo() | ||||
with cook_stream_revision_gitfast(storage, obj_id) as stream, test_repo as p: | with cook_stream_revision_gitfast(storage, obj_id) as stream, test_repo as p: | ||||
processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) | processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) | ||||
processor.import_stream(stream) | processor.import_stream(stream) | ||||
yield test_repo, p | yield test_repo, p | ||||
@contextlib.contextmanager
def cook_extract_git_bare(storage, swhid, fsck=True):
    """Context manager: cook the object named by ``swhid`` with
    :class:`GitBareCooker`, extract the resulting bundle, and yield a
    ``(TestRepo, clone_path)`` pair for a clone of the extracted repository.

    Args:
        storage: swh storage instance holding the object to cook
        swhid: a ``CoreSWHID`` naming the object to cook (its type selects
            the cooker's object type, so this works for revisions and
            snapshots alike -- not just revisions)
        fsck: when False, skip git-fsck validation; some tests exercise
            edge-cases that git-fsck rejects
    """
    backend = unittest.mock.MagicMock()
    backend.storage = storage

    # Cook the object into an in-memory tarball of a bare git repository.
    cooker = GitBareCooker(
        swhid.object_type.name.lower(),
        swhid.object_id,
        backend=backend,
        storage=storage,
    )
    cooker.use_fsck = fsck  # Some tests try edge-cases that git-fsck rejects
    cooker.fileobj = io.BytesIO()
    assert cooker.check_exists()
    cooker.prepare_bundle()
    cooker.fileobj.seek(0)

    # Extract the cooked bundle to a temporary directory.
    with tempfile.TemporaryDirectory(prefix="tmp-vault-extract-") as td:
        with tarfile.open(fileobj=cooker.fileobj, mode="r") as tar:
            tar.extractall(td)

        # Clone the extracted bare repo (via the git CLI, not dulwich) so
        # tests get a working checkout to inspect.
        with tempfile.TemporaryDirectory(prefix="tmp-vault-clone-") as clone_dir:
            clone_dir = pathlib.Path(clone_dir)
            subprocess.check_call(
                ["git", "clone", os.path.join(td, f"{swhid}.git"), clone_dir,]
            )
            test_repo = TestRepo(clone_dir)
            with test_repo:
                yield test_repo, clone_dir
@contextlib.contextmanager
def cook_extract_revision_git_bare(storage, obj_id, fsck=True):
    """Cook and extract the revision ``obj_id`` through cook_extract_git_bare."""
    swhid = identifiers.CoreSWHID(
        object_type=identifiers.ObjectType.REVISION, object_id=obj_id
    )
    with cook_extract_git_bare(storage, swhid, fsck=fsck) as res:
        yield res
@pytest.fixture(
    scope="module",
    params=[cook_extract_revision_gitfast, cook_extract_revision_git_bare],
)
def cook_extract_revision(request):
    """Parametrized fixture yielding each revision cook-and-extract helper in
    turn: first the gitfast cooker, then the git-bare cooker."""
    return request.param
@contextlib.contextmanager
def cook_extract_snapshot_git_bare(storage, obj_id, fsck=True):
    """Cook and extract the snapshot ``obj_id`` through cook_extract_git_bare."""
    swhid = identifiers.CoreSWHID(
        object_type=identifiers.ObjectType.SNAPSHOT, object_id=obj_id
    )
    with cook_extract_git_bare(storage, swhid, fsck=fsck) as res:
        yield res
@pytest.fixture(
    scope="module", params=[cook_extract_snapshot_git_bare],
)
def cook_extract_snapshot(request):
    """Currently always cook_extract_snapshot_git_bare; kept parametrized
    (mirroring cook_extract_revision) so additional snapshot-capable cookers
    can be slotted in later."""
    return request.param
# Sample file payloads shared by the cooker tests below; TEST_CONTENT mixes
# leading/trailing whitespace and non-ASCII to exercise encoding handling.
TEST_CONTENT = (
    " test content\nand unicode \N{BLACK HEART SUIT}\n and trailing spaces "
)
TEST_EXECUTABLE = b"\x42\x40\x00\x00\x05"
class TestDirectoryCooker: | class TestDirectoryCooker: | ||||
def test_directory_simple(self, git_loader, cook_extract_directory): | def test_directory_simple(self, git_loader, cook_extract_directory): | ||||
▲ Show 20 Lines • Show All 254 Lines • ▼ Show 20 Lines | def load_repo_two_roots(self, git_loader): | ||||
(rp / "file3").write_text(TEST_CONTENT) | (rp / "file3").write_text(TEST_CONTENT) | ||||
repo.commit("add file3") | repo.commit("add file3") | ||||
obj_id_hex = repo.repo.refs[b"HEAD"].decode() | obj_id_hex = repo.repo.refs[b"HEAD"].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
loader = git_loader(str(rp)) | loader = git_loader(str(rp)) | ||||
loader.load() | loader.load() | ||||
return (loader, obj_id) | return (loader, obj_id) | ||||
def check_revision_two_roots(self, ert, p, obj_id): | def check_revision_two_roots(self, ert, p, obj_id): | ||||
ardumont: what does `ert` and `p` stand for?
might be `p` is for `pointer`. | |||||
Done Inline ActionsI don't know, I'm just copy-pasting vlorentz: I don't know, I'm just copy-pasting | |||||
assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() | assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() | ||||
(c3,) = ert.repo[hashutil.hash_to_bytehex(obj_id)].parents | |||||
assert len(ert.repo[c3].parents) == 2 | |||||
def load_repo_two_heads(self, git_loader): | |||||
# | |||||
# 1---2----4 <-- master and b1 | |||||
# \ | |||||
# ----3 <-- b2 | |||||
# | |||||
repo = TestRepo() | |||||
with repo as rp: | |||||
(rp / "file1").write_text(TEST_CONTENT) | |||||
repo.commit("Add file1") | |||||
(rp / "file2").write_text(TEST_CONTENT) | |||||
c2 = repo.commit("Add file2") | |||||
repo.repo.refs[b"refs/heads/b2"] = c2 # branch b2 from master | |||||
(rp / "file3").write_text(TEST_CONTENT) | |||||
repo.commit("add file3", ref=b"refs/heads/b2") | |||||
(rp / "file4").write_text(TEST_CONTENT) | |||||
c4 = repo.commit("add file4", ref=b"refs/heads/master") | |||||
repo.repo.refs[b"refs/heads/b1"] = c4 # branch b1 from master | |||||
obj_id_hex = repo.repo.refs[b"HEAD"].decode() | |||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | |||||
loader = git_loader(str(rp)) | |||||
loader.load() | |||||
return (loader, obj_id) | |||||
def check_snapshot_two_heads(self, ert, p, obj_id): | |||||
assert ( | |||||
hashutil.hash_to_bytehex(obj_id) | |||||
== ert.repo.refs[b"HEAD"] | |||||
== ert.repo.refs[b"refs/heads/master"] | |||||
== ert.repo.refs[b"refs/remotes/origin/HEAD"] | |||||
== ert.repo.refs[b"refs/remotes/origin/master"] | |||||
== ert.repo.refs[b"refs/remotes/origin/b1"] | |||||
) | |||||
c4_id = hashutil.hash_to_bytehex(obj_id) | |||||
c3_id = ert.repo.refs[b"refs/remotes/origin/b2"] | |||||
assert ert.repo[c3_id].parents == ert.repo[c4_id].parents | |||||
def load_repo_two_double_fork_merge(self, git_loader): | def load_repo_two_double_fork_merge(self, git_loader): | ||||
# | # | ||||
# 2---4---6 | # 2---4---6 | ||||
# / / / | # / / / | ||||
# 1---3---5 | # 1---3---5 | ||||
# | # | ||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / "file1").write_text(TEST_CONTENT) | (rp / "file1").write_text(TEST_CONTENT) | ||||
c1 = repo.commit("Add file1") | c1 = repo.commit("Add file1") # create commit 1 | ||||
repo.repo.refs[b"refs/heads/c1"] = c1 | repo.repo.refs[b"refs/heads/c1"] = c1 # branch c1 from master | ||||
(rp / "file2").write_text(TEST_CONTENT) | (rp / "file2").write_text(TEST_CONTENT) | ||||
repo.commit("Add file2") | repo.commit("Add file2") # create commit 2 | ||||
(rp / "file3").write_text(TEST_CONTENT) | (rp / "file3").write_text(TEST_CONTENT) | ||||
c3 = repo.commit("Add file3", ref=b"refs/heads/c1") | c3 = repo.commit("Add file3", ref=b"refs/heads/c1") # create commit 3 on c1 | ||||
repo.repo.refs[b"refs/heads/c3"] = c3 | repo.repo.refs[b"refs/heads/c3"] = c3 # branch c3 from c1 | ||||
repo.merge([c3]) | repo.merge([c3]) # create commit 4 | ||||
(rp / "file5").write_text(TEST_CONTENT) | (rp / "file5").write_text(TEST_CONTENT) | ||||
c5 = repo.commit("Add file3", ref=b"refs/heads/c3") | c5 = repo.commit("Add file3", ref=b"refs/heads/c3") # create commit 5 on c3 | ||||
repo.merge([c5]) | repo.merge([c5]) # create commit 6 | ||||
obj_id_hex = repo.repo.refs[b"HEAD"].decode() | obj_id_hex = repo.repo.refs[b"HEAD"].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
loader = git_loader(str(rp)) | loader = git_loader(str(rp)) | ||||
loader.load() | loader.load() | ||||
return (loader, obj_id) | return (loader, obj_id) | ||||
def check_revision_two_double_fork_merge(self, ert, p, obj_id): | def check_revision_two_double_fork_merge(self, ert, p, obj_id): | ||||
assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() | assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() | ||||
def check_snapshot_two_double_fork_merge(self, ert, p, obj_id): | |||||
assert ( | |||||
hashutil.hash_to_bytehex(obj_id) | |||||
== ert.repo.refs[b"HEAD"] | |||||
== ert.repo.refs[b"refs/heads/master"] | |||||
== ert.repo.refs[b"refs/remotes/origin/HEAD"] | |||||
== ert.repo.refs[b"refs/remotes/origin/master"] | |||||
) | |||||
(c4_id, c5_id) = ert.repo[obj_id.hex().encode()].parents | |||||
assert c5_id == ert.repo.refs[b"refs/remotes/origin/c3"] | |||||
(c2_id, c3_id) = ert.repo[c4_id].parents | |||||
assert c3_id == ert.repo.refs[b"refs/remotes/origin/c1"] | |||||
def load_repo_triple_merge(self, git_loader): | def load_repo_triple_merge(self, git_loader): | ||||
# | # | ||||
# .---.---5 | # .---.---5 | ||||
# / / / | # / / / | ||||
# 2 3 4 | # 2 3 4 | ||||
# / / / | # / / / | ||||
# 1---.---. | # 1---.---. | ||||
# | # | ||||
Show All 13 Lines | def load_repo_triple_merge(self, git_loader): | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
loader = git_loader(str(rp)) | loader = git_loader(str(rp)) | ||||
loader.load() | loader.load() | ||||
return (loader, obj_id) | return (loader, obj_id) | ||||
def check_revision_triple_merge(self, ert, p, obj_id): | def check_revision_triple_merge(self, ert, p, obj_id): | ||||
assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() | assert ert.repo.refs[b"HEAD"].decode() == obj_id.hex() | ||||
def check_snapshot_triple_merge(self, ert, p, obj_id): | |||||
assert ( | |||||
hashutil.hash_to_bytehex(obj_id) | |||||
== ert.repo.refs[b"HEAD"] | |||||
== ert.repo.refs[b"refs/heads/master"] | |||||
== ert.repo.refs[b"refs/remotes/origin/HEAD"] | |||||
== ert.repo.refs[b"refs/remotes/origin/master"] | |||||
) | |||||
(c2_id, c3_id, c4_id) = ert.repo[obj_id.hex().encode()].parents | |||||
assert c3_id == ert.repo.refs[b"refs/remotes/origin/b1"] | |||||
assert c4_id == ert.repo.refs[b"refs/remotes/origin/b2"] | |||||
assert ( | |||||
ert.repo[c2_id].parents | |||||
== ert.repo[c3_id].parents | |||||
== ert.repo[c4_id].parents | |||||
) | |||||
def load_repo_filtered_objects(self, git_loader): | def load_repo_filtered_objects(self, git_loader): | ||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
file_1, id_1 = hash_content(b"test1") | file_1, id_1 = hash_content(b"test1") | ||||
file_2, id_2 = hash_content(b"test2") | file_2, id_2 = hash_content(b"test2") | ||||
file_3, id_3 = hash_content(b"test3") | file_3, id_3 = hash_content(b"test3") | ||||
(rp / "file").write_bytes(file_1) | (rp / "file").write_bytes(file_1) | ||||
▲ Show 20 Lines • Show All 133 Lines • ▼ Show 20 Lines | def test_revision_revision_data(self, swh_storage): | ||||
metadata={}, | metadata={}, | ||||
synthetic=True, | synthetic=True, | ||||
) | ) | ||||
swh_storage.revision_add([rev]) | swh_storage.revision_add([rev]) | ||||
with cook_stream_revision_gitfast(swh_storage, rev.id) as stream: | with cook_stream_revision_gitfast(swh_storage, rev.id) as stream: | ||||
pattern = "M 160000 {} submodule".format(target_rev).encode() | pattern = "M 160000 {} submodule".format(target_rev).encode() | ||||
assert pattern in stream.read() | assert pattern in stream.read() | ||||
class TestSnapshotCooker(RepoFixtures):
    """Cook each fixture repo's whole snapshot, extract it, and verify the
    refs and history of the resulting git repository."""

    def test_snapshot_simple(self, git_loader, cook_extract_snapshot):
        loader, main_rev_id = self.load_repo_simple(git_loader)
        snapshot_id = loader.loaded_snapshot_id
        with cook_extract_snapshot(loader.storage, snapshot_id) as (ert, p):
            self.check_revision_simple(ert, p, main_rev_id)

    def test_snapshot_two_roots(self, git_loader, cook_extract_snapshot):
        loader, main_rev_id = self.load_repo_two_roots(git_loader)
        snapshot_id = loader.loaded_snapshot_id
        with cook_extract_snapshot(loader.storage, snapshot_id) as (ert, p):
            self.check_revision_two_roots(ert, p, main_rev_id)

    def test_snapshot_two_heads(self, git_loader, cook_extract_snapshot):
        loader, main_rev_id = self.load_repo_two_heads(git_loader)
        snapshot_id = loader.loaded_snapshot_id
        with cook_extract_snapshot(loader.storage, snapshot_id) as (ert, p):
            self.check_snapshot_two_heads(ert, p, main_rev_id)

    def test_snapshot_two_double_fork_merge(self, git_loader, cook_extract_snapshot):
        loader, main_rev_id = self.load_repo_two_double_fork_merge(git_loader)
        snapshot_id = loader.loaded_snapshot_id
        with cook_extract_snapshot(loader.storage, snapshot_id) as (ert, p):
            # Check both the revision-level and snapshot-level invariants.
            self.check_revision_two_double_fork_merge(ert, p, main_rev_id)
            self.check_snapshot_two_double_fork_merge(ert, p, main_rev_id)

    def test_snapshot_triple_merge(self, git_loader, cook_extract_snapshot):
        loader, main_rev_id = self.load_repo_triple_merge(git_loader)
        snapshot_id = loader.loaded_snapshot_id
        with cook_extract_snapshot(loader.storage, snapshot_id) as (ert, p):
            # Check both the revision-level and snapshot-level invariants.
            self.check_revision_triple_merge(ert, p, main_rev_id)
            self.check_snapshot_triple_merge(ert, p, main_rev_id)

    def test_snapshot_filtered_objects(self, git_loader, cook_extract_snapshot):
        loader, main_rev_id = self.load_repo_filtered_objects(git_loader)
        snapshot_id = loader.loaded_snapshot_id
        with cook_extract_snapshot(loader.storage, snapshot_id) as (ert, p):
            self.check_revision_filtered_objects(ert, p, main_rev_id)
What do `ert` and `p` stand for?
Maybe `p` stands for `pointer`.