Changeset View
Changeset View
Standalone View
Standalone View
swh/vault/tests/test_cookers.py
Show All 20 Lines | |||||
import dulwich.objects | import dulwich.objects | ||||
import dulwich.porcelain | import dulwich.porcelain | ||||
import dulwich.repo | import dulwich.repo | ||||
from swh.loader.git.from_disk import GitLoaderFromDisk | from swh.loader.git.from_disk import GitLoaderFromDisk | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.from_disk import Directory | from swh.model.from_disk import Directory | ||||
from swh.vault.cookers import DirectoryCooker, RevisionGitfastCooker | from swh.vault.cookers import DirectoryCooker, RevisionGitfastCooker | ||||
from swh.vault.tests.vault_testing import VaultTestFixture, hash_content | from swh.vault.tests.vault_testing import hash_content | ||||
from swh.vault.to_disk import SKIPPED_MESSAGE, HIDDEN_MESSAGE | from swh.vault.to_disk import SKIPPED_MESSAGE, HIDDEN_MESSAGE | ||||
class TestRepo: | class TestRepo: | ||||
"""A tiny context manager for a test git repository, with some utility | """A tiny context manager for a test git repository, with some utility | ||||
functions to perform basic git stuff. | functions to perform basic git stuff. | ||||
""" | """ | ||||
def __enter__(self): | def __enter__(self): | ||||
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines | class TestRepo: | ||||
def print_debug_graph(self, reflog=False): | def print_debug_graph(self, reflog=False): | ||||
args = ['log', '--all', '--graph', '--decorate'] | args = ['log', '--all', '--graph', '--decorate'] | ||||
if reflog: | if reflog: | ||||
args.append('--reflog') | args.append('--reflog') | ||||
self.git_shell(*args, stdout=None) | self.git_shell(*args, stdout=None) | ||||
@pytest.mark.config_issue | @pytest.fixture | ||||
class BaseTestCookers(VaultTestFixture): | def swh_git_loader(swh_vault): | ||||
"""Base class of cookers unit tests""" | loader = GitLoaderFromDisk() | ||||
def setUp(self): | loader.storage = swh_vault.storage | ||||
super().setUp() | return loader | ||||
self.loader = GitLoaderFromDisk() | |||||
self.loader.storage = self.storage | |||||
def tearDown(self): | |||||
self.loader = None | |||||
super().tearDown() | |||||
def load(self, repo_path): | |||||
def load(loader, repo_path): | |||||
"""Load a repository in the test storage""" | """Load a repository in the test storage""" | ||||
self.loader.load('fake_origin', repo_path, datetime.datetime.now()) | loader.load('fake_origin', repo_path, datetime.datetime.now()) | ||||
@contextlib.contextmanager | @contextlib.contextmanager | ||||
def cook_extract_directory(self, obj_id): | def cook_extract_directory(storage, obj_id): | ||||
"""Context manager that cooks a directory and extracts it.""" | """Context manager that cooks a directory and extracts it.""" | ||||
cooker = DirectoryCooker('directory', obj_id) | cooker = DirectoryCooker( | ||||
cooker.storage = self.storage | 'directory', obj_id, | ||||
cooker.backend = unittest.mock.MagicMock() | backend=unittest.mock.MagicMock(), | ||||
storage=storage) | |||||
cooker.fileobj = io.BytesIO() | cooker.fileobj = io.BytesIO() | ||||
assert cooker.check_exists() | assert cooker.check_exists() | ||||
cooker.prepare_bundle() | cooker.prepare_bundle() | ||||
cooker.fileobj.seek(0) | cooker.fileobj.seek(0) | ||||
with tempfile.TemporaryDirectory(prefix='tmp-vault-extract-') as td: | with tempfile.TemporaryDirectory(prefix='tmp-vault-extract-') as td: | ||||
with tarfile.open(fileobj=cooker.fileobj, mode='r') as tar: | with tarfile.open(fileobj=cooker.fileobj, mode='r') as tar: | ||||
tar.extractall(td) | tar.extractall(td) | ||||
yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) | yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) | ||||
cooker.storage = None | cooker.storage = None | ||||
@contextlib.contextmanager | @contextlib.contextmanager | ||||
def cook_stream_revision_gitfast(self, obj_id): | def cook_stream_revision_gitfast(storage, obj_id): | ||||
"""Context manager that cooks a revision and streams its fastexport.""" | """Context manager that cooks a revision and streams its fastexport.""" | ||||
cooker = RevisionGitfastCooker('revision_gitfast', obj_id) | cooker = RevisionGitfastCooker( | ||||
cooker.storage = self.storage | 'revision_gitfast', obj_id, | ||||
cooker.backend = unittest.mock.MagicMock() | backend=unittest.mock.MagicMock(), | ||||
storage=storage) | |||||
cooker.fileobj = io.BytesIO() | cooker.fileobj = io.BytesIO() | ||||
assert cooker.check_exists() | assert cooker.check_exists() | ||||
cooker.prepare_bundle() | cooker.prepare_bundle() | ||||
cooker.fileobj.seek(0) | cooker.fileobj.seek(0) | ||||
fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) | fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) | ||||
yield fastexport_stream | yield fastexport_stream | ||||
cooker.storage = None | cooker.storage = None | ||||
@contextlib.contextmanager | @contextlib.contextmanager | ||||
def cook_extract_revision_gitfast(self, obj_id): | def cook_extract_revision_gitfast(storage, obj_id): | ||||
"""Context manager that cooks a revision and extracts it.""" | """Context manager that cooks a revision and extracts it.""" | ||||
test_repo = TestRepo() | test_repo = TestRepo() | ||||
with self.cook_stream_revision_gitfast(obj_id) as stream, \ | with cook_stream_revision_gitfast(storage, obj_id) as stream, \ | ||||
test_repo as p: | test_repo as p: | ||||
processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) | processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) | ||||
processor.import_stream(stream) | processor.import_stream(stream) | ||||
yield test_repo, p | yield test_repo, p | ||||
TEST_CONTENT = (" test content\n" | TEST_CONTENT = (" test content\n" | ||||
"and unicode \N{BLACK HEART SUIT}\n" | "and unicode \N{BLACK HEART SUIT}\n" | ||||
" and trailing spaces ") | " and trailing spaces ") | ||||
TEST_EXECUTABLE = b'\x42\x40\x00\x00\x05' | TEST_EXECUTABLE = b'\x42\x40\x00\x00\x05' | ||||
class TestDirectoryCooker(BaseTestCookers, unittest.TestCase): | class TestDirectoryCooker: | ||||
def test_directory_simple(self): | def test_directory_simple(self, swh_git_loader): | ||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file').write_text(TEST_CONTENT) | (rp / 'file').write_text(TEST_CONTENT) | ||||
(rp / 'executable').write_bytes(TEST_EXECUTABLE) | (rp / 'executable').write_bytes(TEST_EXECUTABLE) | ||||
(rp / 'executable').chmod(0o755) | (rp / 'executable').chmod(0o755) | ||||
(rp / 'link').symlink_to('file') | (rp / 'link').symlink_to('file') | ||||
(rp / 'dir1/dir2').mkdir(parents=True) | (rp / 'dir1/dir2').mkdir(parents=True) | ||||
(rp / 'dir1/dir2/file').write_text(TEST_CONTENT) | (rp / 'dir1/dir2/file').write_text(TEST_CONTENT) | ||||
c = repo.commit() | c = repo.commit() | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
obj_id_hex = repo.repo[c].tree.decode() | obj_id_hex = repo.repo[c].tree.decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
with self.cook_extract_directory(obj_id) as p: | with cook_extract_directory(swh_git_loader.storage, obj_id) as p: | ||||
self.assertEqual((p / 'file').stat().st_mode, 0o100644) | assert (p / 'file').stat().st_mode == 0o100644 | ||||
self.assertEqual((p / 'file').read_text(), TEST_CONTENT) | assert (p / 'file').read_text() == TEST_CONTENT | ||||
self.assertEqual((p / 'executable').stat().st_mode, 0o100755) | assert (p / 'executable').stat().st_mode == 0o100755 | ||||
self.assertEqual((p / 'executable').read_bytes(), TEST_EXECUTABLE) | assert (p / 'executable').read_bytes() == TEST_EXECUTABLE | ||||
self.assertTrue((p / 'link').is_symlink) | assert (p / 'link').is_symlink | ||||
self.assertEqual(os.readlink(str(p / 'link')), 'file') | assert os.readlink(str(p / 'link')) == 'file' | ||||
self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644) | assert (p / 'dir1/dir2/file').stat().st_mode == 0o100644 | ||||
self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT) | assert (p / 'dir1/dir2/file').read_text() == TEST_CONTENT | ||||
directory = Directory.from_disk(path=bytes(p)) | directory = Directory.from_disk(path=bytes(p)) | ||||
self.assertEqual(obj_id_hex, hashutil.hash_to_hex(directory.hash)) | assert obj_id_hex == hashutil.hash_to_hex(directory.hash) | ||||
def test_directory_filtered_objects(self): | def test_directory_filtered_objects(self, swh_git_loader): | ||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
file_1, id_1 = hash_content(b'test1') | file_1, id_1 = hash_content(b'test1') | ||||
file_2, id_2 = hash_content(b'test2') | file_2, id_2 = hash_content(b'test2') | ||||
file_3, id_3 = hash_content(b'test3') | file_3, id_3 = hash_content(b'test3') | ||||
(rp / 'file').write_bytes(file_1) | (rp / 'file').write_bytes(file_1) | ||||
(rp / 'hidden_file').write_bytes(file_2) | (rp / 'hidden_file').write_bytes(file_2) | ||||
(rp / 'absent_file').write_bytes(file_3) | (rp / 'absent_file').write_bytes(file_3) | ||||
c = repo.commit() | c = repo.commit() | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
obj_id_hex = repo.repo[c].tree.decode() | obj_id_hex = repo.repo[c].tree.decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
# FIXME: storage.content_update() should be changed to allow things | # FIXME: storage.content_update() should be changed to allow things | ||||
# like that | # like that | ||||
with self.storage.get_db().transaction() as cur: | with swh_git_loader.storage.get_db().transaction() as cur: | ||||
cur.execute("""update content set status = 'visible' | cur.execute("""update content set status = 'visible' | ||||
where sha1 = %s""", (id_1,)) | where sha1 = %s""", (id_1,)) | ||||
cur.execute("""update content set status = 'hidden' | cur.execute("""update content set status = 'hidden' | ||||
where sha1 = %s""", (id_2,)) | where sha1 = %s""", (id_2,)) | ||||
cur.execute("""update content set status = 'absent' | cur.execute("""update content set status = 'absent' | ||||
where sha1 = %s""", (id_3,)) | where sha1 = %s""", (id_3,)) | ||||
with self.cook_extract_directory(obj_id) as p: | with cook_extract_directory(swh_git_loader.storage, obj_id) as p: | ||||
self.assertEqual((p / 'file').read_bytes(), b'test1') | assert (p / 'file').read_bytes() == b'test1' | ||||
self.assertEqual((p / 'hidden_file').read_bytes(), HIDDEN_MESSAGE) | assert (p / 'hidden_file').read_bytes() == HIDDEN_MESSAGE | ||||
self.assertEqual((p / 'absent_file').read_bytes(), SKIPPED_MESSAGE) | assert (p / 'absent_file').read_bytes() == SKIPPED_MESSAGE | ||||
def test_directory_bogus_perms(self): | def test_directory_bogus_perms(self, swh_git_loader): | ||||
# Some early git repositories have 664/775 permissions... let's check | # Some early git repositories have 664/775 permissions... let's check | ||||
# if all the weird modes are properly normalized in the directory | # if all the weird modes are properly normalized in the directory | ||||
# cooker. | # cooker. | ||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file').write_text(TEST_CONTENT) | (rp / 'file').write_text(TEST_CONTENT) | ||||
(rp / 'file').chmod(0o664) | (rp / 'file').chmod(0o664) | ||||
(rp / 'executable').write_bytes(TEST_EXECUTABLE) | (rp / 'executable').write_bytes(TEST_EXECUTABLE) | ||||
(rp / 'executable').chmod(0o775) | (rp / 'executable').chmod(0o775) | ||||
(rp / 'wat').write_text(TEST_CONTENT) | (rp / 'wat').write_text(TEST_CONTENT) | ||||
(rp / 'wat').chmod(0o604) | (rp / 'wat').chmod(0o604) | ||||
c = repo.commit() | c = repo.commit() | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
obj_id_hex = repo.repo[c].tree.decode() | obj_id_hex = repo.repo[c].tree.decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
with self.cook_extract_directory(obj_id) as p: | with cook_extract_directory(swh_git_loader.storage, obj_id) as p: | ||||
self.assertEqual((p / 'file').stat().st_mode, 0o100644) | assert (p / 'file').stat().st_mode == 0o100644 | ||||
self.assertEqual((p / 'executable').stat().st_mode, 0o100755) | assert (p / 'executable').stat().st_mode == 0o100755 | ||||
self.assertEqual((p / 'wat').stat().st_mode, 0o100644) | assert (p / 'wat').stat().st_mode == 0o100644 | ||||
def test_directory_revision_data(self): | def test_directory_revision_data(self, swh_git_loader): | ||||
target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' | target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' | ||||
d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') | d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') | ||||
dir = { | dir = { | ||||
'id': d, | 'id': d, | ||||
'entries': [ | 'entries': [ | ||||
{ | { | ||||
'name': b'submodule', | 'name': b'submodule', | ||||
'type': 'rev', | 'type': 'rev', | ||||
'target': hashutil.hash_to_bytes(target_rev), | 'target': hashutil.hash_to_bytes(target_rev), | ||||
'perms': 0o100644, | 'perms': 0o100644, | ||||
} | } | ||||
], | ], | ||||
} | } | ||||
self.storage.directory_add([dir]) | swh_git_loader.storage.directory_add([dir]) | ||||
with self.cook_extract_directory(d) as p: | with cook_extract_directory(swh_git_loader.storage, d) as p: | ||||
self.assertTrue((p / 'submodule').is_symlink()) | assert (p / 'submodule').is_symlink() | ||||
self.assertEqual(os.readlink(str(p / 'submodule')), target_rev) | assert os.readlink(str(p / 'submodule')) == target_rev | ||||
class TestRevisionGitfastCooker(BaseTestCookers, unittest.TestCase): | class TestRevisionGitfastCooker: | ||||
def test_revision_simple(self): | def test_revision_simple(self, swh_git_loader): | ||||
# | # | ||||
# 1--2--3--4--5--6--7 | # 1--2--3--4--5--6--7 | ||||
# | # | ||||
storage = swh_git_loader.storage | |||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file1').write_text(TEST_CONTENT) | (rp / 'file1').write_text(TEST_CONTENT) | ||||
repo.commit('add file1') | repo.commit('add file1') | ||||
(rp / 'file2').write_text(TEST_CONTENT) | (rp / 'file2').write_text(TEST_CONTENT) | ||||
repo.commit('add file2') | repo.commit('add file2') | ||||
(rp / 'dir1/dir2').mkdir(parents=True) | (rp / 'dir1/dir2').mkdir(parents=True) | ||||
(rp / 'dir1/dir2/file').write_text(TEST_CONTENT) | (rp / 'dir1/dir2/file').write_text(TEST_CONTENT) | ||||
repo.commit('add dir1/dir2/file') | repo.commit('add dir1/dir2/file') | ||||
(rp / 'bin1').write_bytes(TEST_EXECUTABLE) | (rp / 'bin1').write_bytes(TEST_EXECUTABLE) | ||||
(rp / 'bin1').chmod(0o755) | (rp / 'bin1').chmod(0o755) | ||||
repo.commit('add bin1') | repo.commit('add bin1') | ||||
(rp / 'link1').symlink_to('file1') | (rp / 'link1').symlink_to('file1') | ||||
repo.commit('link link1 to file1') | repo.commit('link link1 to file1') | ||||
(rp / 'file2').unlink() | (rp / 'file2').unlink() | ||||
repo.commit('remove file2') | repo.commit('remove file2') | ||||
(rp / 'bin1').rename(rp / 'bin') | (rp / 'bin1').rename(rp / 'bin') | ||||
repo.commit('rename bin1 to bin') | repo.commit('rename bin1 to bin') | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
obj_id_hex = repo.repo.refs[b'HEAD'].decode() | obj_id_hex = repo.repo.refs[b'HEAD'].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
with self.cook_extract_revision_gitfast(obj_id) as (ert, p): | with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): | ||||
ert.checkout(b'HEAD') | ert.checkout(b'HEAD') | ||||
self.assertEqual((p / 'file1').stat().st_mode, 0o100644) | assert (p / 'file1').stat().st_mode == 0o100644 | ||||
self.assertEqual((p / 'file1').read_text(), TEST_CONTENT) | assert (p / 'file1').read_text() == TEST_CONTENT | ||||
self.assertTrue((p / 'link1').is_symlink) | assert (p / 'link1').is_symlink | ||||
self.assertEqual(os.readlink(str(p / 'link1')), 'file1') | assert os.readlink(str(p / 'link1')) == 'file1' | ||||
self.assertEqual((p / 'bin').stat().st_mode, 0o100755) | assert (p / 'bin').stat().st_mode == 0o100755 | ||||
self.assertEqual((p / 'bin').read_bytes(), TEST_EXECUTABLE) | assert (p / 'bin').read_bytes() == TEST_EXECUTABLE | ||||
self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT) | assert (p / 'dir1/dir2/file').read_text() == TEST_CONTENT | ||||
self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644) | assert (p / 'dir1/dir2/file').stat().st_mode == 0o100644 | ||||
self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) | assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex | ||||
def test_revision_two_roots(self): | def test_revision_two_roots(self, swh_git_loader): | ||||
# | # | ||||
# 1----3---4 | # 1----3---4 | ||||
# / | # / | ||||
# 2---- | # 2---- | ||||
# | # | ||||
storage = swh_git_loader.storage | |||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file1').write_text(TEST_CONTENT) | (rp / 'file1').write_text(TEST_CONTENT) | ||||
c1 = repo.commit('Add file1') | c1 = repo.commit('Add file1') | ||||
del repo.repo.refs[b'refs/heads/master'] # git update-ref -d HEAD | del repo.repo.refs[b'refs/heads/master'] # git update-ref -d HEAD | ||||
(rp / 'file2').write_text(TEST_CONTENT) | (rp / 'file2').write_text(TEST_CONTENT) | ||||
repo.commit('Add file2') | repo.commit('Add file2') | ||||
repo.merge([c1]) | repo.merge([c1]) | ||||
(rp / 'file3').write_text(TEST_CONTENT) | (rp / 'file3').write_text(TEST_CONTENT) | ||||
repo.commit('add file3') | repo.commit('add file3') | ||||
obj_id_hex = repo.repo.refs[b'HEAD'].decode() | obj_id_hex = repo.repo.refs[b'HEAD'].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
with self.cook_extract_revision_gitfast(obj_id) as (ert, p): | with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): | ||||
self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) | assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex | ||||
def test_revision_two_double_fork_merge(self): | def test_revision_two_double_fork_merge(self, swh_git_loader): | ||||
# | # | ||||
# 2---4---6 | # 2---4---6 | ||||
# / / / | # / / / | ||||
# 1---3---5 | # 1---3---5 | ||||
# | # | ||||
storage = swh_git_loader.storage | |||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file1').write_text(TEST_CONTENT) | (rp / 'file1').write_text(TEST_CONTENT) | ||||
c1 = repo.commit('Add file1') | c1 = repo.commit('Add file1') | ||||
repo.repo.refs[b'refs/heads/c1'] = c1 | repo.repo.refs[b'refs/heads/c1'] = c1 | ||||
(rp / 'file2').write_text(TEST_CONTENT) | (rp / 'file2').write_text(TEST_CONTENT) | ||||
repo.commit('Add file2') | repo.commit('Add file2') | ||||
(rp / 'file3').write_text(TEST_CONTENT) | (rp / 'file3').write_text(TEST_CONTENT) | ||||
c3 = repo.commit('Add file3', ref=b'refs/heads/c1') | c3 = repo.commit('Add file3', ref=b'refs/heads/c1') | ||||
repo.repo.refs[b'refs/heads/c3'] = c3 | repo.repo.refs[b'refs/heads/c3'] = c3 | ||||
repo.merge([c3]) | repo.merge([c3]) | ||||
(rp / 'file5').write_text(TEST_CONTENT) | (rp / 'file5').write_text(TEST_CONTENT) | ||||
c5 = repo.commit('Add file3', ref=b'refs/heads/c3') | c5 = repo.commit('Add file3', ref=b'refs/heads/c3') | ||||
repo.merge([c5]) | repo.merge([c5]) | ||||
obj_id_hex = repo.repo.refs[b'HEAD'].decode() | obj_id_hex = repo.repo.refs[b'HEAD'].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
with self.cook_extract_revision_gitfast(obj_id) as (ert, p): | with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): | ||||
self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) | assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex | ||||
def test_revision_triple_merge(self): | def test_revision_triple_merge(self, swh_git_loader): | ||||
# | # | ||||
# .---.---5 | # .---.---5 | ||||
# / / / | # / / / | ||||
# 2 3 4 | # 2 3 4 | ||||
# / / / | # / / / | ||||
# 1---.---. | # 1---.---. | ||||
# | # | ||||
storage = swh_git_loader.storage | |||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file1').write_text(TEST_CONTENT) | (rp / 'file1').write_text(TEST_CONTENT) | ||||
c1 = repo.commit('Commit 1') | c1 = repo.commit('Commit 1') | ||||
repo.repo.refs[b'refs/heads/b1'] = c1 | repo.repo.refs[b'refs/heads/b1'] = c1 | ||||
repo.repo.refs[b'refs/heads/b2'] = c1 | repo.repo.refs[b'refs/heads/b2'] = c1 | ||||
repo.commit('Commit 2') | repo.commit('Commit 2') | ||||
c3 = repo.commit('Commit 3', ref=b'refs/heads/b1') | c3 = repo.commit('Commit 3', ref=b'refs/heads/b1') | ||||
c4 = repo.commit('Commit 4', ref=b'refs/heads/b2') | c4 = repo.commit('Commit 4', ref=b'refs/heads/b2') | ||||
repo.merge([c3, c4]) | repo.merge([c3, c4]) | ||||
obj_id_hex = repo.repo.refs[b'HEAD'].decode() | obj_id_hex = repo.repo.refs[b'HEAD'].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
with self.cook_extract_revision_gitfast(obj_id) as (ert, p): | with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): | ||||
self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) | assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex | ||||
def test_revision_filtered_objects(self): | def test_revision_filtered_objects(self, swh_git_loader): | ||||
storage = swh_git_loader.storage | |||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
file_1, id_1 = hash_content(b'test1') | file_1, id_1 = hash_content(b'test1') | ||||
file_2, id_2 = hash_content(b'test2') | file_2, id_2 = hash_content(b'test2') | ||||
file_3, id_3 = hash_content(b'test3') | file_3, id_3 = hash_content(b'test3') | ||||
(rp / 'file').write_bytes(file_1) | (rp / 'file').write_bytes(file_1) | ||||
(rp / 'hidden_file').write_bytes(file_2) | (rp / 'hidden_file').write_bytes(file_2) | ||||
(rp / 'absent_file').write_bytes(file_3) | (rp / 'absent_file').write_bytes(file_3) | ||||
repo.commit() | repo.commit() | ||||
obj_id_hex = repo.repo.refs[b'HEAD'].decode() | obj_id_hex = repo.repo.refs[b'HEAD'].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
# FIXME: storage.content_update() should be changed to allow things | # FIXME: storage.content_update() should be changed to allow things | ||||
# like that | # like that | ||||
with self.storage.get_db().transaction() as cur: | with storage.get_db().transaction() as cur: | ||||
cur.execute("""update content set status = 'visible' | cur.execute("""update content set status = 'visible' | ||||
where sha1 = %s""", (id_1,)) | where sha1 = %s""", (id_1,)) | ||||
cur.execute("""update content set status = 'hidden' | cur.execute("""update content set status = 'hidden' | ||||
where sha1 = %s""", (id_2,)) | where sha1 = %s""", (id_2,)) | ||||
cur.execute("""update content set status = 'absent' | cur.execute("""update content set status = 'absent' | ||||
where sha1 = %s""", (id_3,)) | where sha1 = %s""", (id_3,)) | ||||
with self.cook_extract_revision_gitfast(obj_id) as (ert, p): | with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): | ||||
ert.checkout(b'HEAD') | ert.checkout(b'HEAD') | ||||
self.assertEqual((p / 'file').read_bytes(), b'test1') | assert (p / 'file').read_bytes() == b'test1' | ||||
self.assertEqual((p / 'hidden_file').read_bytes(), HIDDEN_MESSAGE) | assert (p / 'hidden_file').read_bytes() == HIDDEN_MESSAGE | ||||
self.assertEqual((p / 'absent_file').read_bytes(), SKIPPED_MESSAGE) | assert (p / 'absent_file').read_bytes() == SKIPPED_MESSAGE | ||||
def test_revision_bogus_perms(self): | def test_revision_bogus_perms(self, swh_git_loader): | ||||
# Some early git repositories have 664/775 permissions... let's check | # Some early git repositories have 664/775 permissions... let's check | ||||
# if all the weird modes are properly normalized in the revision | # if all the weird modes are properly normalized in the revision | ||||
# cooker. | # cooker. | ||||
storage = swh_git_loader.storage | |||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file').write_text(TEST_CONTENT) | (rp / 'file').write_text(TEST_CONTENT) | ||||
(rp / 'file').chmod(0o664) | (rp / 'file').chmod(0o664) | ||||
(rp / 'executable').write_bytes(TEST_EXECUTABLE) | (rp / 'executable').write_bytes(TEST_EXECUTABLE) | ||||
(rp / 'executable').chmod(0o775) | (rp / 'executable').chmod(0o775) | ||||
(rp / 'wat').write_text(TEST_CONTENT) | (rp / 'wat').write_text(TEST_CONTENT) | ||||
(rp / 'wat').chmod(0o604) | (rp / 'wat').chmod(0o604) | ||||
repo.commit('initial commit') | repo.commit('initial commit') | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
obj_id_hex = repo.repo.refs[b'HEAD'].decode() | obj_id_hex = repo.repo.refs[b'HEAD'].decode() | ||||
obj_id = hashutil.hash_to_bytes(obj_id_hex) | obj_id = hashutil.hash_to_bytes(obj_id_hex) | ||||
with self.cook_extract_revision_gitfast(obj_id) as (ert, p): | with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): | ||||
ert.checkout(b'HEAD') | ert.checkout(b'HEAD') | ||||
self.assertEqual((p / 'file').stat().st_mode, 0o100644) | assert (p / 'file').stat().st_mode == 0o100644 | ||||
self.assertEqual((p / 'executable').stat().st_mode, 0o100755) | assert (p / 'executable').stat().st_mode == 0o100755 | ||||
self.assertEqual((p / 'wat').stat().st_mode, 0o100644) | assert (p / 'wat').stat().st_mode == 0o100644 | ||||
def test_revision_null_fields(self): | def test_revision_null_fields(self, swh_git_loader): | ||||
# Our schema doesn't enforce a lot of non-null revision fields. We need | # Our schema doesn't enforce a lot of non-null revision fields. We need | ||||
# to check these cases don't break the cooker. | # to check these cases don't break the cooker. | ||||
storage = swh_git_loader.storage | |||||
repo = TestRepo() | repo = TestRepo() | ||||
with repo as rp: | with repo as rp: | ||||
(rp / 'file').write_text(TEST_CONTENT) | (rp / 'file').write_text(TEST_CONTENT) | ||||
c = repo.commit('initial commit') | c = repo.commit('initial commit') | ||||
self.load(str(rp)) | load(swh_git_loader, str(rp)) | ||||
repo.repo.refs[b'HEAD'].decode() | repo.repo.refs[b'HEAD'].decode() | ||||
dir_id_hex = repo.repo[c].tree.decode() | dir_id_hex = repo.repo[c].tree.decode() | ||||
dir_id = hashutil.hash_to_bytes(dir_id_hex) | dir_id = hashutil.hash_to_bytes(dir_id_hex) | ||||
test_id = b'56789012345678901234' | test_id = b'56789012345678901234' | ||||
test_revision = { | test_revision = { | ||||
'id': test_id, | 'id': test_id, | ||||
'message': None, | 'message': None, | ||||
'author': {'name': None, 'email': None, 'fullname': ''}, | 'author': {'name': None, 'email': None, 'fullname': ''}, | ||||
'date': None, | 'date': None, | ||||
'committer': {'name': None, 'email': None, 'fullname': ''}, | 'committer': {'name': None, 'email': None, 'fullname': ''}, | ||||
'committer_date': None, | 'committer_date': None, | ||||
'parents': [], | 'parents': [], | ||||
'type': 'git', | 'type': 'git', | ||||
'directory': dir_id, | 'directory': dir_id, | ||||
'metadata': {}, | 'metadata': {}, | ||||
'synthetic': True | 'synthetic': True | ||||
} | } | ||||
self.storage.revision_add([test_revision]) | storage.revision_add([test_revision]) | ||||
with self.cook_extract_revision_gitfast(test_id) as (ert, p): | with cook_extract_revision_gitfast(storage, test_id) as (ert, p): | ||||
ert.checkout(b'HEAD') | ert.checkout(b'HEAD') | ||||
self.assertEqual((p / 'file').stat().st_mode, 0o100644) | assert (p / 'file').stat().st_mode == 0o100644 | ||||
def test_revision_revision_data(self): | def test_revision_revision_data(self, swh_git_loader): | ||||
storage = swh_git_loader.storage | |||||
target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' | target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' | ||||
d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') | d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') | ||||
r = hashutil.hash_to_bytes('1ecc9270c4fc61cfddbc65a774e91ef5c425a6f0') | r = hashutil.hash_to_bytes('1ecc9270c4fc61cfddbc65a774e91ef5c425a6f0') | ||||
dir = { | dir = { | ||||
'id': d, | 'id': d, | ||||
'entries': [ | 'entries': [ | ||||
{ | { | ||||
'name': b'submodule', | 'name': b'submodule', | ||||
'type': 'rev', | 'type': 'rev', | ||||
'target': hashutil.hash_to_bytes(target_rev), | 'target': hashutil.hash_to_bytes(target_rev), | ||||
'perms': 0o100644, | 'perms': 0o100644, | ||||
} | } | ||||
], | ], | ||||
} | } | ||||
self.storage.directory_add([dir]) | storage.directory_add([dir]) | ||||
rev = { | rev = { | ||||
'id': r, | 'id': r, | ||||
'message': None, | 'message': None, | ||||
'author': {'name': None, 'email': None, 'fullname': ''}, | 'author': {'name': None, 'email': None, 'fullname': ''}, | ||||
'date': None, | 'date': None, | ||||
'committer': {'name': None, 'email': None, 'fullname': ''}, | 'committer': {'name': None, 'email': None, 'fullname': ''}, | ||||
'committer_date': None, | 'committer_date': None, | ||||
'parents': [], | 'parents': [], | ||||
'type': 'git', | 'type': 'git', | ||||
'directory': d, | 'directory': d, | ||||
'metadata': {}, | 'metadata': {}, | ||||
'synthetic': True | 'synthetic': True | ||||
} | } | ||||
self.storage.revision_add([rev]) | storage.revision_add([rev]) | ||||
with self.cook_stream_revision_gitfast(r) as stream: | with cook_stream_revision_gitfast(storage, r) as stream: | ||||
pattern = 'M 160000 {} submodule'.format(target_rev).encode() | pattern = 'M 160000 {} submodule'.format(target_rev).encode() | ||||
self.assertIn(pattern, stream.read()) | assert pattern in stream.read() |