diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -30,4 +30,5 @@
     setup_requires=['vcversioner'],
     vcversioner={},
     include_package_data=True,
+    zip_safe=False,
 )
diff --git a/swh/vault/backend.py b/swh/vault/backend.py
--- a/swh/vault/backend.py
+++ b/swh/vault/backend.py
@@ -88,6 +88,9 @@
             cursor_factory=psycopg2.extras.RealDictCursor,
         )
 
+    def close(self):
+        self.db.close()
+
     def cursor(self):
         """Return a fresh cursor on the database, with auto-reconnection in
         case of failure"""
diff --git a/swh/vault/cookers/base.py b/swh/vault/cookers/base.py
--- a/swh/vault/cookers/base.py
+++ b/swh/vault/cookers/base.py
@@ -57,13 +57,19 @@
             cache: the cache where to store the bundle
             obj_id: id of the object to be cooked into a bundle.
         """
+        self.config = config
+        self.obj_type = obj_type
+        self.obj_id = hashutil.hash_to_bytes(obj_id)
+
+    def __enter__(self):
         # Imported here to avoid circular dependency
         from swh.vault.backend import VaultBackend
+        self.backend = VaultBackend(self.config)
+        self.storage = get_storage(**self.config['storage'])
+        return self
 
-        self.storage = get_storage(**config['storage'])
-        self.backend = VaultBackend(config)
-        self.obj_type = obj_type
-        self.obj_id = hashutil.hash_to_bytes(obj_id)
+    def __exit__(self, *_):
+        self.backend.close()
 
     @abc.abstractmethod
     def check_exists(self):
diff --git a/swh/vault/cooking_tasks.py b/swh/vault/cooking_tasks.py
--- a/swh/vault/cooking_tasks.py
+++ b/swh/vault/cooking_tasks.py
@@ -13,5 +13,5 @@
     task_queue = 'swh_vault_cooking'
 
     def run_task(self, config, obj_type, obj_id):
-        cooker = get_cooker(obj_type)(config, obj_type, obj_id)
-        cooker.cook()
+        with get_cooker(obj_type)(config, obj_type, obj_id) as cooker:
+            cooker.cook()
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
new file mode 100644
--- /dev/null
+++ b/swh/vault/tests/test_cookers.py
@@ -0,0 +1,267 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import contextlib
+import datetime
+import gzip
+import io
+import os
+import pathlib
+import subprocess
+import tarfile
+import tempfile
+import unittest
+
+import dulwich.fastexport
+import dulwich.index
+import dulwich.objects
+import dulwich.porcelain
+import dulwich.repo
+
+from swh.core.tests.db_testing import DbTestFixture
+from swh.loader.git.loader import GitLoader
+from swh.model import hashutil
+from swh.model.git import compute_hashes_from_directory
+from swh.storage.tests.storage_testing import StorageTestFixture
+from swh.vault.cookers import DirectoryCooker, RevisionGitfastCooker
+from swh.vault.tests.vault_testing import VaultTestFixture
+
+
+class TestRepo:
+    def __enter__(self):
+        self.tmp_dir = tempfile.TemporaryDirectory(prefix='tmp-vault-repo-')
+        self.repo_dir = self.tmp_dir.__enter__()
+        self.repo = dulwich.repo.Repo.init(self.repo_dir)
+        self.author = '"Test Author" <test@softwareheritage.org>'.encode()
+        return pathlib.Path(self.repo_dir)
+
+    def __exit__(self, exc, value, tb):
+        self.tmp_dir.__exit__(exc, value, tb)
+
+    def checkout(self, rev_sha):
+        rev = self.repo[rev_sha]
+        dulwich.index.build_index_from_tree(self.repo_dir,
+                                            self.repo.index_path(),
+                                            self.repo.object_store,
+                                            rev.tree)
+
+    def git_shell(self, *cmd, stdout=subprocess.DEVNULL, **kwargs):
+        subprocess.check_call(('git', '-C', self.repo_dir) + cmd,
+                              stdout=stdout, **kwargs)
+
+    def commit(self, message='Commit test\n', ref=b'HEAD'):
+        self.git_shell('add', '.')
+        message = message.encode() + b'\n'
+        return self.repo.do_commit(message=message, committer=self.author,
+                                   ref=ref)
+
+    def merge(self, parent_sha_list, message='Merge branches.'):
+        self.git_shell('merge', '--allow-unrelated-histories',
+                       '-m', message, *[p.decode() for p in parent_sha_list])
+        return self.repo.refs[b'HEAD']
+
+    def print_debug_graph(self, reflog=False):
+        args = ['log', '--all', '--graph', '--decorate']
+        if reflog:
+            args.append('--reflog')
+        self.git_shell(*args, stdout=None)
+
+
+TEST_CONTENT = (" test content\n"
+                "and unicode \N{BLACK HEART SUIT}\n"
+                " and trailing spaces ")
+TEST_EXECUTABLE = b'\x42\x40\x00\x00\x05'
+
+
+class BaseTestCookers(VaultTestFixture, StorageTestFixture, DbTestFixture):
+    def setUp(self):
+        super().setUp()
+        self.loader = GitLoader()
+        self.loader.storage = self.storage
+
+    def load(self, repo_path):
+        self.loader.load('fake_origin', repo_path, datetime.datetime.now())
+
+    @contextlib.contextmanager
+    def cook_extract_directory(self, obj_id):
+        cooker = DirectoryCooker(self.vault_config, 'directory', obj_id)
+        with cooker:
+            cooker.check_exists()  # Raises if false
+            tarball = b''.join(cooker.prepare_bundle())
+        with tempfile.TemporaryDirectory('tmp-vault-extract-') as td:
+            fobj = io.BytesIO(tarball)
+            with tarfile.open(fileobj=fobj, mode='r') as tar:
+                tar.extractall(td)
+            p = pathlib.Path(td) / hashutil.hash_to_hex(obj_id)
+            yield p
+
+    @contextlib.contextmanager
+    def cook_extract_revision_gitfast(self, obj_id):
+        cooker = RevisionGitfastCooker(self.vault_config, 'revision_gitfast',
+                                       obj_id)
+        with cooker:
+            cooker.check_exists()  # Raises if false
+            fastexport = b''.join(cooker.prepare_bundle())
+        fastexport_stream = gzip.GzipFile(fileobj=io.BytesIO(fastexport))
+        test_repo = TestRepo()
+        with test_repo as p:
+            processor = dulwich.fastexport.GitImportProcessor(test_repo.repo)
+            processor.import_stream(fastexport_stream)
+            yield test_repo, p
+
+
+class TestDirectoryCooker(BaseTestCookers, unittest.TestCase):
+    def test_directory_simple(self):
+        repo = TestRepo()
+        with repo as rp:
+            (rp / 'file').write_text(TEST_CONTENT)
+            (rp / 'executable').write_bytes(TEST_EXECUTABLE)
+            (rp / 'executable').chmod(0o755)
+            (rp / 'link').symlink_to('file')
+            (rp / 'dir1/dir2').mkdir(parents=True)
+            (rp / 'dir1/dir2/file').write_text(TEST_CONTENT)
+            c = repo.commit()
+            self.load(str(rp))
+
+            obj_id_hex = repo.repo[c].tree.decode()
+            obj_id = hashutil.hash_to_bytes(obj_id_hex)
+
+        with self.cook_extract_directory(obj_id) as p:
+            self.assertEqual((p / 'file').stat().st_mode, 0o100644)
+            self.assertEqual((p / 'file').read_text(), TEST_CONTENT)
+            self.assertEqual((p / 'executable').stat().st_mode, 0o100755)
+            self.assertEqual((p / 'executable').read_bytes(), TEST_EXECUTABLE)
+            self.assertTrue((p / 'link').is_symlink())
+            self.assertEqual(os.readlink(str(p / 'link')), 'file')
+            self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644)
+            self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT)
+
+            dir_pb = bytes(p)
+            dir_hashes = compute_hashes_from_directory(dir_pb)[dir_pb]
+            dir_hash = dir_hashes['checksums']['sha1_git']
+            self.assertEqual(obj_id_hex, hashutil.hash_to_hex(dir_hash))
+
+
+class TestRevisionGitfastCooker(BaseTestCookers, unittest.TestCase):
+    def test_revision_simple(self):
+        #
+        #     1--2--3--4--5--6--7
+        #
+        repo = TestRepo()
+        with repo as rp:
+            (rp / 'file1').write_text(TEST_CONTENT)
+            repo.commit('add file1')
+            (rp / 'file2').write_text(TEST_CONTENT)
+            repo.commit('add file2')
+            (rp / 'dir1/dir2').mkdir(parents=True)
+            (rp / 'dir1/dir2/file').write_text(TEST_CONTENT)
+            repo.commit('add dir1/dir2/file')
+            (rp / 'bin1').write_bytes(TEST_EXECUTABLE)
+            (rp / 'bin1').chmod(0o755)
+            repo.commit('add bin1')
+            (rp / 'link1').symlink_to('file1')
+            repo.commit('link link1 to file1')
+            (rp / 'file2').unlink()
+            repo.commit('remove file2')
+            (rp / 'bin1').rename(rp / 'bin')
+            repo.commit('rename bin1 to bin')
+            self.load(str(rp))
+            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
+            obj_id = hashutil.hash_to_bytes(obj_id_hex)
+
+        with self.cook_extract_revision_gitfast(obj_id) as (ert, p):
+            ert.checkout(b'HEAD')
+            self.assertEqual((p / 'file1').stat().st_mode, 0o100644)
+            self.assertEqual((p / 'file1').read_text(), TEST_CONTENT)
+            self.assertTrue((p / 'link1').is_symlink())
+            self.assertEqual(os.readlink(str(p / 'link1')), 'file1')
+            self.assertEqual((p / 'bin').stat().st_mode, 0o100755)
+            self.assertEqual((p / 'bin').read_bytes(), TEST_EXECUTABLE)
+            self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT)
+            self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644)
+            self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex)
+
+    def test_revision_two_roots(self):
+        #
+        #    1----3---4
+        #        /
+        #   2----
+        #
+        repo = TestRepo()
+        with repo as rp:
+            (rp / 'file1').write_text(TEST_CONTENT)
+            c1 = repo.commit('Add file1')
+            del repo.repo.refs[b'refs/heads/master']  # git update-ref -d HEAD
+            (rp / 'file2').write_text(TEST_CONTENT)
+            repo.commit('Add file2')
+            repo.merge([c1])
+            (rp / 'file3').write_text(TEST_CONTENT)
+            repo.commit('add file3')
+            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
+            obj_id = hashutil.hash_to_bytes(obj_id_hex)
+            self.load(str(rp))
+
+        with self.cook_extract_revision_gitfast(obj_id) as (ert, p):
+            self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex)
+
+    def test_revision_two_double_fork_merge(self):
+        #
+        #     2---4---6
+        #    /   /   /
+        #   1---3---5
+        #
+        repo = TestRepo()
+        with repo as rp:
+            (rp / 'file1').write_text(TEST_CONTENT)
+            c1 = repo.commit('Add file1')
+            repo.repo.refs[b'refs/heads/c1'] = c1
+
+            (rp / 'file2').write_text(TEST_CONTENT)
+            repo.commit('Add file2')
+
+            (rp / 'file3').write_text(TEST_CONTENT)
+            c3 = repo.commit('Add file3', ref=b'refs/heads/c1')
+            repo.repo.refs[b'refs/heads/c3'] = c3
+
+            repo.merge([c3])
+
+            (rp / 'file5').write_text(TEST_CONTENT)
+            c5 = repo.commit('Add file3', ref=b'refs/heads/c3')
+
+            repo.merge([c5])
+
+            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
+            obj_id = hashutil.hash_to_bytes(obj_id_hex)
+            self.load(str(rp))
+
+        with self.cook_extract_revision_gitfast(obj_id) as (ert, p):
+            self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex)
+
+    def test_revision_triple_merge(self):
+        #
+        #       .---.---5
+        #      /   /   /
+        #     2   3   4
+        #    /   /   /
+        #   1---.---.
+        #
+        repo = TestRepo()
+        with repo as rp:
+            (rp / 'file1').write_text(TEST_CONTENT)
+            c1 = repo.commit('Commit 1')
+            repo.repo.refs[b'refs/heads/b1'] = c1
+            repo.repo.refs[b'refs/heads/b2'] = c1
+
+            repo.commit('Commit 2')
+            c3 = repo.commit('Commit 3', ref=b'refs/heads/b1')
+            c4 = repo.commit('Commit 4', ref=b'refs/heads/b2')
+            repo.merge([c3, c4])
+
+            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
+            obj_id = hashutil.hash_to_bytes(obj_id_hex)
+            self.load(str(rp))
+
+        with self.cook_extract_revision_gitfast(obj_id) as (ert, p):
+            self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex)
diff --git a/swh/vault/tests/vault_testing.py b/swh/vault/tests/vault_testing.py
new file mode 100644
--- /dev/null
+++ b/swh/vault/tests/vault_testing.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import tempfile
+import pathlib
+from swh.vault.backend import VaultBackend
+
+
+class VaultTestFixture:
+    """Mix this in a test subject class to get Vault Database testing support.
+
+    This fixture requires to come before DbTestFixture and StorageTestFixture
+    in the inheritance list as it uses their methods to setup its own internal
+    components.
+
+    Usage example:
+
+        class TestVault(VaultTestFixture, StorageTestFixture, DbTestFixture):
+            ...
+    """
+    TEST_VAULT_DB_NAME = 'softwareheritage-test-vault'
+
+    @classmethod
+    def setUpClass(cls):
+        if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'):
+            raise RuntimeError("VaultTestFixture needs to be followed by "
+                               "DbTestFixture in the inheritance list.")
+
+        test_dir = pathlib.Path(__file__).absolute().parent
+        test_db_dump = test_dir / '../../../sql/swh-vault-schema.sql'
+        test_db_dump = test_db_dump.absolute()
+        cls.add_db(cls.TEST_VAULT_DB_NAME, str(test_db_dump), 'psql')
+        super().setUpClass()
+
+    def setUp(self):
+        super().setUp()
+        self.cache_root = tempfile.TemporaryDirectory('vault-cache-')
+        self.vault_config = {
+            'storage': self.storage_config,
+            'vault_db': 'postgresql:///' + self.TEST_VAULT_DB_NAME,
+            'cache': {'root': self.cache_root.name}
+        }
+        self.vault_backend = VaultBackend(self.vault_config)
+
+    def tearDown(self):
+        self.reset_tables()
+        self.cache_root.cleanup()
+        self.vault_backend.close()
+        super().tearDown()
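Usage note: with this change a cooker is opened and closed through the context-manager
protocol, so any caller (not only the Celery task in cooking_tasks.py) is expected to
drive it from a with block, which guarantees the new VaultBackend.close() runs. The
sketch below illustrates that flow; the configuration values and the object id are
assumptions made up for illustration (they only mirror the shape used in
vault_testing.py), and it assumes get_cooker is exposed by swh.vault.cookers as used
in cooking_tasks.py:

    from swh.vault.cookers import get_cooker

    config = {
        # Hypothetical settings, shaped like the ones in vault_testing.py
        'storage': {'cls': 'remote', 'args': {'url': 'http://localhost:5002/'}},
        'vault_db': 'postgresql:///softwareheritage-vault-dev',
        'cache': {'root': '/tmp/vault-cache'},
    }
    obj_type = 'directory'
    obj_id = '0123456789abcdef0123456789abcdef01234567'  # hypothetical id

    # __enter__() instantiates the VaultBackend and the storage;
    # __exit__() closes the backend database connection.
    with get_cooker(obj_type)(config, obj_type, obj_id) as cooker:
        cooker.cook()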