diff --git a/swh/vault/sql/30-swh-schema.sql b/swh/vault/sql/30-swh-schema.sql index 1d477f1..f7be87f 100644 --- a/swh/vault/sql/30-swh-schema.sql +++ b/swh/vault/sql/30-swh-schema.sql @@ -1,59 +1,59 @@ -create table dbversion +create table if not exists dbversion ( version int primary key, release timestamptz not null, description text not null ); comment on table dbversion is 'Schema update tracking'; insert into dbversion (version, release, description) values (1, now(), 'Initial version'); create domain obj_hash as bytea; create type cook_type as enum ('directory', 'revision_gitfast'); comment on type cook_type is 'Type of the requested bundle'; create type cook_status as enum ('new', 'pending', 'done', 'failed'); comment on type cook_status is 'Status of the cooking'; create table vault_bundle ( id bigserial primary key, type cook_type not null, -- requested cooking type object_id obj_hash not null, -- requested object ID task_id integer, -- scheduler task id task_status cook_status not null default 'new', -- status of the task sticky boolean not null default false, -- bundle cannot expire ts_created timestamptz not null default now(), -- timestamp of creation ts_done timestamptz, -- timestamp of the cooking result ts_last_access timestamptz not null default now(), -- last access progress_msg text -- progress message ); create unique index concurrently vault_bundle_type_object on vault_bundle (type, object_id); create index concurrently vault_bundle_task_id on vault_bundle (task_id); create table vault_notif_email ( id bigserial primary key, email text not null, -- e-mail to notify bundle_id bigint not null references vault_bundle(id) on delete cascade ); create index concurrently vault_notif_email_bundle on vault_notif_email (bundle_id); create index concurrently vault_notif_email_email on vault_notif_email (email); create table vault_batch ( id bigserial primary key ); create table vault_batch_bundle ( batch_id bigint not null references vault_batch(id) on delete cascade, bundle_id bigint not null references vault_bundle(id) on delete cascade ); create unique index concurrently vault_batch_bundle_pkey on vault_batch_bundle (batch_id, bundle_id); diff --git a/swh/vault/tests/__init__.py b/swh/vault/tests/__init__.py index e69de29..d3d2d8a 100644 --- a/swh/vault/tests/__init__.py +++ b/swh/vault/tests/__init__.py @@ -0,0 +1,5 @@ +from os import path +import swh.vault + + +SQL_DIR = path.join(path.dirname(swh.vault.__file__), 'sql') diff --git a/swh/vault/tests/test_backend.py b/swh/vault/tests/test_backend.py index 5e04421..752b471 100644 --- a/swh/vault/tests/test_backend.py +++ b/swh/vault/tests/test_backend.py @@ -1,328 +1,327 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import datetime import psycopg2 import unittest from unittest.mock import patch from swh.model import hashutil -from swh.storage.tests.storage_testing import StorageTestFixture from swh.vault.tests.vault_testing import VaultTestFixture, hash_content -class BaseTestBackend(VaultTestFixture, StorageTestFixture): +class BaseTestBackend(VaultTestFixture): @contextlib.contextmanager def mock_cooking(self): with patch.object(self.vault_backend, '_send_task') as mt: mt.return_value = 42 with patch('swh.vault.backend.get_cooker') as mg: mcc = unittest.mock.MagicMock() mc = unittest.mock.MagicMock() mg.return_value = mcc mcc.return_value = mc mc.check_exists.return_value = True yield {'send_task': mt, 'get_cooker': mg, 'cooker_cls': mcc, 'cooker': mc} def assertTimestampAlmostNow(self, ts, tolerance_secs=1.0): # noqa now = datetime.datetime.now(datetime.timezone.utc) creation_delta_secs = (ts - now).total_seconds() self.assertLess(creation_delta_secs, tolerance_secs) def fake_cook(self, obj_type, result_content, sticky=False): content, obj_id = hash_content(result_content) with self.mock_cooking(): self.vault_backend.create_task(obj_type, obj_id, sticky) self.vault_backend.cache.add(obj_type, obj_id, b'content') self.vault_backend.set_status(obj_type, obj_id, 'done') return obj_id, content def fail_cook(self, obj_type, obj_id, failure_reason): with self.mock_cooking(): self.vault_backend.create_task(obj_type, obj_id) self.vault_backend.set_status(obj_type, obj_id, 'failed') self.vault_backend.set_progress(obj_type, obj_id, failure_reason) TEST_TYPE = 'revision_gitfast' TEST_HEX_ID = '4a4b9771542143cf070386f86b4b92d42966bdbc' TEST_OBJ_ID = hashutil.hash_to_bytes(TEST_HEX_ID) TEST_PROGRESS = ("Mr. White, You're telling me you're cooking again?" " \N{ASTONISHED FACE} ") TEST_EMAIL = 'ouiche@example.com' class TestBackend(BaseTestBackend, unittest.TestCase): def test_create_task_simple(self): with self.mock_cooking() as m: self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) m['get_cooker'].assert_called_once_with(TEST_TYPE) args = m['cooker_cls'].call_args[0] self.assertEqual(args[0], TEST_TYPE) self.assertEqual(args[1], TEST_HEX_ID) self.assertEqual(m['cooker'].check_exists.call_count, 1) self.assertEqual(m['send_task'].call_count, 1) args = m['send_task'].call_args[0][0] self.assertEqual(args[0], TEST_TYPE) self.assertEqual(args[1], TEST_HEX_ID) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['object_id'], TEST_OBJ_ID) self.assertEqual(info['type'], TEST_TYPE) self.assertEqual(info['task_status'], 'new') self.assertEqual(info['task_id'], 42) self.assertTimestampAlmostNow(info['ts_created']) self.assertEqual(info['ts_done'], None) self.assertEqual(info['progress_msg'], None) def test_create_fail_duplicate_task(self): with self.mock_cooking(): self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) with self.assertRaises(psycopg2.IntegrityError): self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) def test_create_fail_nonexisting_object(self): with self.mock_cooking() as m: m['cooker'].check_exists.side_effect = ValueError('Nothing here.') with self.assertRaises(ValueError): self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) def test_create_set_progress(self): with self.mock_cooking(): self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['progress_msg'], None) self.vault_backend.set_progress(TEST_TYPE, TEST_OBJ_ID, TEST_PROGRESS) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['progress_msg'], TEST_PROGRESS) def test_create_set_status(self): with self.mock_cooking(): self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['task_status'], 'new') self.assertEqual(info['ts_done'], None) self.vault_backend.set_status(TEST_TYPE, TEST_OBJ_ID, 'pending') info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['task_status'], 'pending') self.assertEqual(info['ts_done'], None) self.vault_backend.set_status(TEST_TYPE, TEST_OBJ_ID, 'done') info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['task_status'], 'done') self.assertTimestampAlmostNow(info['ts_done']) def test_create_update_access_ts(self): with self.mock_cooking(): self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) access_ts_1 = info['ts_last_access'] self.assertTimestampAlmostNow(access_ts_1) self.vault_backend.update_access_ts(TEST_TYPE, TEST_OBJ_ID) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) access_ts_2 = info['ts_last_access'] self.assertTimestampAlmostNow(access_ts_2) self.vault_backend.update_access_ts(TEST_TYPE, TEST_OBJ_ID) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) access_ts_3 = info['ts_last_access'] self.assertTimestampAlmostNow(access_ts_3) self.assertLess(access_ts_1, access_ts_2) self.assertLess(access_ts_2, access_ts_3) def test_cook_request_idempotent(self): with self.mock_cooking(): info1 = self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID) info2 = self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID) info3 = self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info1, info2) self.assertEqual(info1, info3) def test_cook_email_pending_done(self): with self.mock_cooking(), \ patch.object(self.vault_backend, 'add_notif_email') as madd, \ patch.object(self.vault_backend, 'send_notification') as msend: self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID) madd.assert_not_called() msend.assert_not_called() madd.reset_mock() msend.reset_mock() self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID, email=TEST_EMAIL) madd.assert_called_once_with(TEST_TYPE, TEST_OBJ_ID, TEST_EMAIL) msend.assert_not_called() madd.reset_mock() msend.reset_mock() self.vault_backend.set_status(TEST_TYPE, TEST_OBJ_ID, 'done') self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID, email=TEST_EMAIL) msend.assert_called_once_with(None, TEST_EMAIL, TEST_TYPE, TEST_OBJ_ID, 'done') madd.assert_not_called() def test_send_all_emails(self): with self.mock_cooking(): emails = ('a@example.com', 'billg@example.com', 'test+42@example.org') for email in emails: self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID, email=email) self.vault_backend.set_status(TEST_TYPE, TEST_OBJ_ID, 'done') with patch.object(self.vault_backend, 'smtp_server') as m: self.vault_backend.send_all_notifications(TEST_TYPE, TEST_OBJ_ID) sent_emails = {k[0][0] for k in m.send_message.call_args_list} self.assertEqual({k['To'] for k in sent_emails}, set(emails)) for e in sent_emails: self.assertIn('info@softwareheritage.org', e['From']) self.assertIn(TEST_TYPE, e['Subject']) self.assertIn(TEST_HEX_ID[:5], e['Subject']) self.assertIn(TEST_TYPE, str(e)) self.assertIn('https://archive.softwareheritage.org/', str(e)) self.assertIn(TEST_HEX_ID[:5], str(e)) self.assertIn('--\x20\n', str(e)) # Well-formated signature!!! # Check that the entries have been deleted and recalling the # function does not re-send the e-mails m.reset_mock() self.vault_backend.send_all_notifications(TEST_TYPE, TEST_OBJ_ID) m.assert_not_called() def test_available(self): self.assertFalse(self.vault_backend.is_available(TEST_TYPE, TEST_OBJ_ID)) with self.mock_cooking(): self.vault_backend.create_task(TEST_TYPE, TEST_OBJ_ID) self.assertFalse(self.vault_backend.is_available(TEST_TYPE, TEST_OBJ_ID)) self.vault_backend.cache.add(TEST_TYPE, TEST_OBJ_ID, b'content') self.assertFalse(self.vault_backend.is_available(TEST_TYPE, TEST_OBJ_ID)) self.vault_backend.set_status(TEST_TYPE, TEST_OBJ_ID, 'done') self.assertTrue(self.vault_backend.is_available(TEST_TYPE, TEST_OBJ_ID)) def test_fetch(self): self.assertEqual(self.vault_backend.fetch(TEST_TYPE, TEST_OBJ_ID), None) obj_id, content = self.fake_cook(TEST_TYPE, b'content') info = self.vault_backend.task_info(TEST_TYPE, obj_id) access_ts_before = info['ts_last_access'] self.assertEqual(self.vault_backend.fetch(TEST_TYPE, obj_id), b'content') info = self.vault_backend.task_info(TEST_TYPE, obj_id) access_ts_after = info['ts_last_access'] self.assertTimestampAlmostNow(access_ts_after) self.assertLess(access_ts_before, access_ts_after) def test_cache_expire_oldest(self): r = range(1, 10) inserted = {} for i in r: sticky = (i == 5) content = b'content%s' % str(i).encode() obj_id, content = self.fake_cook(TEST_TYPE, content, sticky) inserted[i] = (obj_id, content) self.vault_backend.update_access_ts(TEST_TYPE, inserted[2][0]) self.vault_backend.update_access_ts(TEST_TYPE, inserted[3][0]) self.vault_backend.cache_expire_oldest(n=4) should_be_still_here = {2, 3, 5, 8, 9} for i in r: self.assertEqual(self.vault_backend.is_available( TEST_TYPE, inserted[i][0]), i in should_be_still_here) def test_cache_expire_until(self): r = range(1, 10) inserted = {} for i in r: sticky = (i == 5) content = b'content%s' % str(i).encode() obj_id, content = self.fake_cook(TEST_TYPE, content, sticky) inserted[i] = (obj_id, content) if i == 7: cutoff_date = datetime.datetime.now() self.vault_backend.update_access_ts(TEST_TYPE, inserted[2][0]) self.vault_backend.update_access_ts(TEST_TYPE, inserted[3][0]) self.vault_backend.cache_expire_until(date=cutoff_date) should_be_still_here = {2, 3, 5, 8, 9} for i in r: self.assertEqual(self.vault_backend.is_available( TEST_TYPE, inserted[i][0]), i in should_be_still_here) def test_fail_cook_simple(self): self.fail_cook(TEST_TYPE, TEST_OBJ_ID, 'error42') self.assertFalse(self.vault_backend.is_available(TEST_TYPE, TEST_OBJ_ID)) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['progress_msg'], 'error42') def test_send_failure_email(self): with self.mock_cooking(): self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID, email='a@example.com') self.vault_backend.set_status(TEST_TYPE, TEST_OBJ_ID, 'failed') self.vault_backend.set_progress(TEST_TYPE, TEST_OBJ_ID, 'test error') with patch.object(self.vault_backend, 'smtp_server') as m: self.vault_backend.send_all_notifications(TEST_TYPE, TEST_OBJ_ID) e = [k[0][0] for k in m.send_message.call_args_list][0] self.assertEqual(e['To'], 'a@example.com') self.assertIn('info@softwareheritage.org', e['From']) self.assertIn(TEST_TYPE, e['Subject']) self.assertIn(TEST_HEX_ID[:5], e['Subject']) self.assertIn('fail', e['Subject']) self.assertIn(TEST_TYPE, str(e)) self.assertIn(TEST_HEX_ID[:5], str(e)) self.assertIn('test error', str(e)) self.assertIn('--\x20\n', str(e)) # Well-formated signature def test_retry_failed_bundle(self): self.fail_cook(TEST_TYPE, TEST_OBJ_ID, 'error42') info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['task_status'], 'failed') with self.mock_cooking(): self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID) info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID) self.assertEqual(info['task_status'], 'new') diff --git a/swh/vault/tests/test_cache.py b/swh/vault/tests/test_cache.py index 8481477..ab4ab49 100644 --- a/swh/vault/tests/test_cache.py +++ b/swh/vault/tests/test_cache.py @@ -1,75 +1,74 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest from swh.model import hashutil -from swh.storage.tests.storage_testing import StorageTestFixture from swh.vault.tests.vault_testing import VaultTestFixture TEST_TYPE_1 = 'revision_gitfast' TEST_TYPE_2 = 'directory' TEST_HEX_ID_1 = '4a4b9771542143cf070386f86b4b92d42966bdbc' TEST_HEX_ID_2 = '17a3e48bce37be5226490e750202ad3a9a1a3fe9' TEST_OBJ_ID_1 = hashutil.hash_to_bytes(TEST_HEX_ID_1) TEST_OBJ_ID_2 = hashutil.hash_to_bytes(TEST_HEX_ID_2) TEST_CONTENT_1 = b'test content 1' TEST_CONTENT_2 = b'test content 2' -class BaseTestVaultCache(VaultTestFixture, StorageTestFixture): +class BaseTestVaultCache(VaultTestFixture): def setUp(self): super().setUp() self.cache = self.vault_backend.cache # little shortcut class TestVaultCache(BaseTestVaultCache, unittest.TestCase): # Let's try to avoid replicating edge-cases already tested in # swh-objstorage, and instead focus on testing behaviors specific to the # Vault cache here. def test_internal_id(self): sid = self.cache._get_internal_id(TEST_TYPE_1, TEST_OBJ_ID_1) self.assertEqual(hashutil.hash_to_hex(sid), '6829cda55b54c295aa043a611a4e0320239988d9') def test_simple_add_get(self): self.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1) self.assertEqual(self.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1), TEST_CONTENT_1) self.assertTrue(self.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)) def test_different_type_same_id(self): self.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1) self.cache.add(TEST_TYPE_2, TEST_OBJ_ID_1, TEST_CONTENT_2) self.assertEqual(self.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1), TEST_CONTENT_1) self.assertEqual(self.cache.get(TEST_TYPE_2, TEST_OBJ_ID_1), TEST_CONTENT_2) self.assertTrue(self.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)) self.assertTrue(self.cache.is_cached(TEST_TYPE_2, TEST_OBJ_ID_1)) def test_different_type_same_content(self): self.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1) self.cache.add(TEST_TYPE_2, TEST_OBJ_ID_1, TEST_CONTENT_1) self.assertEqual(self.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1), TEST_CONTENT_1) self.assertEqual(self.cache.get(TEST_TYPE_2, TEST_OBJ_ID_1), TEST_CONTENT_1) self.assertTrue(self.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)) self.assertTrue(self.cache.is_cached(TEST_TYPE_2, TEST_OBJ_ID_1)) def test_different_id_same_type(self): self.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1) self.cache.add(TEST_TYPE_1, TEST_OBJ_ID_2, TEST_CONTENT_2) self.assertEqual(self.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1), TEST_CONTENT_1) self.assertEqual(self.cache.get(TEST_TYPE_1, TEST_OBJ_ID_2), TEST_CONTENT_2) self.assertTrue(self.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)) self.assertTrue(self.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_2)) diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py index 10c9818..6a13c20 100644 --- a/swh/vault/tests/test_cookers.py +++ b/swh/vault/tests/test_cookers.py @@ -1,517 +1,516 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import contextlib import datetime import gzip import io import os import pathlib import subprocess import tarfile import tempfile import unittest import unittest.mock import dulwich.fastexport import dulwich.index import dulwich.objects import dulwich.porcelain import dulwich.repo from swh.loader.git.loader import GitLoader from swh.model import hashutil from swh.model.from_disk import Directory -from swh.storage.tests.storage_testing import StorageTestFixture from swh.vault.cookers import DirectoryCooker, RevisionGitfastCooker from swh.vault.tests.vault_testing import VaultTestFixture, hash_content from swh.vault.to_disk import SKIPPED_MESSAGE, HIDDEN_MESSAGE class TestRepo: """A tiny context manager for a test git repository, with some utility functions to perform basic git stuff. """ def __enter__(self): self.tmp_dir = tempfile.TemporaryDirectory(prefix='tmp-vault-repo-') self.repo_dir = self.tmp_dir.__enter__() self.repo = dulwich.repo.Repo.init(self.repo_dir) self.author_name = b'Test Author' self.author_email = b'test@softwareheritage.org' self.author = b'%s <%s>' % (self.author_name, self.author_email) self.base_date = 258244200 self.counter = 0 return pathlib.Path(self.repo_dir) def __exit__(self, exc, value, tb): self.tmp_dir.__exit__(exc, value, tb) def checkout(self, rev_sha): rev = self.repo[rev_sha] dulwich.index.build_index_from_tree(self.repo_dir, self.repo.index_path(), self.repo.object_store, rev.tree) def git_shell(self, *cmd, stdout=subprocess.DEVNULL, **kwargs): name = self.author_name email = self.author_email date = '%d +0000' % (self.base_date + self.counter) env = { # Set git commit format 'GIT_AUTHOR_NAME': name, 'GIT_AUTHOR_EMAIL': email, 'GIT_AUTHOR_DATE': date, 'GIT_COMMITTER_NAME': name, 'GIT_COMMITTER_EMAIL': email, 'GIT_COMMITTER_DATE': date, # Ignore all the system-wide and user configurations 'GIT_CONFIG_NOSYSTEM': '1', 'HOME': str(self.tmp_dir), 'XDG_CONFIG_HOME': str(self.tmp_dir), } kwargs.setdefault('env', {}).update(env) subprocess.check_call(('git', '-C', self.repo_dir) + cmd, stdout=stdout, **kwargs) def commit(self, message='Commit test\n', ref=b'HEAD'): self.git_shell('add', '.') message = message.encode() + b'\n' ret = self.repo.do_commit( message=message, committer=self.author, commit_timestamp=self.base_date + self.counter, commit_timezone=0, ref=ref) self.counter += 1 return ret def merge(self, parent_sha_list, message='Merge branches.'): self.git_shell('merge', '--allow-unrelated-histories', '-m', message, *[p.decode() for p in parent_sha_list]) self.counter += 1 return self.repo.refs[b'HEAD'] def print_debug_graph(self, reflog=False): args = ['log', '--all', '--graph', '--decorate'] if reflog: args.append('--reflog') self.git_shell(*args, stdout=None) -class BaseTestCookers(VaultTestFixture, StorageTestFixture): +class BaseTestCookers(VaultTestFixture): """Base class of cookers unit tests""" def setUp(self): super().setUp() self.loader = GitLoader() self.loader.storage = self.storage def tearDown(self): self.loader = None super().tearDown() def load(self, repo_path): """Load a repository in the test storage""" self.loader.load('fake_origin', repo_path, datetime.datetime.now()) @contextlib.contextmanager def cook_extract_directory(self, obj_id): """Context manager that cooks a directory and extract it.""" cooker = DirectoryCooker('directory', obj_id) cooker.storage = self.storage cooker.backend = unittest.mock.MagicMock() cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) with tempfile.TemporaryDirectory(prefix='tmp-vault-extract-') as td: with tarfile.open(fileobj=cooker.fileobj, mode='r') as tar: tar.extractall(td) yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) cooker.storage = None @contextlib.contextmanager def cook_stream_revision_gitfast(self, obj_id): """Context manager that cooks a revision and stream its fastexport.""" cooker = RevisionGitfastCooker('revision_gitfast', obj_id) cooker.storage = self.storage cooker.backend = unittest.mock.MagicMock() cooker.fileobj = io.BytesIO() assert cooker.check_exists() cooker.prepare_bundle() cooker.fileobj.seek(0) fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) yield fastexport_stream cooker.storage = None @contextlib.contextmanager def cook_extract_revision_gitfast(self, obj_id): """Context manager that cooks a revision and extract it.""" test_repo = TestRepo() with self.cook_stream_revision_gitfast(obj_id) as stream, \ test_repo as p: processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) processor.import_stream(stream) yield test_repo, p TEST_CONTENT = (" test content\n" "and unicode \N{BLACK HEART SUIT}\n" " and trailing spaces ") TEST_EXECUTABLE = b'\x42\x40\x00\x00\x05' class TestDirectoryCooker(BaseTestCookers, unittest.TestCase): def test_directory_simple(self): repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) (rp / 'executable').write_bytes(TEST_EXECUTABLE) (rp / 'executable').chmod(0o755) (rp / 'link').symlink_to('file') (rp / 'dir1/dir2').mkdir(parents=True) (rp / 'dir1/dir2/file').write_text(TEST_CONTENT) c = repo.commit() self.load(str(rp)) obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with self.cook_extract_directory(obj_id) as p: self.assertEqual((p / 'file').stat().st_mode, 0o100644) self.assertEqual((p / 'file').read_text(), TEST_CONTENT) self.assertEqual((p / 'executable').stat().st_mode, 0o100755) self.assertEqual((p / 'executable').read_bytes(), TEST_EXECUTABLE) self.assertTrue((p / 'link').is_symlink) self.assertEqual(os.readlink(str(p / 'link')), 'file') self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644) self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT) directory = Directory.from_disk(path=bytes(p)) self.assertEqual(obj_id_hex, hashutil.hash_to_hex(directory.hash)) def test_directory_filtered_objects(self): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b'test1') file_2, id_2 = hash_content(b'test2') file_3, id_3 = hash_content(b'test3') (rp / 'file').write_bytes(file_1) (rp / 'hidden_file').write_bytes(file_2) (rp / 'absent_file').write_bytes(file_3) c = repo.commit() self.load(str(rp)) obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) # FIXME: storage.content_update() should be changed to allow things # like that with self.storage.get_db().transaction() as cur: cur.execute("""update content set status = 'visible' where sha1 = %s""", (id_1,)) cur.execute("""update content set status = 'hidden' where sha1 = %s""", (id_2,)) cur.execute("""update content set status = 'absent' where sha1 = %s""", (id_3,)) with self.cook_extract_directory(obj_id) as p: self.assertEqual((p / 'file').read_bytes(), b'test1') self.assertEqual((p / 'hidden_file').read_bytes(), HIDDEN_MESSAGE) self.assertEqual((p / 'absent_file').read_bytes(), SKIPPED_MESSAGE) def test_directory_bogus_perms(self): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the directory # cooker. repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) (rp / 'file').chmod(0o664) (rp / 'executable').write_bytes(TEST_EXECUTABLE) (rp / 'executable').chmod(0o775) (rp / 'wat').write_text(TEST_CONTENT) (rp / 'wat').chmod(0o604) c = repo.commit() self.load(str(rp)) obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with self.cook_extract_directory(obj_id) as p: self.assertEqual((p / 'file').stat().st_mode, 0o100644) self.assertEqual((p / 'executable').stat().st_mode, 0o100755) self.assertEqual((p / 'wat').stat().st_mode, 0o100644) def test_directory_revision_data(self): target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') dir = { 'id': d, 'entries': [ { 'name': b'submodule', 'type': 'rev', 'target': hashutil.hash_to_bytes(target_rev), 'perms': 0o100644, } ], } self.storage.directory_add([dir]) with self.cook_extract_directory(d) as p: self.assertTrue((p / 'submodule').is_symlink()) self.assertEqual(os.readlink(str(p / 'submodule')), target_rev) class TestRevisionGitfastCooker(BaseTestCookers, unittest.TestCase): def test_revision_simple(self): # # 1--2--3--4--5--6--7 # repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) repo.commit('add file1') (rp / 'file2').write_text(TEST_CONTENT) repo.commit('add file2') (rp / 'dir1/dir2').mkdir(parents=True) (rp / 'dir1/dir2/file').write_text(TEST_CONTENT) repo.commit('add dir1/dir2/file') (rp / 'bin1').write_bytes(TEST_EXECUTABLE) (rp / 'bin1').chmod(0o755) repo.commit('add bin1') (rp / 'link1').symlink_to('file1') repo.commit('link link1 to file1') (rp / 'file2').unlink() repo.commit('remove file2') (rp / 'bin1').rename(rp / 'bin') repo.commit('rename bin1 to bin') self.load(str(rp)) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with self.cook_extract_revision_gitfast(obj_id) as (ert, p): ert.checkout(b'HEAD') self.assertEqual((p / 'file1').stat().st_mode, 0o100644) self.assertEqual((p / 'file1').read_text(), TEST_CONTENT) self.assertTrue((p / 'link1').is_symlink) self.assertEqual(os.readlink(str(p / 'link1')), 'file1') self.assertEqual((p / 'bin').stat().st_mode, 0o100755) self.assertEqual((p / 'bin').read_bytes(), TEST_EXECUTABLE) self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT) self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644) self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) def test_revision_two_roots(self): # # 1----3---4 # / # 2---- # repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) c1 = repo.commit('Add file1') del repo.repo.refs[b'refs/heads/master'] # git update-ref -d HEAD (rp / 'file2').write_text(TEST_CONTENT) repo.commit('Add file2') repo.merge([c1]) (rp / 'file3').write_text(TEST_CONTENT) repo.commit('add file3') obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) self.load(str(rp)) with self.cook_extract_revision_gitfast(obj_id) as (ert, p): self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) def test_revision_two_double_fork_merge(self): # # 2---4---6 # / / / # 1---3---5 # repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) c1 = repo.commit('Add file1') repo.repo.refs[b'refs/heads/c1'] = c1 (rp / 'file2').write_text(TEST_CONTENT) repo.commit('Add file2') (rp / 'file3').write_text(TEST_CONTENT) c3 = repo.commit('Add file3', ref=b'refs/heads/c1') repo.repo.refs[b'refs/heads/c3'] = c3 repo.merge([c3]) (rp / 'file5').write_text(TEST_CONTENT) c5 = repo.commit('Add file3', ref=b'refs/heads/c3') repo.merge([c5]) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) self.load(str(rp)) with self.cook_extract_revision_gitfast(obj_id) as (ert, p): self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) def test_revision_triple_merge(self): # # .---.---5 # / / / # 2 3 4 # / / / # 1---.---. # repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) c1 = repo.commit('Commit 1') repo.repo.refs[b'refs/heads/b1'] = c1 repo.repo.refs[b'refs/heads/b2'] = c1 repo.commit('Commit 2') c3 = repo.commit('Commit 3', ref=b'refs/heads/b1') c4 = repo.commit('Commit 4', ref=b'refs/heads/b2') repo.merge([c3, c4]) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) self.load(str(rp)) with self.cook_extract_revision_gitfast(obj_id) as (ert, p): self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) def test_revision_filtered_objects(self): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b'test1') file_2, id_2 = hash_content(b'test2') file_3, id_3 = hash_content(b'test3') (rp / 'file').write_bytes(file_1) (rp / 'hidden_file').write_bytes(file_2) (rp / 'absent_file').write_bytes(file_3) repo.commit() obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) self.load(str(rp)) # FIXME: storage.content_update() should be changed to allow things # like that with self.storage.get_db().transaction() as cur: cur.execute("""update content set status = 'visible' where sha1 = %s""", (id_1,)) cur.execute("""update content set status = 'hidden' where sha1 = %s""", (id_2,)) cur.execute("""update content set status = 'absent' where sha1 = %s""", (id_3,)) with self.cook_extract_revision_gitfast(obj_id) as (ert, p): ert.checkout(b'HEAD') self.assertEqual((p / 'file').read_bytes(), b'test1') self.assertEqual((p / 'hidden_file').read_bytes(), HIDDEN_MESSAGE) self.assertEqual((p / 'absent_file').read_bytes(), SKIPPED_MESSAGE) def test_revision_bogus_perms(self): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the revision # cooker. repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) (rp / 'file').chmod(0o664) (rp / 'executable').write_bytes(TEST_EXECUTABLE) (rp / 'executable').chmod(0o775) (rp / 'wat').write_text(TEST_CONTENT) (rp / 'wat').chmod(0o604) repo.commit('initial commit') self.load(str(rp)) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) with self.cook_extract_revision_gitfast(obj_id) as (ert, p): ert.checkout(b'HEAD') self.assertEqual((p / 'file').stat().st_mode, 0o100644) self.assertEqual((p / 'executable').stat().st_mode, 0o100755) self.assertEqual((p / 'wat').stat().st_mode, 0o100644) def test_revision_null_fields(self): # Our schema doesn't enforce a lot of non-null revision fields. We need # to check these cases don't break the cooker. repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) c = repo.commit('initial commit') self.load(str(rp)) repo.repo.refs[b'HEAD'].decode() dir_id_hex = repo.repo[c].tree.decode() dir_id = hashutil.hash_to_bytes(dir_id_hex) test_id = b'56789012345678901234' test_revision = { 'id': test_id, 'message': None, 'author': {'name': None, 'email': None, 'fullname': ''}, 'date': None, 'committer': {'name': None, 'email': None, 'fullname': ''}, 'committer_date': None, 'parents': [], 'type': 'git', 'directory': dir_id, 'metadata': {}, 'synthetic': True } self.storage.revision_add([test_revision]) with self.cook_extract_revision_gitfast(test_id) as (ert, p): ert.checkout(b'HEAD') self.assertEqual((p / 'file').stat().st_mode, 0o100644) def test_revision_revision_data(self): target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') r = hashutil.hash_to_bytes('1ecc9270c4fc61cfddbc65a774e91ef5c425a6f0') dir = { 'id': d, 'entries': [ { 'name': b'submodule', 'type': 'rev', 'target': hashutil.hash_to_bytes(target_rev), 'perms': 0o100644, } ], } self.storage.directory_add([dir]) rev = { 'id': r, 'message': None, 'author': {'name': None, 'email': None, 'fullname': ''}, 'date': None, 'committer': {'name': None, 'email': None, 'fullname': ''}, 'committer_date': None, 'parents': [], 'type': 'git', 'directory': d, 'metadata': {}, 'synthetic': True } self.storage.revision_add([rev]) with self.cook_stream_revision_gitfast(r) as stream: pattern = 'M 160000 {} submodule'.format(target_rev).encode() self.assertIn(pattern, stream.read()) diff --git a/swh/vault/tests/vault_testing.py b/swh/vault/tests/vault_testing.py index b600ebe..7b71330 100644 --- a/swh/vault/tests/vault_testing.py +++ b/swh/vault/tests/vault_testing.py @@ -1,71 +1,64 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import os import tempfile -import pathlib from swh.model import hashutil from swh.vault.backend import VaultBackend +from swh.storage.tests.storage_testing import StorageTestFixture +from swh.vault.tests import SQL_DIR -class VaultTestFixture: + +class VaultTestFixture(StorageTestFixture): """Mix this in a test subject class to get Vault Database testing support. This fixture requires to come before DbTestFixture and StorageTestFixture in the inheritance list as it uses their methods to setup its own internal components. Usage example: - class TestVault(VaultTestFixture, StorageTestFixture, DbTestFixture): + class TestVault(VaultTestFixture, unittest.TestCase): ... """ - TEST_VAULT_DB_NAME = 'softwareheritage-test-vault' - - @classmethod - def setUpClass(cls): - if not hasattr(cls, 'DB_TEST_FIXTURE_IMPORTED'): - raise RuntimeError("VaultTestFixture needs to be followed by " - "DbTestFixture in the inheritance list.") - - test_dir = pathlib.Path(__file__).absolute().parent - test_db_dump = test_dir / '../../../sql/swh-vault-schema.sql' - test_db_dump = test_db_dump.absolute() - cls.add_db(cls.TEST_VAULT_DB_NAME, str(test_db_dump), 'psql') - super().setUpClass() + TEST_DB_NAME = 'softwareheritage-test-vault' + TEST_DB_DUMP = [StorageTestFixture.TEST_DB_DUMP, + os.path.join(SQL_DIR, '*.sql')] def setUp(self): super().setUp() self.cache_root = tempfile.TemporaryDirectory('vault-cache-') self.vault_config = { 'storage': self.storage_config, 'cache': { 'cls': 'pathslicing', 'args': { 'root': self.cache_root.name, 'slicing': '0:1/1:5', 'allow_delete': True, } }, - 'db': 'postgresql:///' + self.TEST_VAULT_DB_NAME, + 'db': 'postgresql:///' + self.TEST_DB_NAME, 'scheduler': None, } self.vault_backend = VaultBackend(self.vault_config) def tearDown(self): self.cache_root.cleanup() self.vault_backend.close() self.reset_storage_tables() self.reset_vault_tables() super().tearDown() def reset_vault_tables(self): excluded = {'dbversion'} - self.reset_db_tables(self.TEST_VAULT_DB_NAME, excluded=excluded) + self.reset_db_tables(self.TEST_DB_NAME, excluded=excluded) def hash_content(content): obj_id = hashutil.hash_data(content)['sha1'] return content, obj_id