# pytest fixtures shared by the swh-vault test suite.
#
# Provides two throwaway PostgreSQL databases (vault + storage schemas),
# a temporary objstorage/cache root, and a fully-configured local vault
# backend via the `swh_vault` fixture.
import pytest
import glob
import os
import pkg_resources.extern.packaging.version

from swh.core.utils import numfile_sortkey as sortkey
from swh.vault import get_vault
from swh.vault.tests import SQL_DIR
from swh.storage.tests import SQL_DIR as STORAGE_SQL_DIR
from pytest_postgresql import factories


# Backport of the `tmp_path` fixture for pytest < 3.9, which did not
# ship it; on newer pytest the built-in fixture is used instead.
pytest_v = pkg_resources.get_distribution("pytest").parsed_version
if pytest_v < pkg_resources.extern.packaging.version.parse('3.9'):
    @pytest.fixture
    def tmp_path(request):
        """Yield a pathlib.Path to a per-test temporary directory."""
        import tempfile
        import pathlib
        with tempfile.TemporaryDirectory() as tmpdir:
            yield pathlib.Path(tmpdir)


def db_url(name, postgresql_proc):
    """Build a connection URL for database `name` on the test postgres
    process spawned by pytest-postgresql."""
    return 'postgresql://{user}@{host}:{port}/{dbname}'.format(
        host=postgresql_proc.host,
        port=postgresql_proc.port,
        user='postgres',
        dbname=name)


# Second database on the same postgres process, used for the storage
# schema (the default `postgresql` fixture holds the vault schema).
postgresql2 = factories.postgresql('postgresql_proc', 'tests2')


@pytest.fixture
def swh_vault(request, postgresql_proc, postgresql, postgresql2, tmp_path):
    """Return a local vault backend wired to two fresh test databases
    and a temporary pathslicing objstorage/cache.

    NOTE(review): assumes the default `postgresql` fixture database is
    named 'tests' (see the db_url('tests', ...) below) -- confirm against
    the pytest-postgresql factory defaults.
    """

    # Load the vault schema into the first DB and the storage schema
    # into the second one.
    for sql_dir, pg in ((SQL_DIR, postgresql), (STORAGE_SQL_DIR, postgresql2)):
        dump_files = os.path.join(sql_dir, '*.sql')
        all_dump_files = sorted(glob.glob(dump_files), key=sortkey)

        cursor = pg.cursor()
        for fname in all_dump_files:
            with open(fname) as fobj:
                # disable concurrent index creation since we run in a
                # transaction
                cursor.execute(fobj.read().replace('concurrently', ''))
        pg.commit()

    vault_config = {
        'db': db_url('tests', postgresql_proc),
        'storage': {
            'cls': 'local',
            'args': {
                'db': db_url('tests2', postgresql_proc),
                'objstorage': {
                    'cls': 'pathslicing',
                    'args': {
                        'root': str(tmp_path),
                        'slicing': '0:1/1:5',
                    },
                },
            },
        },
        'cache': {
            'cls': 'pathslicing',
            'args': {
                'root': str(tmp_path),
                'slicing': '0:1/1:5',
                # the cache expiration tests delete entries
                'allow_delete': True,
            }
        },
        'scheduler': {
            # never actually reached in tests: _send_task is mocked
            'cls': 'remote',
            'args': {
                'url': 'http://swh-scheduler:5008',
            },
        },
    }

    return get_vault('local', vault_config)
@contextlib.contextmanager
def mock_cooking(vault_backend):
    """Patch the backend's task submission and cooker lookup.

    Yields a dict exposing the four mocks: '_send_task' (returns task
    id 42), 'get_cooker_cls', 'cooker_cls' and 'cooker' (whose
    check_exists() returns True).
    """
    with patch.object(vault_backend, '_send_task') as mt:
        mt.return_value = 42
        with patch('swh.vault.backend.get_cooker_cls') as mg:
            mcc = MagicMock()
            mc = MagicMock()
            mg.return_value = mcc
            mcc.return_value = mc
            mc.check_exists.return_value = True

            yield {'_send_task': mt,
                   'get_cooker_cls': mg,
                   'cooker_cls': mcc,
                   'cooker': mc}


def assertTimestampAlmostNow(ts, tolerance_secs=1.0):  # noqa
    """Assert `ts` is no more than `tolerance_secs` in the future.

    NOTE(review): one-sided check -- a timestamp arbitrarily far in the
    past also passes, since only the upper bound is asserted.
    """
    now = datetime.datetime.now(datetime.timezone.utc)
    creation_delta_secs = (ts - now).total_seconds()
    assert creation_delta_secs < tolerance_secs


def fake_cook(backend, obj_type, result_content, sticky=False):
    """Simulate a successful cooking: create the task (with mocked
    submission), populate the cache and mark the task 'done'.

    Returns (obj_id, content) as computed by hash_content().
    """
    content, obj_id = hash_content(result_content)
    # only create_task needs the mocked cooker/_send_task
    with mock_cooking(backend):
        backend.create_task(obj_type, obj_id, sticky)
    backend.cache.add(obj_type, obj_id, b'content')
    backend.set_status(obj_type, obj_id, 'done')
    return obj_id, content


def fail_cook(backend, obj_type, obj_id, failure_reason):
    """Simulate a failed cooking: create the task, mark it 'failed' and
    record `failure_reason` as its progress message."""
    with mock_cooking(backend):
        backend.create_task(obj_type, obj_id)
    backend.set_status(obj_type, obj_id, 'failed')
    backend.set_progress(obj_type, obj_id, failure_reason)


# Shared test identifiers: one revision_gitfast bundle.
TEST_TYPE = 'revision_gitfast'
TEST_HEX_ID = '4a4b9771542143cf070386f86b4b92d42966bdbc'
TEST_OBJ_ID = hashutil.hash_to_bytes(TEST_HEX_ID)

TEST_PROGRESS = ("Mr. White, You're telling me you're cooking again?"
                 " \N{ASTONISHED FACE} ")
TEST_EMAIL = 'ouiche@lorraine.fr'


def test_create_task_simple(swh_vault):
    """create_task wires the cooker, submits the task and records it."""
    with mock_cooking(swh_vault) as m:
        swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)

    m['get_cooker_cls'].assert_called_once_with(TEST_TYPE)

    # cooker class instantiated with (type, hex id)
    args = m['cooker_cls'].call_args[0]
    assert args[0] == TEST_TYPE
    assert args[1] == TEST_HEX_ID

    assert m['cooker'].check_exists.call_count == 1
    assert m['_send_task'].call_count == 1

    # task submitted with (type, hex id)
    args = m['_send_task'].call_args[0]
    assert args[0] == TEST_TYPE
    assert args[1] == TEST_HEX_ID

    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['object_id'] == TEST_OBJ_ID
    assert info['type'] == TEST_TYPE
    assert info['task_status'] == 'new'
    assert info['task_id'] == 42  # the mocked _send_task return value

    assertTimestampAlmostNow(info['ts_created'])

    assert info['ts_done'] is None
    assert info['progress_msg'] is None


def test_create_fail_duplicate_task(swh_vault):
    """Creating the same (type, id) task twice violates a DB constraint."""
    with mock_cooking(swh_vault):
        swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)
        with pytest.raises(psycopg2.IntegrityError):
            swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)


def test_create_fail_nonexisting_object(swh_vault):
    """A cooker that raises in check_exists aborts task creation."""
    with mock_cooking(swh_vault) as m:
        m['cooker'].check_exists.side_effect = ValueError('Nothing here.')
        with pytest.raises(ValueError):
            swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)


def test_create_set_progress(swh_vault):
    """set_progress stores the message retrievable via task_info."""
    with mock_cooking(swh_vault):
        swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)

    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['progress_msg'] is None
    swh_vault.set_progress(TEST_TYPE, TEST_OBJ_ID,
                           TEST_PROGRESS)
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['progress_msg'] == TEST_PROGRESS


def test_create_set_status(swh_vault):
    """set_status transitions new -> pending -> done; ts_done is only
    filled on 'done'."""
    with mock_cooking(swh_vault):
        swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)

    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['task_status'] == 'new'
    assert info['ts_done'] is None

    swh_vault.set_status(TEST_TYPE, TEST_OBJ_ID, 'pending')
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['task_status'] == 'pending'
    assert info['ts_done'] is None

    swh_vault.set_status(TEST_TYPE, TEST_OBJ_ID, 'done')
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['task_status'] == 'done'
    assertTimestampAlmostNow(info['ts_done'])


def test_create_update_access_ts(swh_vault):
    """Each update_access_ts call strictly advances ts_last_access."""
    with mock_cooking(swh_vault):
        swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)

    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    access_ts_1 = info['ts_last_access']
    assertTimestampAlmostNow(access_ts_1)

    swh_vault.update_access_ts(TEST_TYPE, TEST_OBJ_ID)
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    access_ts_2 = info['ts_last_access']
    assertTimestampAlmostNow(access_ts_2)

    swh_vault.update_access_ts(TEST_TYPE, TEST_OBJ_ID)
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    access_ts_3 = info['ts_last_access']
    assertTimestampAlmostNow(access_ts_3)

    assert access_ts_1 < access_ts_2
    assert access_ts_2 < access_ts_3


def test_cook_request_idempotent(swh_vault):
    """Repeated cook_request calls return the same task info."""
    with mock_cooking(swh_vault):
        info1 = swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID)
        info2 = swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID)
        info3 = swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID)
        assert info1 == info2
        assert info1 == info3


def test_cook_email_pending_done(swh_vault):
    """Emails are queued while cooking is pending and sent immediately
    once the task is done."""
    with mock_cooking(swh_vault), \
            patch.object(swh_vault, 'add_notif_email') as madd, \
            patch.object(swh_vault, 'send_notification') as msend:

        # no email given: nothing queued, nothing sent
        swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID)
        madd.assert_not_called()
        msend.assert_not_called()

        madd.reset_mock()
        msend.reset_mock()

        # pending task + email: queued but not sent yet
        swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID,
                               email=TEST_EMAIL)
        madd.assert_called_once_with(TEST_TYPE, TEST_OBJ_ID, TEST_EMAIL)
        msend.assert_not_called()

        madd.reset_mock()
        msend.reset_mock()

        # done task + email: sent immediately, not queued
        swh_vault.set_status(TEST_TYPE, TEST_OBJ_ID, 'done')
        swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID,
                               email=TEST_EMAIL)
        msend.assert_called_once_with(None, TEST_EMAIL,
                                      TEST_TYPE, TEST_OBJ_ID, 'done')
        madd.assert_not_called()


def test_send_all_emails(swh_vault):
    """All queued notification emails are sent once, then discarded."""
    with mock_cooking(swh_vault):
        emails = ('a@example.com',
                  'billg@example.com',
                  'test+42@example.org')
        for email in emails:
            swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID,
                                   email=email)

    swh_vault.set_status(TEST_TYPE, TEST_OBJ_ID, 'done')

    with patch.object(swh_vault, 'smtp_server') as m:
        swh_vault.send_all_notifications(TEST_TYPE, TEST_OBJ_ID)

        sent_emails = {k[0][0] for k in m.send_message.call_args_list}
        assert {k['To'] for k in sent_emails} == set(emails)

        for e in sent_emails:
            assert 'info@softwareheritage.org' in e['From']
            assert TEST_TYPE in e['Subject']
            assert TEST_HEX_ID[:5] in e['Subject']
            assert TEST_TYPE in str(e)
            assert 'https://archive.softwareheritage.org/' in str(e)
            assert TEST_HEX_ID[:5] in str(e)
            assert '--\x20\n' in str(e)  # Well-formatted signature!!!

        # Check that the entries have been deleted and recalling the
        # function does not re-send the e-mails
        m.reset_mock()
        swh_vault.send_all_notifications(TEST_TYPE, TEST_OBJ_ID)
        m.assert_not_called()


def test_available(swh_vault):
    """is_available is True only when the task is done AND cached."""
    assert not swh_vault.is_available(TEST_TYPE, TEST_OBJ_ID)

    with mock_cooking(swh_vault):
        swh_vault.create_task(TEST_TYPE, TEST_OBJ_ID)
    assert not swh_vault.is_available(TEST_TYPE, TEST_OBJ_ID)

    swh_vault.cache.add(TEST_TYPE, TEST_OBJ_ID, b'content')
    assert not swh_vault.is_available(TEST_TYPE, TEST_OBJ_ID)

    swh_vault.set_status(TEST_TYPE, TEST_OBJ_ID, 'done')
    assert swh_vault.is_available(TEST_TYPE, TEST_OBJ_ID)


def test_fetch(swh_vault):
    """fetch returns the cached bundle and bumps ts_last_access."""
    assert swh_vault.fetch(TEST_TYPE, TEST_OBJ_ID) is None
    obj_id, content = fake_cook(swh_vault, TEST_TYPE, b'content')

    info = swh_vault.task_info(TEST_TYPE, obj_id)
    access_ts_before = info['ts_last_access']

    assert swh_vault.fetch(TEST_TYPE, obj_id) == b'content'

    info = swh_vault.task_info(TEST_TYPE, obj_id)
    access_ts_after = info['ts_last_access']

    assertTimestampAlmostNow(access_ts_after)
    assert access_ts_before < access_ts_after


def test_cache_expire_oldest(swh_vault):
    """cache_expire_oldest(n) drops the n least-recently-accessed,
    non-sticky bundles (here #5 is sticky; #2/#3 were re-accessed)."""
    r = range(1, 10)
    inserted = {}
    for i in r:
        sticky = (i == 5)
        content = b'content%s' % str(i).encode()
        obj_id, content = fake_cook(swh_vault, TEST_TYPE, content, sticky)
        inserted[i] = (obj_id, content)

    swh_vault.update_access_ts(TEST_TYPE, inserted[2][0])
    swh_vault.update_access_ts(TEST_TYPE, inserted[3][0])
    swh_vault.cache_expire_oldest(n=4)

    should_be_still_here = {2, 3, 5, 8, 9}
    for i in r:
        assert swh_vault.is_available(
            TEST_TYPE, inserted[i][0]) == (i in should_be_still_here)


def test_cache_expire_until(swh_vault):
    """cache_expire_until drops non-sticky bundles last accessed before
    the cutoff date (taken just after #7 was inserted)."""
    r = range(1, 10)
    inserted = {}
    for i in r:
        sticky = (i == 5)
        content = b'content%s' % str(i).encode()
        obj_id, content = fake_cook(swh_vault, TEST_TYPE, content, sticky)
        inserted[i] = (obj_id, content)

        if i == 7:
            cutoff_date = datetime.datetime.now()

    swh_vault.update_access_ts(TEST_TYPE, inserted[2][0])
    swh_vault.update_access_ts(TEST_TYPE, inserted[3][0])
    swh_vault.cache_expire_until(date=cutoff_date)

    should_be_still_here = {2, 3, 5, 8, 9}
    for i in r:
        assert swh_vault.is_available(
            TEST_TYPE, inserted[i][0]) == (i in should_be_still_here)


def test_fail_cook_simple(swh_vault):
    """A failed cooking is unavailable and keeps its failure message."""
    fail_cook(swh_vault, TEST_TYPE, TEST_OBJ_ID, 'error42')
    assert not swh_vault.is_available(TEST_TYPE, TEST_OBJ_ID)
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['progress_msg'] == 'error42'


def test_send_failure_email(swh_vault):
    """Failure notifications mention the failure in subject and body."""
    with mock_cooking(swh_vault):
        swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID, email='a@example.com')

    swh_vault.set_status(TEST_TYPE, TEST_OBJ_ID, 'failed')
    swh_vault.set_progress(TEST_TYPE, TEST_OBJ_ID, 'test error')

    with patch.object(swh_vault, 'smtp_server') as m:
        swh_vault.send_all_notifications(TEST_TYPE, TEST_OBJ_ID)

        e = [k[0][0] for k in m.send_message.call_args_list][0]
        assert e['To'] == 'a@example.com'

        assert 'info@softwareheritage.org' in e['From']
        assert TEST_TYPE in e['Subject']
        assert TEST_HEX_ID[:5] in e['Subject']
        assert 'fail' in e['Subject']
        assert TEST_TYPE in str(e)
        assert TEST_HEX_ID[:5] in str(e)
        assert 'test error' in str(e)
        assert '--\x20\n' in str(e)  # Well-formatted signature


def test_retry_failed_bundle(swh_vault):
    """Re-requesting a failed bundle resets the task to 'new'."""
    fail_cook(swh_vault, TEST_TYPE, TEST_OBJ_ID, 'error42')
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['task_status'] == 'failed'
    with mock_cooking(swh_vault):
        swh_vault.cook_request(TEST_TYPE, TEST_OBJ_ID)
    info = swh_vault.task_info(TEST_TYPE, TEST_OBJ_ID)
    assert info['task_status'] == 'new'
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.model import hashutil

# Two object types sharing ids/contents to exercise cache keying.
TEST_TYPE_1 = 'revision_gitfast'
TEST_TYPE_2 = 'directory'

TEST_HEX_ID_1 = '4a4b9771542143cf070386f86b4b92d42966bdbc'
TEST_HEX_ID_2 = '17a3e48bce37be5226490e750202ad3a9a1a3fe9'

TEST_OBJ_ID_1 = hashutil.hash_to_bytes(TEST_HEX_ID_1)
TEST_OBJ_ID_2 = hashutil.hash_to_bytes(TEST_HEX_ID_2)

TEST_CONTENT_1 = b'test content 1'
TEST_CONTENT_2 = b'test content 2'


# Let's try to avoid replicating edge-cases already tested in
# swh-objstorage, and instead focus on testing behaviors specific to the
# Vault cache here.

def test_internal_id(swh_vault):
    """The cache's internal id is a stable hash of (type, object id)."""
    sid = swh_vault.cache._get_internal_id(TEST_TYPE_1, TEST_OBJ_ID_1)
    assert hashutil.hash_to_hex(sid) == \
        '6829cda55b54c295aa043a611a4e0320239988d9'


def test_simple_add_get(swh_vault):
    """An added entry can be read back and reports as cached."""
    swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
    assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == \
        TEST_CONTENT_1
    assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)


def test_different_type_same_id(swh_vault):
    """Entries are keyed by type: same id, two types, two contents."""
    swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
    swh_vault.cache.add(TEST_TYPE_2, TEST_OBJ_ID_1, TEST_CONTENT_2)
    assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == \
        TEST_CONTENT_1
    assert swh_vault.cache.get(TEST_TYPE_2, TEST_OBJ_ID_1) == \
        TEST_CONTENT_2
    assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)
    assert swh_vault.cache.is_cached(TEST_TYPE_2, TEST_OBJ_ID_1)


def test_different_type_same_content(swh_vault):
    """Identical content under two types yields two distinct entries."""
    swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
    swh_vault.cache.add(TEST_TYPE_2, TEST_OBJ_ID_1, TEST_CONTENT_1)
    assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == \
        TEST_CONTENT_1
    assert swh_vault.cache.get(TEST_TYPE_2, TEST_OBJ_ID_1) == \
        TEST_CONTENT_1
    assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)
    assert swh_vault.cache.is_cached(TEST_TYPE_2, TEST_OBJ_ID_1)


def test_different_id_same_type(swh_vault):
    """Entries are keyed by object id within a single type."""
    swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_1, TEST_CONTENT_1)
    swh_vault.cache.add(TEST_TYPE_1, TEST_OBJ_ID_2, TEST_CONTENT_2)
    assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_1) == \
        TEST_CONTENT_1
    assert swh_vault.cache.get(TEST_TYPE_1, TEST_OBJ_ID_2) == \
        TEST_CONTENT_2
    assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_1)
    assert swh_vault.cache.is_cached(TEST_TYPE_1, TEST_OBJ_ID_2)
# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import contextlib
import datetime
import gzip
import io
import os
import pathlib
import pytest
import subprocess
import tarfile
import tempfile
import unittest
import unittest.mock

import dulwich.fastexport
import dulwich.index
import dulwich.objects
import dulwich.porcelain
import dulwich.repo

from swh.loader.git.from_disk import GitLoaderFromDisk
from swh.model import hashutil
from swh.model.from_disk import Directory
from swh.vault.cookers import DirectoryCooker, RevisionGitfastCooker
from swh.vault.tests.vault_testing import hash_content
from swh.vault.to_disk import SKIPPED_MESSAGE, HIDDEN_MESSAGE


class TestRepo:
    """A tiny context manager for a test git repository, with some utility
    functions to perform basic git stuff.
    """

    def __enter__(self):
        """Create a temp dir, init a dulwich repo in it and return its
        path; fixed base date + counter make commit times deterministic."""
        self.tmp_dir = tempfile.TemporaryDirectory(prefix='tmp-vault-repo-')
        self.repo_dir = self.tmp_dir.__enter__()
        self.repo = dulwich.repo.Repo.init(self.repo_dir)
        self.author_name = b'Test Author'
        self.author_email = b'test@softwareheritage.org'
        self.author = b'%s <%s>' % (self.author_name, self.author_email)
        self.base_date = 258244200
        self.counter = 0
        return pathlib.Path(self.repo_dir)

    def __exit__(self, exc, value, tb):
        # delegate cleanup to the TemporaryDirectory context manager
        self.tmp_dir.__exit__(exc, value, tb)

    def checkout(self, rev_sha):
        """Materialize the tree of revision `rev_sha` in the work dir."""
        rev = self.repo[rev_sha]
        dulwich.index.build_index_from_tree(self.repo_dir,
                                            self.repo.index_path(),
                                            self.repo.object_store,
                                            rev.tree)

    def git_shell(self, *cmd, stdout=subprocess.DEVNULL, **kwargs):
        """Run `git -C <repo_dir> *cmd` with a controlled environment so
        author/committer identity and dates are deterministic."""
        name = self.author_name
        email = self.author_email
        date = '%d +0000' % (self.base_date + self.counter)
        env = {
            # Set git commit format
            'GIT_AUTHOR_NAME': name,
            'GIT_AUTHOR_EMAIL': email,
            'GIT_AUTHOR_DATE': date,
            'GIT_COMMITTER_NAME': name,
            'GIT_COMMITTER_EMAIL': email,
            'GIT_COMMITTER_DATE': date,
            # Ignore all the system-wide and user configurations
            'GIT_CONFIG_NOSYSTEM': '1',
            'HOME': str(self.tmp_dir),
            'XDG_CONFIG_HOME': str(self.tmp_dir),
        }
        kwargs.setdefault('env', {}).update(env)

        subprocess.check_call(('git', '-C', self.repo_dir) + cmd,
                              stdout=stdout, **kwargs)

    def commit(self, message='Commit test\n', ref=b'HEAD'):
        """Stage everything and commit via dulwich; returns the new
        commit sha and advances the deterministic clock."""
        self.git_shell('add', '.')
        message = message.encode() + b'\n'
        ret = self.repo.do_commit(
            message=message, committer=self.author,
            commit_timestamp=self.base_date + self.counter,
            commit_timezone=0, ref=ref)
        self.counter += 1
        return ret

    def merge(self, parent_sha_list, message='Merge branches.'):
        """Merge the given parents into HEAD (shelling out to git) and
        return the resulting HEAD sha."""
        self.git_shell('merge', '--allow-unrelated-histories',
                       '-m', message, *[p.decode() for p in parent_sha_list])
        self.counter += 1
        return self.repo.refs[b'HEAD']

    def print_debug_graph(self, reflog=False):
        """Print the commit graph to stdout (debugging helper)."""
        args = ['log', '--all', '--graph', '--decorate']
        if reflog:
            args.append('--reflog')
        self.git_shell(*args, stdout=None)
BaseTestCookers(VaultTestFixture): - """Base class of cookers unit tests""" - def setUp(self): - super().setUp() - self.loader = GitLoaderFromDisk() - self.loader.storage = self.storage - - def tearDown(self): - self.loader = None - super().tearDown() - - def load(self, repo_path): - """Load a repository in the test storage""" - self.loader.load('fake_origin', repo_path, datetime.datetime.now()) - - @contextlib.contextmanager - def cook_extract_directory(self, obj_id): - """Context manager that cooks a directory and extract it.""" - cooker = DirectoryCooker('directory', obj_id) - cooker.storage = self.storage - cooker.backend = unittest.mock.MagicMock() - cooker.fileobj = io.BytesIO() - assert cooker.check_exists() - cooker.prepare_bundle() - cooker.fileobj.seek(0) - with tempfile.TemporaryDirectory(prefix='tmp-vault-extract-') as td: - with tarfile.open(fileobj=cooker.fileobj, mode='r') as tar: - tar.extractall(td) - yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) - cooker.storage = None - - @contextlib.contextmanager - def cook_stream_revision_gitfast(self, obj_id): - """Context manager that cooks a revision and stream its fastexport.""" - cooker = RevisionGitfastCooker('revision_gitfast', obj_id) - cooker.storage = self.storage - cooker.backend = unittest.mock.MagicMock() - cooker.fileobj = io.BytesIO() - assert cooker.check_exists() - cooker.prepare_bundle() - cooker.fileobj.seek(0) - fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) - yield fastexport_stream - cooker.storage = None - - @contextlib.contextmanager - def cook_extract_revision_gitfast(self, obj_id): - """Context manager that cooks a revision and extract it.""" - test_repo = TestRepo() - with self.cook_stream_revision_gitfast(obj_id) as stream, \ - test_repo as p: - processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) - processor.import_stream(stream) - yield test_repo, p +@pytest.fixture +def swh_git_loader(swh_vault): + loader = GitLoaderFromDisk() + loader.storage = 
swh_vault.storage + return loader + + +def load(loader, repo_path): + """Load a repository in the test storage""" + loader.load('fake_origin', repo_path, datetime.datetime.now()) + + +@contextlib.contextmanager +def cook_extract_directory(storage, obj_id): + """Context manager that cooks a directory and extract it.""" + backend = unittest.mock.MagicMock() + backend.storage = storage + cooker = DirectoryCooker( + 'directory', obj_id, backend=backend, storage=storage) + cooker.fileobj = io.BytesIO() + assert cooker.check_exists() + cooker.prepare_bundle() + cooker.fileobj.seek(0) + with tempfile.TemporaryDirectory(prefix='tmp-vault-extract-') as td: + with tarfile.open(fileobj=cooker.fileobj, mode='r') as tar: + tar.extractall(td) + yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id) + cooker.storage = None + + +@contextlib.contextmanager +def cook_stream_revision_gitfast(storage, obj_id): + """Context manager that cooks a revision and stream its fastexport.""" + backend = unittest.mock.MagicMock() + backend.storage = storage + cooker = RevisionGitfastCooker( + 'revision_gitfast', obj_id, backend=backend, storage=storage) + cooker.fileobj = io.BytesIO() + assert cooker.check_exists() + cooker.prepare_bundle() + cooker.fileobj.seek(0) + fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj) + yield fastexport_stream + cooker.storage = None + + +@contextlib.contextmanager +def cook_extract_revision_gitfast(storage, obj_id): + """Context manager that cooks a revision and extract it.""" + test_repo = TestRepo() + with cook_stream_revision_gitfast(storage, obj_id) as stream, \ + test_repo as p: + processor = dulwich.fastexport.GitImportProcessor(test_repo.repo) + processor.import_stream(stream) + yield test_repo, p TEST_CONTENT = (" test content\n" "and unicode \N{BLACK HEART SUIT}\n" " and trailing spaces ") TEST_EXECUTABLE = b'\x42\x40\x00\x00\x05' -class TestDirectoryCooker(BaseTestCookers, unittest.TestCase): - def test_directory_simple(self): +class 
TestDirectoryCooker: + def test_directory_simple(self, swh_git_loader): repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) (rp / 'executable').write_bytes(TEST_EXECUTABLE) (rp / 'executable').chmod(0o755) (rp / 'link').symlink_to('file') (rp / 'dir1/dir2').mkdir(parents=True) (rp / 'dir1/dir2/file').write_text(TEST_CONTENT) c = repo.commit() - self.load(str(rp)) + load(swh_git_loader, str(rp)) obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - with self.cook_extract_directory(obj_id) as p: - self.assertEqual((p / 'file').stat().st_mode, 0o100644) - self.assertEqual((p / 'file').read_text(), TEST_CONTENT) - self.assertEqual((p / 'executable').stat().st_mode, 0o100755) - self.assertEqual((p / 'executable').read_bytes(), TEST_EXECUTABLE) - self.assertTrue((p / 'link').is_symlink) - self.assertEqual(os.readlink(str(p / 'link')), 'file') - self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644) - self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT) + with cook_extract_directory(swh_git_loader.storage, obj_id) as p: + assert (p / 'file').stat().st_mode == 0o100644 + assert (p / 'file').read_text() == TEST_CONTENT + assert (p / 'executable').stat().st_mode == 0o100755 + assert (p / 'executable').read_bytes() == TEST_EXECUTABLE + assert (p / 'link').is_symlink + assert os.readlink(str(p / 'link')) == 'file' + assert (p / 'dir1/dir2/file').stat().st_mode == 0o100644 + assert (p / 'dir1/dir2/file').read_text() == TEST_CONTENT directory = Directory.from_disk(path=bytes(p)) - self.assertEqual(obj_id_hex, hashutil.hash_to_hex(directory.hash)) + assert obj_id_hex == hashutil.hash_to_hex(directory.hash) - def test_directory_filtered_objects(self): + def test_directory_filtered_objects(self, swh_git_loader): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b'test1') file_2, id_2 = hash_content(b'test2') file_3, id_3 = hash_content(b'test3') (rp / 'file').write_bytes(file_1) (rp / 
'hidden_file').write_bytes(file_2) (rp / 'absent_file').write_bytes(file_3) c = repo.commit() - self.load(str(rp)) + load(swh_git_loader, str(rp)) obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) # FIXME: storage.content_update() should be changed to allow things # like that - with self.storage.get_db().transaction() as cur: + with swh_git_loader.storage.get_db().transaction() as cur: cur.execute("""update content set status = 'visible' where sha1 = %s""", (id_1,)) cur.execute("""update content set status = 'hidden' where sha1 = %s""", (id_2,)) cur.execute("""update content set status = 'absent' where sha1 = %s""", (id_3,)) - with self.cook_extract_directory(obj_id) as p: - self.assertEqual((p / 'file').read_bytes(), b'test1') - self.assertEqual((p / 'hidden_file').read_bytes(), HIDDEN_MESSAGE) - self.assertEqual((p / 'absent_file').read_bytes(), SKIPPED_MESSAGE) + with cook_extract_directory(swh_git_loader.storage, obj_id) as p: + assert (p / 'file').read_bytes() == b'test1' + assert (p / 'hidden_file').read_bytes() == HIDDEN_MESSAGE + assert (p / 'absent_file').read_bytes() == SKIPPED_MESSAGE - def test_directory_bogus_perms(self): + def test_directory_bogus_perms(self, swh_git_loader): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the directory # cooker. 
repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) (rp / 'file').chmod(0o664) (rp / 'executable').write_bytes(TEST_EXECUTABLE) (rp / 'executable').chmod(0o775) (rp / 'wat').write_text(TEST_CONTENT) (rp / 'wat').chmod(0o604) c = repo.commit() - self.load(str(rp)) + load(swh_git_loader, str(rp)) obj_id_hex = repo.repo[c].tree.decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - with self.cook_extract_directory(obj_id) as p: - self.assertEqual((p / 'file').stat().st_mode, 0o100644) - self.assertEqual((p / 'executable').stat().st_mode, 0o100755) - self.assertEqual((p / 'wat').stat().st_mode, 0o100644) + with cook_extract_directory(swh_git_loader.storage, obj_id) as p: + assert (p / 'file').stat().st_mode == 0o100644 + assert (p / 'executable').stat().st_mode == 0o100755 + assert (p / 'wat').stat().st_mode == 0o100644 - def test_directory_revision_data(self): + def test_directory_revision_data(self, swh_git_loader): target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') dir = { 'id': d, 'entries': [ { 'name': b'submodule', 'type': 'rev', 'target': hashutil.hash_to_bytes(target_rev), 'perms': 0o100644, } ], } - self.storage.directory_add([dir]) + swh_git_loader.storage.directory_add([dir]) - with self.cook_extract_directory(d) as p: - self.assertTrue((p / 'submodule').is_symlink()) - self.assertEqual(os.readlink(str(p / 'submodule')), target_rev) + with cook_extract_directory(swh_git_loader.storage, d) as p: + assert (p / 'submodule').is_symlink() + assert os.readlink(str(p / 'submodule')) == target_rev -class TestRevisionGitfastCooker(BaseTestCookers, unittest.TestCase): - def test_revision_simple(self): +class TestRevisionGitfastCooker: + def test_revision_simple(self, swh_git_loader): # # 1--2--3--4--5--6--7 # + storage = swh_git_loader.storage repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) repo.commit('add file1') (rp / 
'file2').write_text(TEST_CONTENT) repo.commit('add file2') (rp / 'dir1/dir2').mkdir(parents=True) (rp / 'dir1/dir2/file').write_text(TEST_CONTENT) repo.commit('add dir1/dir2/file') (rp / 'bin1').write_bytes(TEST_EXECUTABLE) (rp / 'bin1').chmod(0o755) repo.commit('add bin1') (rp / 'link1').symlink_to('file1') repo.commit('link link1 to file1') (rp / 'file2').unlink() repo.commit('remove file2') (rp / 'bin1').rename(rp / 'bin') repo.commit('rename bin1 to bin') - self.load(str(rp)) + load(swh_git_loader, str(rp)) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - with self.cook_extract_revision_gitfast(obj_id) as (ert, p): + with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): ert.checkout(b'HEAD') - self.assertEqual((p / 'file1').stat().st_mode, 0o100644) - self.assertEqual((p / 'file1').read_text(), TEST_CONTENT) - self.assertTrue((p / 'link1').is_symlink) - self.assertEqual(os.readlink(str(p / 'link1')), 'file1') - self.assertEqual((p / 'bin').stat().st_mode, 0o100755) - self.assertEqual((p / 'bin').read_bytes(), TEST_EXECUTABLE) - self.assertEqual((p / 'dir1/dir2/file').read_text(), TEST_CONTENT) - self.assertEqual((p / 'dir1/dir2/file').stat().st_mode, 0o100644) - self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) - - def test_revision_two_roots(self): + assert (p / 'file1').stat().st_mode == 0o100644 + assert (p / 'file1').read_text() == TEST_CONTENT + assert (p / 'link1').is_symlink + assert os.readlink(str(p / 'link1')) == 'file1' + assert (p / 'bin').stat().st_mode == 0o100755 + assert (p / 'bin').read_bytes() == TEST_EXECUTABLE + assert (p / 'dir1/dir2/file').read_text() == TEST_CONTENT + assert (p / 'dir1/dir2/file').stat().st_mode == 0o100644 + assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex + + def test_revision_two_roots(self, swh_git_loader): # # 1----3---4 # / # 2---- # + storage = swh_git_loader.storage repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) c1 = 
repo.commit('Add file1') del repo.repo.refs[b'refs/heads/master'] # git update-ref -d HEAD (rp / 'file2').write_text(TEST_CONTENT) repo.commit('Add file2') repo.merge([c1]) (rp / 'file3').write_text(TEST_CONTENT) repo.commit('add file3') obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - self.load(str(rp)) + load(swh_git_loader, str(rp)) - with self.cook_extract_revision_gitfast(obj_id) as (ert, p): - self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) + with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): + assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex - def test_revision_two_double_fork_merge(self): + def test_revision_two_double_fork_merge(self, swh_git_loader): # # 2---4---6 # / / / # 1---3---5 # + storage = swh_git_loader.storage repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) c1 = repo.commit('Add file1') repo.repo.refs[b'refs/heads/c1'] = c1 (rp / 'file2').write_text(TEST_CONTENT) repo.commit('Add file2') (rp / 'file3').write_text(TEST_CONTENT) c3 = repo.commit('Add file3', ref=b'refs/heads/c1') repo.repo.refs[b'refs/heads/c3'] = c3 repo.merge([c3]) (rp / 'file5').write_text(TEST_CONTENT) c5 = repo.commit('Add file3', ref=b'refs/heads/c3') repo.merge([c5]) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - self.load(str(rp)) + load(swh_git_loader, str(rp)) - with self.cook_extract_revision_gitfast(obj_id) as (ert, p): - self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) + with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): + assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex - def test_revision_triple_merge(self): + def test_revision_triple_merge(self, swh_git_loader): # # .---.---5 # / / / # 2 3 4 # / / / # 1---.---. 
# + storage = swh_git_loader.storage repo = TestRepo() with repo as rp: (rp / 'file1').write_text(TEST_CONTENT) c1 = repo.commit('Commit 1') repo.repo.refs[b'refs/heads/b1'] = c1 repo.repo.refs[b'refs/heads/b2'] = c1 repo.commit('Commit 2') c3 = repo.commit('Commit 3', ref=b'refs/heads/b1') c4 = repo.commit('Commit 4', ref=b'refs/heads/b2') repo.merge([c3, c4]) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - self.load(str(rp)) + load(swh_git_loader, str(rp)) - with self.cook_extract_revision_gitfast(obj_id) as (ert, p): - self.assertEqual(ert.repo.refs[b'HEAD'].decode(), obj_id_hex) + with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): + assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex - def test_revision_filtered_objects(self): + def test_revision_filtered_objects(self, swh_git_loader): + storage = swh_git_loader.storage repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b'test1') file_2, id_2 = hash_content(b'test2') file_3, id_3 = hash_content(b'test3') (rp / 'file').write_bytes(file_1) (rp / 'hidden_file').write_bytes(file_2) (rp / 'absent_file').write_bytes(file_3) repo.commit() obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - self.load(str(rp)) + load(swh_git_loader, str(rp)) # FIXME: storage.content_update() should be changed to allow things # like that - with self.storage.get_db().transaction() as cur: + with storage.get_db().transaction() as cur: cur.execute("""update content set status = 'visible' where sha1 = %s""", (id_1,)) cur.execute("""update content set status = 'hidden' where sha1 = %s""", (id_2,)) cur.execute("""update content set status = 'absent' where sha1 = %s""", (id_3,)) - with self.cook_extract_revision_gitfast(obj_id) as (ert, p): + with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): ert.checkout(b'HEAD') - self.assertEqual((p / 'file').read_bytes(), b'test1') - self.assertEqual((p / 'hidden_file').read_bytes(), 
HIDDEN_MESSAGE) - self.assertEqual((p / 'absent_file').read_bytes(), SKIPPED_MESSAGE) + assert (p / 'file').read_bytes() == b'test1' + assert (p / 'hidden_file').read_bytes() == HIDDEN_MESSAGE + assert (p / 'absent_file').read_bytes() == SKIPPED_MESSAGE - def test_revision_bogus_perms(self): + def test_revision_bogus_perms(self, swh_git_loader): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the revision # cooker. + storage = swh_git_loader.storage repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) (rp / 'file').chmod(0o664) (rp / 'executable').write_bytes(TEST_EXECUTABLE) (rp / 'executable').chmod(0o775) (rp / 'wat').write_text(TEST_CONTENT) (rp / 'wat').chmod(0o604) repo.commit('initial commit') - self.load(str(rp)) + load(swh_git_loader, str(rp)) obj_id_hex = repo.repo.refs[b'HEAD'].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - with self.cook_extract_revision_gitfast(obj_id) as (ert, p): + with cook_extract_revision_gitfast(storage, obj_id) as (ert, p): ert.checkout(b'HEAD') - self.assertEqual((p / 'file').stat().st_mode, 0o100644) - self.assertEqual((p / 'executable').stat().st_mode, 0o100755) - self.assertEqual((p / 'wat').stat().st_mode, 0o100644) + assert (p / 'file').stat().st_mode == 0o100644 + assert (p / 'executable').stat().st_mode == 0o100755 + assert (p / 'wat').stat().st_mode == 0o100644 - def test_revision_null_fields(self): + def test_revision_null_fields(self, swh_git_loader): # Our schema doesn't enforce a lot of non-null revision fields. We need # to check these cases don't break the cooker. 
+ storage = swh_git_loader.storage repo = TestRepo() with repo as rp: (rp / 'file').write_text(TEST_CONTENT) c = repo.commit('initial commit') - self.load(str(rp)) + load(swh_git_loader, str(rp)) repo.repo.refs[b'HEAD'].decode() dir_id_hex = repo.repo[c].tree.decode() dir_id = hashutil.hash_to_bytes(dir_id_hex) test_id = b'56789012345678901234' test_revision = { 'id': test_id, 'message': None, 'author': {'name': None, 'email': None, 'fullname': ''}, 'date': None, 'committer': {'name': None, 'email': None, 'fullname': ''}, 'committer_date': None, 'parents': [], 'type': 'git', 'directory': dir_id, 'metadata': {}, 'synthetic': True } - self.storage.revision_add([test_revision]) + storage.revision_add([test_revision]) - with self.cook_extract_revision_gitfast(test_id) as (ert, p): + with cook_extract_revision_gitfast(storage, test_id) as (ert, p): ert.checkout(b'HEAD') - self.assertEqual((p / 'file').stat().st_mode, 0o100644) + assert (p / 'file').stat().st_mode == 0o100644 - def test_revision_revision_data(self): + def test_revision_revision_data(self, swh_git_loader): + storage = swh_git_loader.storage target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd' d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9') r = hashutil.hash_to_bytes('1ecc9270c4fc61cfddbc65a774e91ef5c425a6f0') dir = { 'id': d, 'entries': [ { 'name': b'submodule', 'type': 'rev', 'target': hashutil.hash_to_bytes(target_rev), 'perms': 0o100644, } ], } - self.storage.directory_add([dir]) + storage.directory_add([dir]) rev = { 'id': r, 'message': None, 'author': {'name': None, 'email': None, 'fullname': ''}, 'date': None, 'committer': {'name': None, 'email': None, 'fullname': ''}, 'committer_date': None, 'parents': [], 'type': 'git', 'directory': d, 'metadata': {}, 'synthetic': True } - self.storage.revision_add([rev]) + storage.revision_add([rev]) - with self.cook_stream_revision_gitfast(r) as stream: + with cook_stream_revision_gitfast(storage, r) as stream: pattern = 'M 160000 {} 
submodule'.format(target_rev).encode() - self.assertIn(pattern, stream.read()) + assert pattern in stream.read() diff --git a/swh/vault/tests/test_cookers_base.py b/swh/vault/tests/test_cookers_base.py index 55586b6..35b28f1 100644 --- a/swh/vault/tests/test_cookers_base.py +++ b/swh/vault/tests/test_cookers_base.py @@ -1,78 +1,81 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import unittest from unittest.mock import MagicMock from swh.model import hashutil from swh.vault.cookers.base import BaseVaultCooker TEST_BUNDLE_CHUNKS = [b"test content 1\n", b"test content 2\n", b"test content 3\n"] TEST_BUNDLE_CONTENT = b''.join(TEST_BUNDLE_CHUNKS) TEST_OBJ_TYPE = 'test_type' TEST_HEX_ID = '17a3e48bce37be5226490e750202ad3a9a1a3fe9' TEST_OBJ_ID = hashutil.hash_to_bytes(TEST_HEX_ID) class BaseVaultCookerMock(BaseVaultCooker): CACHE_TYPE_KEY = TEST_OBJ_TYPE - def __init__(self, *args, **kwargs): - super().__init__(self.CACHE_TYPE_KEY, TEST_OBJ_ID, *args, **kwargs) + def __init__(self): + # we do not call super() here to bypass the building of db objects from + # config since we do mock these db objects + self.config = {} self.storage = MagicMock() self.backend = MagicMock() + self.obj_type = self.CACHE_TYPE_KEY + self.obj_id = hashutil.hash_to_bytes(TEST_OBJ_ID) + self.max_bundle_size = 1024 def check_exists(self): return True def prepare_bundle(self): for chunk in TEST_BUNDLE_CHUNKS: self.write(chunk) -class TestBaseVaultCooker(unittest.TestCase): - def test_simple_cook(self): - cooker = BaseVaultCookerMock() - cooker.cook() - cooker.backend.put_bundle.assert_called_once_with( - TEST_OBJ_TYPE, TEST_OBJ_ID, TEST_BUNDLE_CONTENT) - cooker.backend.set_status.assert_called_with( - TEST_OBJ_TYPE, TEST_OBJ_ID, 'done') - cooker.backend.set_progress.assert_called_with( - 
TEST_OBJ_TYPE, TEST_OBJ_ID, None) - cooker.backend.send_notif.assert_called_with( - TEST_OBJ_TYPE, TEST_OBJ_ID) - - def test_code_exception_cook(self): - cooker = BaseVaultCookerMock() - cooker.prepare_bundle = MagicMock() - cooker.prepare_bundle.side_effect = RuntimeError("Nope") - cooker.cook() - - # Potentially remove this when we have objstorage streaming - cooker.backend.put_bundle.assert_not_called() - - cooker.backend.set_status.assert_called_with( - TEST_OBJ_TYPE, TEST_OBJ_ID, 'failed') - self.assertNotIn("Nope", cooker.backend.set_progress.call_args[0][2]) - cooker.backend.send_notif.assert_called_with( - TEST_OBJ_TYPE, TEST_OBJ_ID) - - def test_policy_exception_cook(self): - cooker = BaseVaultCookerMock() - cooker.max_bundle_size = 8 - cooker.cook() - - # Potentially remove this when we have objstorage streaming - cooker.backend.put_bundle.assert_not_called() - - cooker.backend.set_status.assert_called_with( - TEST_OBJ_TYPE, TEST_OBJ_ID, 'failed') - self.assertIn("exceeds", cooker.backend.set_progress.call_args[0][2]) - cooker.backend.send_notif.assert_called_with( - TEST_OBJ_TYPE, TEST_OBJ_ID) +def test_simple_cook(): + cooker = BaseVaultCookerMock() + cooker.cook() + cooker.backend.put_bundle.assert_called_once_with( + TEST_OBJ_TYPE, TEST_OBJ_ID, TEST_BUNDLE_CONTENT) + cooker.backend.set_status.assert_called_with( + TEST_OBJ_TYPE, TEST_OBJ_ID, 'done') + cooker.backend.set_progress.assert_called_with( + TEST_OBJ_TYPE, TEST_OBJ_ID, None) + cooker.backend.send_notif.assert_called_with( + TEST_OBJ_TYPE, TEST_OBJ_ID) + + +def test_code_exception_cook(): + cooker = BaseVaultCookerMock() + cooker.prepare_bundle = MagicMock() + cooker.prepare_bundle.side_effect = RuntimeError("Nope") + cooker.cook() + + # Potentially remove this when we have objstorage streaming + cooker.backend.put_bundle.assert_not_called() + + cooker.backend.set_status.assert_called_with( + TEST_OBJ_TYPE, TEST_OBJ_ID, 'failed') + assert "Nope" not in 
cooker.backend.set_progress.call_args[0][2] + cooker.backend.send_notif.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID) + + +def test_policy_exception_cook(): + cooker = BaseVaultCookerMock() + cooker.max_bundle_size = 8 + cooker.cook() + + # Potentially remove this when we have objstorage streaming + cooker.backend.put_bundle.assert_not_called() + + cooker.backend.set_status.assert_called_with( + TEST_OBJ_TYPE, TEST_OBJ_ID, 'failed') + assert "exceeds" in cooker.backend.set_progress.call_args[0][2] + cooker.backend.send_notif.assert_called_with(TEST_OBJ_TYPE, TEST_OBJ_ID) diff --git a/swh/vault/tests/vault_testing.py b/swh/vault/tests/vault_testing.py index 2ab5280..c15c8f9 100644 --- a/swh/vault/tests/vault_testing.py +++ b/swh/vault/tests/vault_testing.py @@ -1,74 +1,21 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import os -import tempfile - from swh.model import hashutil -from swh.vault.backend import VaultBackend - -from swh.storage.tests.storage_testing import StorageTestFixture -from swh.vault.tests import SQL_DIR - - -class VaultTestFixture(StorageTestFixture): - """Mix this in a test subject class to get Vault Database testing support. - - This fixture requires to come before DbTestFixture and StorageTestFixture - in the inheritance list as it uses their methods to setup its own internal - components. - - Usage example: - - class TestVault(VaultTestFixture, unittest.TestCase): - ... 
- """ - TEST_DB_NAME = 'softwareheritage-test-vault' - TEST_DB_DUMP = [StorageTestFixture.TEST_DB_DUMP, - os.path.join(SQL_DIR, '*.sql')] - - def setUp(self): - super().setUp() - self.cache_root = tempfile.TemporaryDirectory('vault-cache-') - self.vault_config = { - 'storage': self.storage_config, - 'cache': { - 'cls': 'pathslicing', - 'args': { - 'root': self.cache_root.name, - 'slicing': '0:1/1:5', - 'allow_delete': True, - } - }, - 'db': 'postgresql:///' + self.TEST_DB_NAME, - 'scheduler': None, - } - self.vault_backend = VaultBackend(self.vault_config) - - def tearDown(self): - self.cache_root.cleanup() - self.vault_backend.close() - self.reset_storage_tables() - self.reset_vault_tables() - super().tearDown() - - def reset_vault_tables(self): - excluded = {'dbversion'} - self.reset_db_tables(self.TEST_DB_NAME, excluded=excluded) def hash_content(content): """Hash the content's id (sha1). Args: content (bytes): Content to hash Returns: The tuple (content, content's id as bytes) """ hashes = hashutil.MultiHash.from_data( content, hash_names=['sha1']).digest() return content, hashes['sha1'] diff --git a/tox.ini b/tox.ini index fb7a4fc..0fb07c6 100644 --- a/tox.ini +++ b/tox.ini @@ -1,17 +1,16 @@ [tox] envlist=flake8,py3 [testenv:py3] deps = .[testing] pytest-cov - pifpaf commands = - pifpaf run postgresql -- pytest --cov=swh --cov-branch {posargs} -m 'not config_issue' + pytest --cov=swh --cov-branch {posargs} [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8