diff --git a/mypy.ini b/mypy.ini index 265b730b..e7ee4309 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,30 +1,33 @@ [mypy] namespace_packages = True # due to the conditional import logic on swh.journal, in some cases a specific # type: ignore is neede, in other it isn't... warn_unused_ignores = False # support for sqlalchemy magic: see https://github.com/dropbox/sqlalchemy-stubs plugins = sqlmypy # 3rd party libraries without stubs (yet) # only shipped indirectly via hypothesis [mypy-django.*] ignore_missing_imports = True [mypy-pkg_resources.*] ignore_missing_imports = True [mypy-psycopg2.*] ignore_missing_imports = True [mypy-pytest.*] ignore_missing_imports = True # temporary work-around for landing typing support in spite of the current # journal<->storage dependency loop [mypy-swh.journal.*] ignore_missing_imports = True + +[mypy-pytest_postgresql.*] +ignore_missing_imports = True diff --git a/requirements-test.txt b/requirements-test.txt index 42bc6260..0d34e1aa 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,3 +1,4 @@ hypothesis >= 3.11.0 pytest +pytest-postgresql sqlalchemy-stubs diff --git a/swh/storage/tests/algos/test_snapshot.py b/swh/storage/tests/algos/test_snapshot.py index 2741c58d..aa50b773 100644 --- a/swh/storage/tests/algos/test_snapshot.py +++ b/swh/storage/tests/algos/test_snapshot.py @@ -1,53 +1,53 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pytest from hypothesis import given from hypothesis.strategies import datetimes from swh.model.identifiers import snapshot_identifier, identifier_to_bytes from swh.model.hypothesis_strategies import \ origins, snapshots, branch_names, branch_targets -from swh.storage.tests.storage_testing import StorageTestFixture from swh.storage.algos.snapshot import snapshot_get_all_branches +@pytest.mark.xfail @pytest.mark.db @pytest.mark.property_based -class TestSnapshotAllBranches(StorageTestFixture, unittest.TestCase): +class TestSnapshotAllBranches(unittest.TestCase): @given(origins().map(lambda x: x.to_dict()), datetimes(), snapshots(min_size=0, max_size=10, only_objects=False)) def test_snapshot_small(self, origin, ts, snapshot): snapshot = snapshot.to_dict() self.storage.snapshot_add([snapshot]) returned_snapshot = snapshot_get_all_branches(self.storage, snapshot['id']) self.assertEqual(snapshot, returned_snapshot) @given(origins().map(lambda x: x.to_dict()), datetimes(), branch_names(), branch_targets(only_objects=True)) def test_snapshot_large(self, origin, ts, branch_name, branch_target): branch_target = branch_target.to_dict() snapshot = { 'branches': { b'%s%05d' % (branch_name, i): branch_target for i in range(10000) } } snapshot['id'] = identifier_to_bytes(snapshot_identifier(snapshot)) self.storage.snapshot_add([snapshot]) returned_snapshot = snapshot_get_all_branches(self.storage, snapshot['id']) self.assertEqual(snapshot, returned_snapshot) diff --git a/swh/storage/tests/conftest.py b/swh/storage/tests/conftest.py new file mode 100644 index 00000000..7a65d980 --- /dev/null +++ b/swh/storage/tests/conftest.py @@ -0,0 +1,197 @@ +from os import path, environ +import glob +import pytest + +from pytest_postgresql import factories +from pytest_postgresql.janitor import DatabaseJanitor, psycopg2 +from hypothesis import strategies +from swh.model.hypothesis_strategies import origins, contents + +from swh.core.utils import numfile_sortkey as sortkey +import swh.storage + +SQL_DIR = path.join(path.dirname(swh.storage.__file__), 'sql') + +environ['LC_ALL'] = 'C.UTF-8' + +DUMP_FILES = path.join(SQL_DIR, '*.sql') + + +@pytest.fixture +def swh_storage(postgresql_proc, swh_storage_postgresql, tmp_path): + + objdir = tmp_path / 'objstorage' + objdir.mkdir() + storage_config = { + 'cls': 'local', + 'args': { + 'db': 'postgresql://{user}@{host}:{port}/{dbname}'.format( + host=postgresql_proc.host, + port=postgresql_proc.port, + user='postgres', + dbname='tests'), + 'objstorage': { + 'cls': 'pathslicing', + 'args': { + 'root': str(objdir), + 'slicing': '0:1/1:5', + }, + }, + 'journal_writer': { + 'cls': 'memory', + }, + }, + } + storage = swh.storage.get_storage(**storage_config) + return storage + + +def gen_origins(n=20): + return strategies.lists( + origins().map(lambda x: x.to_dict()), + unique_by=lambda x: x['url'], + min_size=n, max_size=n).example() + + +def gen_contents(n=20): + return strategies.lists( + contents().map(lambda x: x.to_dict()), + unique_by=lambda x: (x['sha1'], x['sha1_git']), + min_size=n, max_size=n).example() + + +@pytest.fixture +def swh_contents(swh_storage): + contents = gen_contents() + swh_storage.content_add(contents) + return contents + + +@pytest.fixture +def swh_origins(swh_storage): + origins = gen_origins() + swh_storage.origin_add(origins) + return origins + + +# the postgres_fact factory fixture below is mostly a copy of the code +# from pytest-postgresql. We need a custom version here to be able to +# specify our version of the DBJanitor we use. +def postgresql_fact(process_fixture_name, db_name=None): + @pytest.fixture + def postgresql_factory(request): + """ + Fixture factory for PostgreSQL. + + :param FixtureRequest request: fixture request object + :rtype: psycopg2.connection + :returns: postgresql client + """ + config = factories.get_config(request) + if not psycopg2: + raise ImportError( + 'No module named psycopg2. Please install it.' + ) + proc_fixture = request.getfixturevalue(process_fixture_name) + + # _, config = try_import('psycopg2', request) + pg_host = proc_fixture.host + pg_port = proc_fixture.port + pg_user = proc_fixture.user + pg_options = proc_fixture.options + pg_db = db_name or config['dbname'] + + with SwhDatabaseJanitor( + pg_user, pg_host, pg_port, pg_db, proc_fixture.version + ): + connection = psycopg2.connect( + dbname=pg_db, + user=pg_user, + host=pg_host, + port=pg_port, + options=pg_options + ) + yield connection + connection.close() + + return postgresql_factory + + +swh_storage_postgresql = postgresql_fact('postgresql_proc') + + +# This version of the DatabaseJanitor implement a different setup/teardown +# behavior than than the stock one: instead of droping, creating and +# initializing the database for each test, it create and initialize the db only +# once, then it truncate the tables. This is needed to have acceptable test +# performances. +class SwhDatabaseJanitor(DatabaseJanitor): + def db_setup(self): + with psycopg2.connect( + dbname=self.db_name, + user=self.user, + host=self.host, + port=self.port, + ) as cnx: + with cnx.cursor() as cur: + all_dump_files = sorted( + glob.glob(DUMP_FILES), key=sortkey) + for fname in all_dump_files: + with open(fname) as fobj: + sql = fobj.read().replace('concurrently', '') + cur.execute(sql) + cnx.commit() + + def db_reset(self): + with psycopg2.connect( + dbname=self.db_name, + user=self.user, + host=self.host, + port=self.port, + ) as cnx: + with cnx.cursor() as cur: + cur.execute( + "SELECT table_name FROM information_schema.tables " + "WHERE table_schema = %s", ('public',)) + tables = set(table for (table,) in cur.fetchall()) + for table in tables: + cur.execute('truncate table %s cascade' % table) + + cur.execute( + "SELECT sequence_name FROM information_schema.sequences " + "WHERE sequence_schema = %s", ('public',)) + seqs = set(seq for (seq,) in cur.fetchall()) + for seq in seqs: + cur.execute('ALTER SEQUENCE %s RESTART;' % seq) + cnx.commit() + + def init(self): + with self.cursor() as cur: + cur.execute( + "SELECT COUNT(1) FROM pg_database WHERE datname=%s;", + (self.db_name,)) + db_exists = cur.fetchone()[0] == 1 + if db_exists: + cur.execute( + 'UPDATE pg_database SET datallowconn=true ' + 'WHERE datname = %s;', + (self.db_name,)) + + if db_exists: + self.db_reset() + else: + with self.cursor() as cur: + cur.execute('CREATE DATABASE "{}";'.format(self.db_name)) + self.db_setup() + + def drop(self): + pid_column = 'pid' + with self.cursor() as cur: + cur.execute( + 'UPDATE pg_database SET datallowconn=false ' + 'WHERE datname = %s;', (self.db_name,)) + cur.execute( + 'SELECT pg_terminate_backend(pg_stat_activity.{})' + 'FROM pg_stat_activity ' + 'WHERE pg_stat_activity.datname = %s;'.format(pid_column), + (self.db_name,)) diff --git a/swh/storage/tests/data/storage.yml b/swh/storage/tests/data/storage.yml new file mode 100644 index 00000000..97f9f67b --- /dev/null +++ b/swh/storage/tests/data/storage.yml @@ -0,0 +1,13 @@ +storage: + cls: local + args: + db: dbname=%s + + objstorage: + cls: pathslicing + args: + root: TMPDIR + slicing: "0:1/1:5" + + journal_writer: + cls: inmemory diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py new file mode 100644 index 00000000..56836330 --- /dev/null +++ b/swh/storage/tests/storage_data.py @@ -0,0 +1,532 @@ +# Copyright (C) 2015-2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import datetime +from swh.model.hashutil import hash_to_bytes +from swh.model import from_disk + + +class StorageData: + def __getattr__(self, key): + v = globals()[key] + if hasattr(v, 'copy'): + return v.copy() + return v + + +data = StorageData() + + +cont = { + 'data': b'42\n', + 'length': 3, + 'sha1': hash_to_bytes( + '34973274ccef6ab4dfaaf86599792fa9c3fe4689'), + 'sha1_git': hash_to_bytes( + 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'), + 'sha256': hash_to_bytes( + '673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a'), + 'blake2s256': hash_to_bytes( + 'd5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d'), + 'status': 'visible', +} + +cont2 = { + 'data': b'4242\n', + 'length': 5, + 'sha1': hash_to_bytes( + '61c2b3a30496d329e21af70dd2d7e097046d07b7'), + 'sha1_git': hash_to_bytes( + '36fade77193cb6d2bd826161a0979d64c28ab4fa'), + 'sha256': hash_to_bytes( + '859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd'), + 'blake2s256': hash_to_bytes( + '849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d'), + 'status': 'visible', +} + +cont3 = { + 'data': b'424242\n', + 'length': 7, + 'sha1': hash_to_bytes( + '3e21cc4942a4234c9e5edd8a9cacd1670fe59f13'), + 'sha1_git': hash_to_bytes( + 'c932c7649c6dfa4b82327d121215116909eb3bea'), + 'sha256': hash_to_bytes( + '92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36'), + 'blake2s256': hash_to_bytes( + '76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11'), + 'status': 'visible', +} + +contents = (cont, cont2, cont3) + + +missing_cont = { + 'data': b'missing\n', + 'length': 8, + 'sha1': hash_to_bytes( + 'f9c24e2abb82063a3ba2c44efd2d3c797f28ac90'), + 'sha1_git': hash_to_bytes( + '33e45d56f88993aae6a0198013efa80716fd8919'), + 'sha256': hash_to_bytes( + '6bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a'), + 'blake2s256': hash_to_bytes( + '306856b8fd879edb7b6f1aeaaf8db9bbecc993cd7f776c333ac3a782fa5c6eba'), + 'status': 'absent', +} + +skipped_cont = { + 'length': 1024 * 1024 * 200, + 'sha1_git': hash_to_bytes( + '33e45d56f88993aae6a0198013efa80716fd8920'), + 'sha1': hash_to_bytes( + '43e45d56f88993aae6a0198013efa80716fd8920'), + 'sha256': hash_to_bytes( + '7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a'), + 'blake2s256': hash_to_bytes( + 'ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b'), + 'reason': 'Content too long', + 'status': 'absent', + 'origin': 'file:///dev/zero', +} + +skipped_cont2 = { + 'length': 1024 * 1024 * 300, + 'sha1_git': hash_to_bytes( + '44e45d56f88993aae6a0198013efa80716fd8921'), + 'sha1': hash_to_bytes( + '54e45d56f88993aae6a0198013efa80716fd8920'), + 'sha256': hash_to_bytes( + '8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a'), + 'blake2s256': hash_to_bytes( + '9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b'), + 'reason': 'Content too long', + 'status': 'absent', +} + +dir = { + 'id': hash_to_bytes( + '340133423253310030f531e632a733ff37c3a930'), + 'entries': [ + { + 'name': b'foo', + 'type': 'file', + 'target': hash_to_bytes( # cont + 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'), + 'perms': from_disk.DentryPerms.content, + }, + { + 'name': b'bar\xc3', + 'type': 'dir', + 'target': b'12345678901234567890', + 'perms': from_disk.DentryPerms.directory, + }, + ], +} + +dir2 = { + 'id': hash_to_bytes( + '340133423253310030f531e632a733ff37c3a935'), + 'entries': [ + { + 'name': b'oof', + 'type': 'file', + 'target': hash_to_bytes( # cont2 + '36fade77193cb6d2bd826161a0979d64c28ab4fa'), + 'perms': from_disk.DentryPerms.content, + } + ], +} + +dir3 = { + 'id': hash_to_bytes('33e45d56f88993aae6a0198013efa80716fd8921'), + 'entries': [ + { + 'name': b'foo', + 'type': 'file', + 'target': hash_to_bytes( # cont + 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'), + 'perms': from_disk.DentryPerms.content, + }, + { + 'name': b'subdir', + 'type': 'dir', + 'target': hash_to_bytes( # dir + '340133423253310030f531e632a733ff37c3a930'), + 'perms': from_disk.DentryPerms.directory, + }, + { + 'name': b'hello', + 'type': 'file', + 'target': b'12345678901234567890', + 'perms': from_disk.DentryPerms.content, + }, + ], +} + +dir4 = { + 'id': hash_to_bytes('33e45d56f88993aae6a0198013efa80716fd8922'), + 'entries': [ + { + 'name': b'subdir1', + 'type': 'dir', + 'target': hash_to_bytes( + '33e45d56f88993aae6a0198013efa80716fd8921'), # dir3 + 'perms': from_disk.DentryPerms.directory, + }, + ] +} + +dierctories = (dir, dir2, dir3, dir4) + + +minus_offset = datetime.timezone(datetime.timedelta(minutes=-120)) +plus_offset = datetime.timezone(datetime.timedelta(minutes=120)) + +revision = { + 'id': b'56789012345678901234', + 'message': b'hello', + 'author': { + 'name': b'Nicolas Dandrimont', + 'email': b'nicolas@example.com', + 'fullname': b'Nicolas Dandrimont ', + }, + 'date': { + 'timestamp': 1234567890, + 'offset': 120, + 'negative_utc': None, + }, + 'committer': { + 'name': b'St\xc3fano Zacchiroli', + 'email': b'stefano@example.com', + 'fullname': b'St\xc3fano Zacchiroli ' + }, + 'committer_date': { + 'timestamp': 1123456789, + 'offset': 0, + 'negative_utc': True, + }, + 'parents': [b'01234567890123456789', b'23434512345123456789'], + 'type': 'git', + 'directory': hash_to_bytes( # dir + '340133423253310030f531e632a733ff37c3a930'), + 'metadata': { + 'checksums': { + 'sha1': 'tarball-sha1', + 'sha256': 'tarball-sha256', + }, + 'signed-off-by': 'some-dude', + 'extra_headers': [ + ['gpgsig', b'test123'], + ['mergetags', [b'foo\\bar', b'\x22\xaf\x89\x80\x01\x00']], + ], + }, + 'synthetic': True +} + +revision2 = { + 'id': b'87659012345678904321', + 'message': b'hello again', + 'author': { + 'name': b'Roberto Dicosmo', + 'email': b'roberto@example.com', + 'fullname': b'Roberto Dicosmo ', + }, + 'date': { + 'timestamp': { + 'seconds': 1234567843, + 'microseconds': 220000, + }, + 'offset': -720, + 'negative_utc': None, + }, + 'committer': { + 'name': b'tony', + 'email': b'ar@dumont.fr', + 'fullname': b'tony ', + }, + 'committer_date': { + 'timestamp': 1123456789, + 'offset': 0, + 'negative_utc': False, + }, + 'parents': [b'01234567890123456789'], + 'type': 'git', + 'directory': hash_to_bytes( # dir2 + '340133423253310030f531e632a733ff37c3a935'), + 'metadata': None, + 'synthetic': False +} + +revision3 = { + 'id': hash_to_bytes('7026b7c1a2af56521e951c01ed20f255fa054238'), + 'message': b'a simple revision with no parents this time', + 'author': { + 'name': b'Roberto Dicosmo', + 'email': b'roberto@example.com', + 'fullname': b'Roberto Dicosmo ', + }, + 'date': { + 'timestamp': { + 'seconds': 1234567843, + 'microseconds': 220000, + }, + 'offset': -720, + 'negative_utc': None, + }, + 'committer': { + 'name': b'tony', + 'email': b'ar@dumont.fr', + 'fullname': b'tony ', + }, + 'committer_date': { + 'timestamp': 1127351742, + 'offset': 0, + 'negative_utc': False, + }, + 'parents': [], + 'type': 'git', + 'directory': hash_to_bytes( # dir2 + '340133423253310030f531e632a733ff37c3a935'), + 'metadata': None, + 'synthetic': True +} + +revision4 = { + 'id': hash_to_bytes('368a48fe15b7db2383775f97c6b247011b3f14f4'), + 'message': b'parent of self.revision2', + 'author': { + 'name': b'me', + 'email': b'me@soft.heri', + 'fullname': b'me ', + }, + 'date': { + 'timestamp': { + 'seconds': 1244567843, + 'microseconds': 220000, + }, + 'offset': -720, + 'negative_utc': None, + }, + 'committer': { + 'name': b'committer-dude', + 'email': b'committer@dude.com', + 'fullname': b'committer-dude ', + }, + 'committer_date': { + 'timestamp': { + 'seconds': 1244567843, + 'microseconds': 220000, + }, + 'offset': -720, + 'negative_utc': None, + }, + 'parents': [hash_to_bytes( # revision3 + '7026b7c1a2af56521e951c01ed20f255fa054238')], + 'type': 'git', + 'directory': hash_to_bytes( # dir + '340133423253310030f531e632a733ff37c3a930'), + 'metadata': None, + 'synthetic': False +} + +revisions = (revision, revision2, revision3, revision4) + + +origin = { + 'url': 'file:///dev/null', + 'type': 'git', +} + +origin2 = { + 'url': 'file:///dev/zero', + 'type': 'hg', +} + +origins = (origin, origin2) + + +provider = { + 'name': 'hal', + 'type': 'deposit-client', + 'url': 'http:///hal/inria', + 'metadata': { + 'location': 'France' + } +} + +metadata_tool = { + 'name': 'swh-deposit', + 'version': '0.0.1', + 'configuration': { + 'sword_version': '2' + } +} + +date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, + tzinfo=datetime.timezone.utc) + +date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0, + tzinfo=datetime.timezone.utc) + +date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0, + tzinfo=datetime.timezone.utc) + +release = { + 'id': b'87659012345678901234', + 'name': b'v0.0.1', + 'author': { + 'name': b'olasd', + 'email': b'nic@olasd.fr', + 'fullname': b'olasd ', + }, + 'date': { + 'timestamp': 1234567890, + 'offset': 42, + 'negative_utc': None, + }, + 'target': b'43210987654321098765', + 'target_type': 'revision', + 'message': b'synthetic release', + 'synthetic': True, +} + +release2 = { + 'id': b'56789012348765901234', + 'name': b'v0.0.2', + 'author': { + 'name': b'tony', + 'email': b'ar@dumont.fr', + 'fullname': b'tony ', + }, + 'date': { + 'timestamp': 1634366813, + 'offset': -120, + 'negative_utc': None, + }, + 'target': b'432109\xa9765432\xc309\x00765', + 'target_type': 'revision', + 'message': b'v0.0.2\nMisc performance improvements + bug fixes', + 'synthetic': False +} + +release3 = { + 'id': b'87659012345678904321', + 'name': b'v0.0.2', + 'author': { + 'name': b'tony', + 'email': b'tony@ardumont.fr', + 'fullname': b'tony ', + }, + 'date': { + 'timestamp': 1634336813, + 'offset': 0, + 'negative_utc': False, + }, + 'target': b'87659012345678904321', # revision2 + 'target_type': 'revision', + 'message': b'yet another synthetic release', + 'synthetic': True, +} + +releases = (release, release2, release3) + + +fetch_history_date = datetime.datetime( + 2015, 1, 2, 21, 0, 0, + tzinfo=datetime.timezone.utc) + +fetch_history_end = datetime.datetime( + 2015, 1, 2, 23, 0, 0, + tzinfo=datetime.timezone.utc) + +fetch_history_data = { + 'status': True, + 'result': {'foo': 'bar'}, + 'stdout': 'blabla', + 'stderr': 'blablabla', +} + +snapshot = { + 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'), + 'branches': { + b'master': { + 'target': b'56789012345678901234', # revision + 'target_type': 'revision', + }, + }, +} + +empty_snapshot = { + 'id': hash_to_bytes('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'), + 'branches': {}, +} + +complete_snapshot = { + 'id': hash_to_bytes('6e65b86363953b780d92b0a928f3e8fcdd10db36'), + 'branches': { + b'directory': { + 'target': hash_to_bytes( + '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'), + 'target_type': 'directory', + }, + b'directory2': { + 'target': hash_to_bytes( + '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'), + 'target_type': 'directory', + }, + b'content': { + 'target': hash_to_bytes( + 'fe95a46679d128ff167b7c55df5d02356c5a1ae1'), + 'target_type': 'content', + }, + b'alias': { + 'target': b'revision', + 'target_type': 'alias', + }, + b'revision': { + 'target': hash_to_bytes( + 'aafb16d69fd30ff58afdd69036a26047f3aebdc6'), + 'target_type': 'revision', + }, + b'release': { + 'target': hash_to_bytes( + '7045404f3d1c54e6473c71bbb716529fbad4be24'), + 'target_type': 'release', + }, + b'snapshot': { + 'target': hash_to_bytes( + '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'), + 'target_type': 'snapshot', + }, + b'dangling': None, + } +} + +origin_metadata = { + 'origin': origin, + 'discovery_date': datetime.datetime(2015, 1, 1, 23, 0, 0, + tzinfo=datetime.timezone.utc), + 'provider': provider, + 'tool': 'swh-deposit', + 'metadata': { + 'name': 'test_origin_metadata', + 'version': '0.0.1' + } + } +origin_metadata2 = { + 'origin': origin, + 'discovery_date': datetime.datetime(2017, 1, 1, 23, 0, 0, + tzinfo=datetime.timezone.utc), + 'provider': provider, + 'tool': 'swh-deposit', + 'metadata': { + 'name': 'test_origin_metadata', + 'version': '0.0.1' + } + } + +fetch_history_duration = (fetch_history_end - fetch_history_date) diff --git a/swh/storage/tests/test_api_client.py b/swh/storage/tests/test_api_client.py index cc029076..ed38b9e2 100644 --- a/swh/storage/tests/test_api_client.py +++ b/swh/storage/tests/test_api_client.py @@ -1,152 +1,159 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from contextlib import contextmanager import shutil import tempfile import unittest import pytest from swh.core.api.tests.server_testing import ServerTestFixture from swh.journal.writer import get_journal_writer from swh.storage.api.client import RemoteStorage import swh.storage.api.server as server from swh.storage.api.server import app from swh.storage.in_memory import Storage as InMemoryStorage import swh.storage.storage from swh.storage.db import Db -from swh.storage.tests.test_storage import \ - CommonTestStorage, CommonPropTestStorage, StorageTestDbFixture +from swh.storage.tests.test_storage import ( + TestStorage as _TestStorage, + TestStorageCommonProp as _TestStorageCommonProp, + StorageTestDbFixture) +@pytest.mark.xfail class RemotePgStorageFixture(StorageTestDbFixture, ServerTestFixture, unittest.TestCase): def setUp(self): journal_writer = get_journal_writer(cls='memory') def mock_get_journal_writer(cls, args=None): assert cls == 'memory' return journal_writer self.journal_writer = journal_writer server.storage = None self.get_journal_writer = get_journal_writer swh.storage.storage.get_journal_writer = mock_get_journal_writer # ServerTestFixture needs to have self.objroot for # setUp() method, but this field is defined in # AbstractTestStorage's setUp() # To avoid confusion, override the self.objroot to a # one chosen in this class. self.storage_base = tempfile.mkdtemp() self.objroot = self.storage_base self.config = { 'storage': { 'cls': 'local', 'args': { 'db': 'dbname=%s' % self.TEST_DB_NAME, 'objstorage': { 'cls': 'pathslicing', 'args': { 'root': self.storage_base, 'slicing': '0:2', }, }, 'journal_writer': { 'cls': 'memory', } } } } self.app = app super().setUp() self.storage = RemoteStorage(self.url()) def tearDown(self): super().tearDown() shutil.rmtree(self.storage_base) swh.storage.storage.get_journal_writer = self.get_journal_writer def reset_storage(self): excluded = {'dbversion', 'tool'} self.reset_db_tables(self.TEST_DB_NAME, excluded=excluded) self.journal_writer.objects[:] = [] @contextmanager def get_db(self): yield Db(self.conn) +@pytest.mark.xfail class RemoteMemStorageFixture(ServerTestFixture, unittest.TestCase): def setUp(self): self.config = { 'storage': { 'cls': 'memory', 'args': { 'journal_writer': { 'cls': 'memory', } } } } self.__storage = InMemoryStorage( journal_writer={'cls': 'memory'}) self._get_storage_patcher = unittest.mock.patch( 'swh.storage.api.server.get_storage', return_value=self.__storage) self._get_storage_patcher.start() self.app = app super().setUp() self.storage = RemoteStorage(self.url()) self.journal_writer = self.__storage.journal_writer def tearDown(self): super().tearDown() self._get_storage_patcher.stop() def reset_storage(self): self.storage.reset() self.journal_writer.objects[:] = [] +@pytest.mark.xfail @pytest.mark.network -class TestRemoteMemStorage(CommonTestStorage, RemoteMemStorageFixture): +class TestRemoteMemStorage(_TestStorage, RemoteMemStorageFixture): @pytest.mark.skip('refresh_stat_counters not available in the remote api.') def test_stat_counters(self): pass @pytest.mark.skip('postgresql-specific test') def test_content_add_db(self): pass @pytest.mark.skip('postgresql-specific test') def test_skipped_content_add_db(self): pass @pytest.mark.skip('postgresql-specific test') def test_content_add_metadata_db(self): pass @pytest.mark.skip( 'not implemented, see https://forge.softwareheritage.org/T1633') def test_skipped_content_add(self): pass +@pytest.mark.xfail @pytest.mark.db @pytest.mark.network -class TestRemotePgStorage(CommonTestStorage, RemotePgStorageFixture): +class TestRemotePgStorage(_TestStorage, RemotePgStorageFixture): @pytest.mark.skip('refresh_stat_counters not available in the remote api.') def test_stat_counters(self): pass +@pytest.mark.xfail @pytest.mark.db @pytest.mark.property_based -class PropTestRemotePgStorage(CommonPropTestStorage, RemotePgStorageFixture): +class PropTestRemotePgStorage(_TestStorageCommonProp, RemotePgStorageFixture): @pytest.mark.skip('too slow') def test_add_arbitrary(self): pass diff --git a/swh/storage/tests/test_db.py b/swh/storage/tests/test_db.py index cb0de06f..5b6205a7 100644 --- a/swh/storage/tests/test_db.py +++ b/swh/storage/tests/test_db.py @@ -1,50 +1,31 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import os -import unittest -import pytest - -from swh.core.db.tests.db_testing import SingleDbTestFixture from swh.model.hashutil import hash_to_bytes -from swh.storage.db import Db -from . import SQL_DIR - - -@pytest.mark.db -class TestDb(SingleDbTestFixture, unittest.TestCase): - TEST_DB_NAME = 'softwareheritage-test-storage' - TEST_DB_DUMP = os.path.join(SQL_DIR, '*.sql') - - def setUp(self): - super().setUp() - self.db = Db(self.conn) - def tearDown(self): - self.db.conn.close() - super().tearDown() - def test_add_content(self): - cur = self.cursor +def test_add_content(swh_storage): + with swh_storage.db() as db: + cur = db.cursor() sha1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') - self.db.mktemp('content', cur) - self.db.copy_to([{ + db.mktemp('content', cur) + db.copy_to([{ 'sha1': sha1, 'sha1_git': hash_to_bytes( 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'), 'sha256': hash_to_bytes( '673650f936cb3b0a2f93ce09d81be107' '48b1b203c19e8176b4eefc1964a0cf3a'), 'blake2s256': hash_to_bytes('69217a3079908094e11121d042354a7c' '1f55b6482ca1a51e1b250dfd1ed0eef9'), 'length': 3}], 'tmp_content', ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length'], cur) - self.db.content_add_from_temp(cur) - self.cursor.execute('SELECT sha1 FROM content WHERE sha1 = %s', - (sha1,)) - self.assertEqual(self.cursor.fetchone()[0], sha1) + db.content_add_from_temp(cur) + cur.execute('SELECT sha1 FROM content WHERE sha1 = %s', + (sha1,)) + assert cur.fetchone()[0] == sha1 diff --git a/swh/storage/tests/test_in_memory.py b/swh/storage/tests/test_in_memory.py index 67cdb845..e1e6413f 100644 --- a/swh/storage/tests/test_in_memory.py +++ b/swh/storage/tests/test_in_memory.py @@ -1,78 +1,81 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import unittest import pytest from swh.storage.in_memory import Storage, ENABLE_ORIGIN_IDS -from swh.storage.tests.test_storage import \ - CommonTestStorage, CommonPropTestStorage +from swh.storage.tests.test_storage import ( + TestStorage as _TestStorage, + TestStorageCommonProp as _TestStorageCommonProp) -class TestInMemoryStorage(CommonTestStorage, unittest.TestCase): +@pytest.mark.xfail +class TestInMemoryStorage(_TestStorage, unittest.TestCase): """Test the in-memory storage API This class doesn't define any tests as we want identical functionality between local and remote storage. All the tests are therefore defined in CommonTestStorage. """ _test_origin_ids = ENABLE_ORIGIN_IDS def setUp(self): super().setUp() self.reset_storage() @pytest.mark.skip('postgresql-specific test') def test_content_add_db(self): pass @pytest.mark.skip('postgresql-specific test') def test_skipped_content_add_db(self): pass @pytest.mark.skip('postgresql-specific test') def test_content_add_metadata_db(self): pass if not _test_origin_ids: @pytest.mark.skip('requires origin ids') def test_origin_metadata_add(self): pass @pytest.mark.skip('requires origin ids') def test_origin_metadata_get(self): pass @pytest.mark.skip('requires origin ids') def test_origin_metadata_get_by_provider_type(self): pass def reset_storage(self): self.storage = Storage(journal_writer={'cls': 'memory'}) self.journal_writer = self.storage.journal_writer +@pytest.mark.xfail @pytest.mark.property_based -class PropTestInMemoryStorage(CommonPropTestStorage, unittest.TestCase): +class PropTestInMemoryStorage(_TestStorageCommonProp, unittest.TestCase): """Test the in-memory storage API This class doesn't define any tests as we want identical functionality between local and remote storage. All the tests are therefore defined in CommonPropTestStorage. """ _test_origin_ids = ENABLE_ORIGIN_IDS def setUp(self): super().setUp() self.storage = Storage() def reset_storage(self): self.storage = Storage() if not _test_origin_ids: @pytest.mark.skip('requires origin ids') def test_origin_get_range(self, new_origins): pass diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py index c1243d8d..e182d390 100644 --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1,4153 +1,3522 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy from contextlib import contextmanager import datetime import itertools import queue -import random import threading -import unittest from collections import defaultdict from unittest.mock import Mock, patch import psycopg2 import pytest from hypothesis import given, strategies, settings, HealthCheck from typing import ClassVar, Optional from swh.model import from_disk, identifiers from swh.model.hashutil import hash_to_bytes -from swh.model.hypothesis_strategies import origins, objects +from swh.model.hypothesis_strategies import objects from swh.storage.tests.storage_testing import StorageTestFixture from swh.storage import HashCollision -from .generate_data_test import gen_contents +from .storage_data import data @pytest.mark.db class StorageTestDbFixture(StorageTestFixture): def setUp(self): super().setUp() self.maxDiff = None def tearDown(self): self.reset_storage() if hasattr(self.storage, '_pool') and self.storage._pool: self.storage._pool.closeall() super().tearDown() def get_db(self): return self.storage.db() @contextmanager def db_transaction(self): with self.get_db() as db: with db.transaction() as cur: yield db, cur -class TestStorageData: - def setUp(self, *args, **kwargs): - super().setUp(*args, **kwargs) - - self.cont = { - 'data': b'42\n', - 'length': 3, - 'sha1': hash_to_bytes( - '34973274ccef6ab4dfaaf86599792fa9c3fe4689'), - 'sha1_git': hash_to_bytes( - 'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'), - 'sha256': hash_to_bytes( - '673650f936cb3b0a2f93ce09d81be107' - '48b1b203c19e8176b4eefc1964a0cf3a'), - 'blake2s256': hash_to_bytes('d5fe1939576527e42cfd76a9455a2' - '432fe7f56669564577dd93c4280e76d661d'), - 'status': 'visible', - } - - self.cont2 = { - 'data': b'4242\n', - 'length': 5, - 'sha1': hash_to_bytes( - '61c2b3a30496d329e21af70dd2d7e097046d07b7'), - 'sha1_git': hash_to_bytes( - '36fade77193cb6d2bd826161a0979d64c28ab4fa'), - 'sha256': hash_to_bytes( - '859f0b154fdb2d630f45e1ecae4a8629' - '15435e663248bb8461d914696fc047cd'), - 'blake2s256': hash_to_bytes('849c20fad132b7c2d62c15de310adfe87be' - '94a379941bed295e8141c6219810d'), - 'status': 'visible', - } - - self.cont3 = { - 'data': b'424242\n', - 'length': 7, - 'sha1': hash_to_bytes( - '3e21cc4942a4234c9e5edd8a9cacd1670fe59f13'), - 'sha1_git': hash_to_bytes( - 'c932c7649c6dfa4b82327d121215116909eb3bea'), - 'sha256': hash_to_bytes( - '92fb72daf8c6818288a35137b72155f5' - '07e5de8d892712ab96277aaed8cf8a36'), - 'blake2s256': hash_to_bytes('76d0346f44e5a27f6bafdd9c2befd304af' - 'f83780f93121d801ab6a1d4769db11'), - 'status': 'visible', - } - - self.missing_cont = { - 'data': b'missing\n', - 'length': 8, - 'sha1': hash_to_bytes( - 'f9c24e2abb82063a3ba2c44efd2d3c797f28ac90'), - 'sha1_git': hash_to_bytes( - '33e45d56f88993aae6a0198013efa80716fd8919'), - 'sha256': hash_to_bytes( - '6bbd052ab054ef222c1c87be60cd191a' - 'ddedd24cc882d1f5f7f7be61dc61bb3a'), - 'blake2s256': hash_to_bytes('306856b8fd879edb7b6f1aeaaf8db9bbecc9' - '93cd7f776c333ac3a782fa5c6eba'), - 'status': 'absent', - } - - self.skipped_cont = { - 'length': 1024 * 1024 * 200, - 'sha1_git': hash_to_bytes( - '33e45d56f88993aae6a0198013efa80716fd8920'), - 'sha1': hash_to_bytes( - '43e45d56f88993aae6a0198013efa80716fd8920'), - 'sha256': hash_to_bytes( - '7bbd052ab054ef222c1c87be60cd191a' - 'ddedd24cc882d1f5f7f7be61dc61bb3a'), - 'blake2s256': hash_to_bytes( - 'ade18b1adecb33f891ca36664da676e1' - '2c772cc193778aac9a137b8dc5834b9b'), - 'reason': 'Content too long', - 'status': 'absent', - 'origin': 'file:///dev/zero', - } - - self.skipped_cont2 = { - 'length': 1024 * 1024 * 300, - 'sha1_git': hash_to_bytes( - '44e45d56f88993aae6a0198013efa80716fd8921'), - 'sha1': hash_to_bytes( - '54e45d56f88993aae6a0198013efa80716fd8920'), - 'sha256': hash_to_bytes( - '8cbd052ab054ef222c1c87be60cd191a' - 'ddedd24cc882d1f5f7f7be61dc61bb3a'), - 'blake2s256': hash_to_bytes( - '9ce18b1adecb33f891ca36664da676e1' - '2c772cc193778aac9a137b8dc5834b9b'), - 'reason': 'Content too long', - 'status': 'absent', - } - - self.dir = { - 'id': b'4\x013\x422\x531\x000\xf51\xe62\xa73\xff7\xc3\xa90', - 'entries': [ - { - 'name': b'foo', - 'type': 'file', - 'target': self.cont['sha1_git'], - 'perms': from_disk.DentryPerms.content, - }, - { - 'name': b'bar\xc3', - 'type': 'dir', - 'target': b'12345678901234567890', - 'perms': from_disk.DentryPerms.directory, - }, - ], - } - - self.dir2 = { - 'id': b'4\x013\x422\x531\x000\xf51\xe62\xa73\xff7\xc3\xa95', - 'entries': [ - { - 'name': b'oof', - 'type': 'file', - 'target': self.cont2['sha1_git'], - 'perms': from_disk.DentryPerms.content, - } - ], - } - - self.dir3 = { - 'id': hash_to_bytes('33e45d56f88993aae6a0198013efa80716fd8921'), - 'entries': [ - { - 'name': b'foo', - 'type': 'file', - 'target': self.cont['sha1_git'], - 'perms': from_disk.DentryPerms.content, - }, - { - 'name': b'subdir', - 'type': 'dir', - 'target': self.dir['id'], - 'perms': from_disk.DentryPerms.directory, - }, - { - 'name': b'hello', - 'type': 'file', - 'target': b'12345678901234567890', - 'perms': from_disk.DentryPerms.content, - }, - - ], - } - - self.dir4 = { - 'id': hash_to_bytes('33e45d56f88993aae6a0198013efa80716fd8922'), - 'entries': [ - { - 'name': b'subdir1', - 'type': 'dir', - 'target': self.dir3['id'], - 'perms': from_disk.DentryPerms.directory, - }, - ] - } - - self.minus_offset = datetime.timezone(datetime.timedelta(minutes=-120)) - self.plus_offset = datetime.timezone(datetime.timedelta(minutes=120)) - - self.revision = { - 'id': b'56789012345678901234', - 'message': b'hello', - 'author': { - 'name': b'Nicolas Dandrimont', - 'email': b'nicolas@example.com', - 'fullname': b'Nicolas Dandrimont ', - }, - 'date': { - 'timestamp': 1234567890, - 'offset': 120, - 'negative_utc': None, - }, - 'committer': { - 'name': b'St\xc3fano Zacchiroli', - 'email': b'stefano@example.com', - 'fullname': b'St\xc3fano Zacchiroli ' - }, - 'committer_date': { - 'timestamp': 1123456789, - 'offset': 0, - 'negative_utc': True, - }, - 'parents': [b'01234567890123456789', b'23434512345123456789'], - 'type': 'git', - 'directory': self.dir['id'], - 'metadata': { - 'checksums': { - 'sha1': 'tarball-sha1', - 'sha256': 'tarball-sha256', - }, - 'signed-off-by': 'some-dude', - 'extra_headers': [ - ['gpgsig', b'test123'], - ['mergetags', [b'foo\\bar', b'\x22\xaf\x89\x80\x01\x00']], - ], - }, - 'synthetic': True - } - - self.revision2 = { - 'id': b'87659012345678904321', - 'message': b'hello again', - 'author': { - 'name': b'Roberto Dicosmo', - 'email': b'roberto@example.com', - 'fullname': b'Roberto Dicosmo ', - }, - 'date': { - 'timestamp': { - 'seconds': 1234567843, - 'microseconds': 220000, - }, - 'offset': -720, - 'negative_utc': None, - }, - 'committer': { - 'name': b'tony', - 'email': b'ar@dumont.fr', - 'fullname': b'tony ', - }, - 'committer_date': { - 'timestamp': 1123456789, - 'offset': 0, - 'negative_utc': False, - }, - 'parents': [b'01234567890123456789'], - 'type': 'git', - 'directory': self.dir2['id'], - 'metadata': None, - 'synthetic': False - } +@contextmanager +def db_transaction(storage): + with storage.db() as db: + with db.transaction() as cur: + yield db, cur - self.revision3 = { - 'id': hash_to_bytes('7026b7c1a2af56521e951c01ed20f255fa054238'), - 'message': b'a simple revision with no parents this time', - 'author': { - 'name': b'Roberto Dicosmo', - 'email': b'roberto@example.com', - 'fullname': b'Roberto Dicosmo ', - }, - 'date': { - 'timestamp': { - 'seconds': 1234567843, - 'microseconds': 220000, - }, - 'offset': -720, - 'negative_utc': None, - }, - 'committer': { - 'name': b'tony', - 'email': b'ar@dumont.fr', - 'fullname': b'tony ', - }, - 'committer_date': { - 'timestamp': 1127351742, - 'offset': 0, - 'negative_utc': False, - }, - 'parents': [], - 'type': 'git', - 'directory': self.dir2['id'], - 'metadata': None, - 'synthetic': True - } - self.revision4 = { - 'id': hash_to_bytes('368a48fe15b7db2383775f97c6b247011b3f14f4'), - 'message': b'parent of self.revision2', - 'author': { - 'name': b'me', - 'email': b'me@soft.heri', - 'fullname': b'me ', - }, - 'date': { - 'timestamp': { - 'seconds': 1244567843, - 'microseconds': 220000, - }, - 'offset': -720, - 'negative_utc': None, - }, - 'committer': { - 'name': b'committer-dude', - 'email': b'committer@dude.com', - 'fullname': b'committer-dude ', - }, - 'committer_date': { - 'timestamp': { - 'seconds': 1244567843, - 'microseconds': 220000, - }, - 'offset': -720, - 'negative_utc': None, - }, - 'parents': [self.revision3['id']], - 'type': 'git', - 'directory': self.dir['id'], - 'metadata': None, - 'synthetic': False - } +def normalize_entity(entity): + entity = copy.deepcopy(entity) + for key in ('date', 'committer_date'): + if key in entity: + entity[key] = identifiers.normalize_timestamp(entity[key]) + return entity - self.origin = { - 'url': 'file:///dev/null', - 'type': 'git', - } - self.origin2 = { - 'url': 'file:///dev/zero', - 'type': 'hg', +def transform_entries(dir_, *, prefix=b''): + for ent in dir_['entries']: + yield { + 'dir_id': dir_['id'], + 'type': ent['type'], + 'target': ent['target'], + 'name': prefix + ent['name'], + 'perms': ent['perms'], + 'status': None, + 'sha1': None, + 'sha1_git': None, + 'sha256': None, + 'length': None, } - self.provider = { - 'name': 'hal', - 'type': 'deposit-client', - 'url': 'http:///hal/inria', - 'metadata': { - 'location': 'France' - } - } - self.metadata_tool = { - 'name': 'swh-deposit', - 'version': '0.0.1', - 'configuration': { - 'sword_version': '2' - } - } +def cmpdir(directory): + return (directory['type'], directory['dir_id']) - self.origin_metadata = { - 'origin': self.origin, - 'discovery_date': datetime.datetime(2015, 1, 1, 23, 0, 0, - tzinfo=datetime.timezone.utc), - 'provider': self.provider, - 'tool': 'swh-deposit', - 'metadata': { - 'name': 'test_origin_metadata', - 'version': '0.0.1' - } - } - - self.origin_metadata2 = { - 'origin': self.origin, - 'discovery_date': datetime.datetime(2017, 1, 1, 23, 0, 0, - tzinfo=datetime.timezone.utc), - 'provider': self.provider, - 'tool': 'swh-deposit', - 'metadata': { - 'name': 'test_origin_metadata', - 'version': '0.0.1' - } - } - - self.date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, - tzinfo=datetime.timezone.utc) - - self.date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0, - tzinfo=datetime.timezone.utc) - - self.date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0, - tzinfo=datetime.timezone.utc) - - self.release = { - 'id': b'87659012345678901234', - 'name': b'v0.0.1', - 'author': { - 'name': b'olasd', - 'email': b'nic@olasd.fr', - 'fullname': b'olasd ', - }, - 'date': { - 'timestamp': 1234567890, - 'offset': 42, - 'negative_utc': None, - }, - 'target': b'43210987654321098765', - 'target_type': 'revision', - 'message': b'synthetic release', - 'synthetic': True, - } - self.release2 = { - 'id': b'56789012348765901234', - 'name': b'v0.0.2', - 'author': { - 'name': b'tony', - 'email': b'ar@dumont.fr', - 'fullname': b'tony ', - }, - 'date': { - 'timestamp': 1634366813, - 'offset': -120, - 'negative_utc': None, - }, - 'target': b'432109\xa9765432\xc309\x00765', - 'target_type': 'revision', - 'message': b'v0.0.2\nMisc performance improvements + bug fixes', - 'synthetic': False - } +def short_revision(revision): + return [revision['id'], revision['parents']] - self.release3 = { - 'id': b'87659012345678904321', - 'name': b'v0.0.2', - 'author': { - 'name': b'tony', - 'email': b'tony@ardumont.fr', - 'fullname': b'tony ', - }, - 'date': { - 'timestamp': 1634336813, - 'offset': 0, - 'negative_utc': False, - }, - 'target': self.revision2['id'], - 'target_type': 'revision', - 'message': b'yet another synthetic release', - 'synthetic': True, - } - - self.fetch_history_date = datetime.datetime( - 2015, 1, 2, 21, 0, 0, - tzinfo=datetime.timezone.utc) - self.fetch_history_end = datetime.datetime( - 2015, 1, 2, 23, 0, 0, - tzinfo=datetime.timezone.utc) - - self.fetch_history_duration = (self.fetch_history_end - - self.fetch_history_date) - - self.fetch_history_data = { - 'status': True, - 'result': {'foo': 'bar'}, - 'stdout': 'blabla', - 'stderr': 'blablabla', - } - self.snapshot = { - 'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'), - 'branches': { - b'master': { - 'target': self.revision['id'], - 'target_type': 'revision', - }, - }, - } - - self.empty_snapshot = { - 'id': hash_to_bytes('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'), - 'branches': {}, - } - - self.complete_snapshot = { - 'id': hash_to_bytes('6e65b86363953b780d92b0a928f3e8fcdd10db36'), - 'branches': { - b'directory': { - 'target': hash_to_bytes( - '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'), - 'target_type': 'directory', - }, - b'directory2': { - 'target': hash_to_bytes( - '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'), - 'target_type': 'directory', - }, - b'content': { - 'target': hash_to_bytes( - 'fe95a46679d128ff167b7c55df5d02356c5a1ae1'), - 'target_type': 'content', - }, - b'alias': { - 'target': b'revision', - 'target_type': 'alias', - }, - b'revision': { - 'target': hash_to_bytes( - 'aafb16d69fd30ff58afdd69036a26047f3aebdc6'), - 'target_type': 'revision', - }, - b'release': { - 'target': hash_to_bytes( - '7045404f3d1c54e6473c71bbb716529fbad4be24'), - 'target_type': 'release', - }, - b'snapshot': { - 'target': hash_to_bytes( - '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'), - 'target_type': 'snapshot', - }, - b'dangling': None, - }, - } - - -class CommonTestStorage(TestStorageData): +class TestStorage: """Base class for Storage testing. This class is used as-is to test local storage (see TestLocalStorage below) and remote storage (see TestRemoteStorage in test_remote_storage.py. We need to have the two classes inherit from this base class separately to avoid nosetests running the tests from the base class twice. - """ maxDiff = None # type: ClassVar[Optional[int]] _test_origin_ids = True - @staticmethod - def normalize_entity(entity): - entity = copy.deepcopy(entity) - for key in ('date', 'committer_date'): - if key in entity: - entity[key] = identifiers.normalize_timestamp(entity[key]) - - return entity + def test_check_config(self, swh_storage): + assert swh_storage.check_config(check_write=True) + assert swh_storage.check_config(check_write=False) - def test_check_config(self): - self.assertTrue(self.storage.check_config(check_write=True)) - self.assertTrue(self.storage.check_config(check_write=False)) - - def test_content_add(self): - cont = self.cont + def test_content_add(self, swh_storage): + cont = data.cont insertion_start_time = datetime.datetime.now(tz=datetime.timezone.utc) - actual_result = self.storage.content_add([cont]) + actual_result = swh_storage.content_add([cont]) insertion_end_time = datetime.datetime.now(tz=datetime.timezone.utc) - self.assertEqual(actual_result, { + assert actual_result == { 'content:add': 1, 'content:add:bytes': cont['length'], 'skipped_content:add': 0 - }) + } - self.assertEqual(list(self.storage.content_get([cont['sha1']])), - [{'sha1': cont['sha1'], 'data': cont['data']}]) + assert list(swh_storage.content_get([cont['sha1']])) == \ + [{'sha1': cont['sha1'], 'data': cont['data']}] - expected_cont = cont.copy() + expected_cont = data.cont del expected_cont['data'] - journal_objects = list(self.journal_writer.objects) + journal_objects = list(swh_storage.journal_writer.objects) for (obj_type, obj) in journal_objects: - self.assertLessEqual(insertion_start_time, obj['ctime']) - self.assertLessEqual(obj['ctime'], insertion_end_time) + assert insertion_start_time <= obj['ctime'] + assert obj['ctime'] <= insertion_end_time del obj['ctime'] - self.assertEqual(journal_objects, - [('content', expected_cont)]) + assert journal_objects == [('content', expected_cont)] - def test_content_add_validation(self): - cont = self.cont + def test_content_add_validation(self, swh_storage): + cont = data.cont - with self.assertRaisesRegex(ValueError, 'status'): - self.storage.content_add([{**cont, 'status': 'foobar'}]) + with pytest.raises(ValueError, match='status'): + swh_storage.content_add([{**cont, 'status': 'foobar'}]) - with self.assertRaisesRegex(ValueError, "(?i)length"): - self.storage.content_add([{**cont, 'length': -2}]) + with pytest.raises(ValueError, match="(?i)length"): + swh_storage.content_add([{**cont, 'length': -2}]) - with self.assertRaisesRegex( - (ValueError, psycopg2.IntegrityError), 'reason') as cm: - self.storage.content_add([{**cont, 'status': 'absent'}]) + with pytest.raises((ValueError, psycopg2.IntegrityError), + match='reason') as cm: + swh_storage.content_add([{**cont, 'status': 'absent'}]) - if type(cm.exception) == psycopg2.IntegrityError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.NOT_NULL_VIOLATION) + if type(cm.value) == psycopg2.IntegrityError: + assert cm.exception.pgcode == \ + psycopg2.errorcodes.NOT_NULL_VIOLATION - with self.assertRaisesRegex( + with pytest.raises( ValueError, - "^Must not provide a reason if content is not absent.$"): - self.storage.content_add([{**cont, 'reason': 'foobar'}]) + match="^Must not provide a reason if content is not absent.$"): + swh_storage.content_add([{**cont, 'reason': 'foobar'}]) - def test_content_get_missing(self): - cont = self.cont + def test_content_get_missing(self, swh_storage): + cont = data.cont - self.storage.content_add([cont]) + swh_storage.content_add([cont]) # Query a single missing content - results = list(self.storage.content_get( - [self.cont2['sha1']])) - self.assertEqual(results, - [None]) + results = list(swh_storage.content_get( + [data.cont2['sha1']])) + assert results == [None] # Check content_get does not abort after finding a missing content - results = list(self.storage.content_get( - [self.cont['sha1'], self.cont2['sha1']])) - self.assertEqual(results, - [{'sha1': cont['sha1'], 'data': cont['data']}, None]) + results = list(swh_storage.content_get( + [data.cont['sha1'], data.cont2['sha1']])) + assert results == [{'sha1': cont['sha1'], 'data': cont['data']}, None] # Check content_get does not discard found countent when it finds # a missing content. - results = list(self.storage.content_get( - [self.cont2['sha1'], self.cont['sha1']])) - self.assertEqual(results, - [None, {'sha1': cont['sha1'], 'data': cont['data']}]) + results = list(swh_storage.content_get( + [data.cont2['sha1'], data.cont['sha1']])) + assert results == [None, {'sha1': cont['sha1'], 'data': cont['data']}] - def test_content_add_same_input(self): - cont = self.cont + def test_content_add_same_input(self, swh_storage): + cont = data.cont - actual_result = self.storage.content_add([cont, cont]) - self.assertEqual(actual_result, { + actual_result = swh_storage.content_add([cont, cont]) + assert actual_result == { 'content:add': 1, 'content:add:bytes': cont['length'], 'skipped_content:add': 0 - }) + } - def test_content_add_different_input(self): - cont = self.cont - cont2 = self.cont2 + def test_content_add_different_input(self, swh_storage): + cont = data.cont + cont2 = data.cont2 - actual_result = self.storage.content_add([cont, cont2]) - self.assertEqual(actual_result, { + actual_result = swh_storage.content_add([cont, cont2]) + assert actual_result == { 'content:add': 2, 'content:add:bytes': cont['length'] + cont2['length'], 'skipped_content:add': 0 - }) + } - def test_content_add_twice(self): - actual_result = self.storage.content_add([self.cont]) - self.assertEqual(actual_result, { + def test_content_add_twice(self, swh_storage): + actual_result = swh_storage.content_add([data.cont]) + assert actual_result == { 'content:add': 1, - 'content:add:bytes': self.cont['length'], + 'content:add:bytes': data.cont['length'], 'skipped_content:add': 0 - }) - self.assertEqual(len(self.journal_writer.objects), 1) + } + assert len(swh_storage.journal_writer.objects) == 1 - actual_result = self.storage.content_add([self.cont, self.cont2]) - self.assertEqual(actual_result, { + actual_result = swh_storage.content_add([data.cont, data.cont2]) + assert actual_result == { 'content:add': 1, - 'content:add:bytes': self.cont2['length'], + 'content:add:bytes': data.cont2['length'], 'skipped_content:add': 0 - }) - self.assertEqual(len(self.journal_writer.objects), 2) + } + assert len(swh_storage.journal_writer.objects) == 2 - self.assertEqual(len(self.storage.content_find(self.cont)), 1) - self.assertEqual(len(self.storage.content_find(self.cont2)), 1) + assert len(swh_storage.content_find(data.cont)) == 1 + assert len(swh_storage.content_find(data.cont2)) == 1 - def test_content_add_db(self): - cont = self.cont + def test_content_add_db(self, swh_storage): + cont = data.cont - actual_result = self.storage.content_add([cont]) + actual_result = swh_storage.content_add([cont]) - self.assertEqual(actual_result, { + assert actual_result == { 'content:add': 1, 'content:add:bytes': cont['length'], 'skipped_content:add': 0 - }) + } - if hasattr(self.storage, 'objstorage'): - self.assertIn(cont['sha1'], self.storage.objstorage) + if hasattr(swh_storage, 'objstorage'): + assert cont['sha1'] in swh_storage.objstorage - with self.db_transaction() as (_, cur): + with db_transaction(swh_storage) as (_, cur): cur.execute('SELECT sha1, sha1_git, sha256, length, status' ' FROM content WHERE sha1 = %s', (cont['sha1'],)) datum = cur.fetchone() - self.assertEqual( - datum, - (cont['sha1'], cont['sha1_git'], cont['sha256'], - cont['length'], 'visible')) + assert datum == (cont['sha1'], cont['sha1_git'], cont['sha256'], + cont['length'], 'visible') expected_cont = cont.copy() del expected_cont['data'] - journal_objects = list(self.journal_writer.objects) + journal_objects = list(swh_storage.journal_writer.objects) for (obj_type, obj) in journal_objects: del obj['ctime'] - self.assertEqual(journal_objects, - [('content', expected_cont)]) + assert journal_objects == [('content', expected_cont)] - def test_content_add_collision(self): - cont1 = self.cont + def test_content_add_collision(self, swh_storage): + cont1 = data.cont # create (corrupted) content with same sha1{,_git} but != sha256 cont1b = cont1.copy() sha256_array = bytearray(cont1b['sha256']) sha256_array[0] += 1 cont1b['sha256'] = bytes(sha256_array) - with self.assertRaises(HashCollision) as cm: - self.storage.content_add([cont1, cont1b]) + with pytest.raises(HashCollision) as cm: + swh_storage.content_add([cont1, cont1b]) - self.assertIn(cm.exception.args[0], ['sha1', 'sha1_git', 'blake2s256']) + assert cm.value.args[0] in ['sha1', 'sha1_git', 'blake2s256'] - def test_content_add_metadata(self): - cont = self.cont.copy() + def test_content_add_metadata(self, swh_storage): + cont = data.cont del cont['data'] cont['ctime'] = datetime.datetime.now() - actual_result = self.storage.content_add_metadata([cont]) - self.assertEqual(actual_result, { + actual_result = swh_storage.content_add_metadata([cont]) + assert actual_result == { 'content:add': 1, 'skipped_content:add': 0 - }) + } expected_cont = cont.copy() del expected_cont['ctime'] - self.assertEqual( - list(self.storage.content_get_metadata([cont['sha1']])), - [expected_cont]) + assert list(swh_storage.content_get_metadata([cont['sha1']])) == \ + [expected_cont] - self.assertEqual(list(self.journal_writer.objects), - [('content', cont)]) + assert list(swh_storage.journal_writer.objects) == [('content', cont)] - def test_content_add_metadata_same_input(self): - cont = self.cont.copy() + def test_content_add_metadata_same_input(self, swh_storage): + cont = data.cont del cont['data'] cont['ctime'] = datetime.datetime.now() - actual_result = self.storage.content_add_metadata([cont, cont]) - self.assertEqual(actual_result, { + actual_result = swh_storage.content_add_metadata([cont, cont]) + assert actual_result == { 'content:add': 1, 'skipped_content:add': 0 - }) + } - def test_content_add_metadata_different_input(self): - cont = self.cont.copy() + def test_content_add_metadata_different_input(self, swh_storage): + cont = data.cont del cont['data'] cont['ctime'] = datetime.datetime.now() - cont2 = self.cont2.copy() + cont2 = data.cont2 del cont2['data'] cont2['ctime'] = datetime.datetime.now() - actual_result = self.storage.content_add_metadata([cont, cont2]) - self.assertEqual(actual_result, { + actual_result = swh_storage.content_add_metadata([cont, cont2]) + assert actual_result == { 'content:add': 2, 'skipped_content:add': 0 - }) + } - def test_content_add_metadata_db(self): - cont = self.cont.copy() + def test_content_add_metadata_db(self, swh_storage): + cont = data.cont del cont['data'] cont['ctime'] = datetime.datetime.now() - actual_result = self.storage.content_add_metadata([cont]) + actual_result = swh_storage.content_add_metadata([cont]) - self.assertEqual(actual_result, { + assert actual_result == { 'content:add': 1, 'skipped_content:add': 0 - }) - - if hasattr(self.storage, 'objstorage'): - self.assertNotIn(cont['sha1'], self.storage.objstorage) + } - with self.db_transaction() as (_, cur): + if hasattr(swh_storage, 'objstorage'): + assert cont['sha1'] not in swh_storage.objstorage + with db_transaction(swh_storage) as (_, cur): cur.execute('SELECT sha1, sha1_git, sha256, length, status' ' FROM content WHERE sha1 = %s', (cont['sha1'],)) datum = cur.fetchone() + assert datum == (cont['sha1'], cont['sha1_git'], cont['sha256'], + cont['length'], 'visible') - self.assertEqual( - datum, - (cont['sha1'], cont['sha1_git'], cont['sha256'], - cont['length'], 'visible')) - - self.assertEqual(list(self.journal_writer.objects), - [('content', cont)]) + assert list(swh_storage.journal_writer.objects) == [('content', cont)] - def test_content_add_metadata_collision(self): - cont1 = self.cont.copy() + def test_content_add_metadata_collision(self, swh_storage): + cont1 = data.cont del cont1['data'] cont1['ctime'] = datetime.datetime.now() # create (corrupted) content with same sha1{,_git} but != sha256 cont1b = cont1.copy() sha256_array = bytearray(cont1b['sha256']) sha256_array[0] += 1 cont1b['sha256'] = bytes(sha256_array) - with self.assertRaises(HashCollision) as cm: - self.storage.content_add_metadata([cont1, cont1b]) + with pytest.raises(HashCollision) as cm: + swh_storage.content_add_metadata([cont1, cont1b]) - self.assertIn(cm.exception.args[0], ['sha1', 'sha1_git', 'blake2s256']) + assert cm.value.args[0] in ['sha1', 'sha1_git', 'blake2s256'] - def test_skipped_content_add_db(self): - cont = self.skipped_cont.copy() - cont2 = self.skipped_cont2.copy() + def test_skipped_content_add_db(self, swh_storage): + cont = data.skipped_cont + cont2 = data.skipped_cont2 cont2['blake2s256'] = None - actual_result = self.storage.content_add([cont, cont, cont2]) + actual_result = swh_storage.content_add([cont, cont, cont2]) - self.assertEqual(actual_result, { + assert actual_result == { 'content:add': 0, 'content:add:bytes': 0, 'skipped_content:add': 2, - }) + } - with self.db_transaction() as (_, cur): + with db_transaction(swh_storage) as (_, cur): cur.execute('SELECT sha1, sha1_git, sha256, blake2s256, ' 'length, status, reason ' 'FROM skipped_content ORDER BY sha1_git') - data = cur.fetchall() + datum = cur.fetchall() - self.assertEqual(2, len(data)) - self.assertEqual( - data[0], - (cont['sha1'], cont['sha1_git'], cont['sha256'], - cont['blake2s256'], cont['length'], 'absent', - 'Content too long') - ) + assert len(datum) == 2 + assert datum[0] == (cont['sha1'], cont['sha1_git'], cont['sha256'], + cont['blake2s256'], cont['length'], 'absent', + 'Content too long') - self.assertEqual( - data[1], - (cont2['sha1'], cont2['sha1_git'], cont2['sha256'], - cont2['blake2s256'], cont2['length'], 'absent', - 'Content too long') - ) + assert datum[1] == (cont2['sha1'], cont2['sha1_git'], cont2['sha256'], + cont2['blake2s256'], cont2['length'], 'absent', + 'Content too long') - def test_skipped_content_add(self): - cont = self.skipped_cont.copy() - cont2 = self.skipped_cont2.copy() + def test_skipped_content_add(self, swh_storage): + cont = data.skipped_cont + cont2 = data.skipped_cont2 cont2['blake2s256'] = None - missing = list(self.storage.skipped_content_missing([cont, cont2])) + missing = list(swh_storage.skipped_content_missing([cont, cont2])) - self.assertEqual(len(missing), 2, missing) + assert len(missing) == 2 - actual_result = self.storage.content_add([cont, cont, cont2]) + actual_result = swh_storage.content_add([cont, cont, cont2]) - self.assertEqual(actual_result, { + assert actual_result == { 'content:add': 0, 'content:add:bytes': 0, 'skipped_content:add': 2, - }) + } - missing = list(self.storage.skipped_content_missing([cont, cont2])) + missing = list(swh_storage.skipped_content_missing([cont, cont2])) - self.assertEqual(missing, []) + assert missing == [] @pytest.mark.property_based @settings(deadline=None) # this test is very slow @given(strategies.sets( elements=strategies.sampled_from( ['sha256', 'sha1_git', 'blake2s256']), min_size=0)) - def test_content_missing(self, algos): + def test_content_missing(self, swh_storage, algos): algos |= {'sha1'} - cont2 = self.cont2 - missing_cont = self.missing_cont - self.storage.content_add([cont2]) + cont2 = data.cont2 + missing_cont = data.missing_cont + swh_storage.content_add([cont2]) test_contents = [cont2] missing_per_hash = defaultdict(list) for i in range(256): test_content = missing_cont.copy() for hash in algos: test_content[hash] = bytes([i]) + test_content[hash][1:] missing_per_hash[hash].append(test_content[hash]) test_contents.append(test_content) - self.assertCountEqual( - self.storage.content_missing(test_contents), - missing_per_hash['sha1'] - ) + assert set(swh_storage.content_missing(test_contents)) == \ + set(missing_per_hash['sha1']) for hash in algos: - self.assertCountEqual( - self.storage.content_missing(test_contents, key_hash=hash), - missing_per_hash[hash] - ) + assert set(swh_storage.content_missing( + test_contents, key_hash=hash)) == set(missing_per_hash[hash]) @pytest.mark.property_based @given(strategies.sets( elements=strategies.sampled_from( ['sha256', 'sha1_git', 'blake2s256']), min_size=0)) - def test_content_missing_unknown_algo(self, algos): + def test_content_missing_unknown_algo(self, swh_storage, algos): algos |= {'sha1'} - cont2 = self.cont2 - missing_cont = self.missing_cont - self.storage.content_add([cont2]) + cont2 = data.cont2 + missing_cont = data.missing_cont + swh_storage.content_add([cont2]) test_contents = [cont2] missing_per_hash = defaultdict(list) for i in range(16): test_content = missing_cont.copy() for hash in algos: test_content[hash] = bytes([i]) + test_content[hash][1:] missing_per_hash[hash].append(test_content[hash]) test_content['nonexisting_algo'] = b'\x00' test_contents.append(test_content) - self.assertCountEqual( - self.storage.content_missing(test_contents), - missing_per_hash['sha1'] - ) + assert set( + swh_storage.content_missing(test_contents)) == set( + missing_per_hash['sha1']) for hash in algos: - self.assertCountEqual( - self.storage.content_missing(test_contents, key_hash=hash), - missing_per_hash[hash] - ) + assert set(swh_storage.content_missing( + test_contents, key_hash=hash)) == set( + missing_per_hash[hash]) - def test_content_missing_per_sha1(self): + def test_content_missing_per_sha1(self, swh_storage): # given - cont2 = self.cont2 - missing_cont = self.missing_cont - self.storage.content_add([cont2]) + cont2 = data.cont2 + missing_cont = data.missing_cont + swh_storage.content_add([cont2]) # when - gen = self.storage.content_missing_per_sha1([cont2['sha1'], - missing_cont['sha1']]) - + gen = swh_storage.content_missing_per_sha1([cont2['sha1'], + missing_cont['sha1']]) # then - self.assertEqual(list(gen), [missing_cont['sha1']]) + assert list(gen) == [missing_cont['sha1']] - def test_content_get_metadata(self): - cont1 = self.cont.copy() - cont2 = self.cont2.copy() + def test_content_get_metadata(self, swh_storage): + cont1 = data.cont + cont2 = data.cont2 - self.storage.content_add([cont1, cont2]) + swh_storage.content_add([cont1, cont2]) - gen = self.storage.content_get_metadata([cont1['sha1'], cont2['sha1']]) + actual_md = list(swh_storage.content_get_metadata( + [cont1['sha1'], cont2['sha1']])) # we only retrieve the metadata cont1.pop('data') cont2.pop('data') - self.assertCountEqual(list(gen), [cont1, cont2]) + assert actual_md in ([cont1, cont2], [cont2, cont1]) - def test_content_get_metadata_missing_sha1(self): - cont1 = self.cont.copy() - cont2 = self.cont2.copy() + def test_content_get_metadata_missing_sha1(self, swh_storage): + cont1 = data.cont + cont2 = data.cont2 + missing_cont = data.missing_cont - missing_cont = self.missing_cont.copy() + swh_storage.content_add([cont1, cont2]) - self.storage.content_add([cont1, cont2]) - - gen = self.storage.content_get_metadata([missing_cont['sha1']]) + gen = swh_storage.content_get_metadata([missing_cont['sha1']]) # All the metadata keys are None missing_cont.pop('data') - for key in list(missing_cont): + for key in missing_cont: if key != 'sha1': missing_cont[key] = None - self.assertEqual(list(gen), [missing_cont]) - - @staticmethod - def _transform_entries(dir_, *, prefix=b''): - for ent in dir_['entries']: - yield { - 'dir_id': dir_['id'], - 'type': ent['type'], - 'target': ent['target'], - 'name': prefix + ent['name'], - 'perms': ent['perms'], - 'status': None, - 'sha1': None, - 'sha1_git': None, - 'sha256': None, - 'length': None, - } + assert list(gen) == [missing_cont] - def test_directory_add(self): - init_missing = list(self.storage.directory_missing([self.dir['id']])) - self.assertEqual([self.dir['id']], init_missing) + def test_directory_add(self, swh_storage): + init_missing = list(swh_storage.directory_missing([data.dir['id']])) + assert [data.dir['id']] == init_missing - actual_result = self.storage.directory_add([self.dir]) - self.assertEqual(actual_result, {'directory:add': 1}) + actual_result = swh_storage.directory_add([data.dir]) + assert actual_result == {'directory:add': 1} - self.assertEqual(list(self.journal_writer.objects), - [('directory', self.dir)]) + assert list(swh_storage.journal_writer.objects) == \ + [('directory', data.dir)] - actual_data = list(self.storage.directory_ls(self.dir['id'])) - expected_data = list(self._transform_entries(self.dir)) - self.assertCountEqual(expected_data, actual_data) + actual_data = list(swh_storage.directory_ls(data.dir['id'])) + expected_data = list(transform_entries(data.dir)) - after_missing = list(self.storage.directory_missing([self.dir['id']])) - self.assertEqual([], after_missing) + assert sorted(expected_data, key=cmpdir) \ + == sorted(actual_data, key=cmpdir) - def test_directory_add_validation(self): - dir_ = copy.deepcopy(self.dir) + after_missing = list(swh_storage.directory_missing([data.dir['id']])) + assert after_missing == [] + + def test_directory_add_validation(self, swh_storage): + dir_ = copy.deepcopy(data.dir) dir_['entries'][0]['type'] = 'foobar' - with self.assertRaisesRegex(ValueError, 'type.*foobar'): - self.storage.directory_add([dir_]) + with pytest.raises(ValueError, match='type.*foobar'): + swh_storage.directory_add([dir_]) - dir_ = copy.deepcopy(self.dir) + dir_ = copy.deepcopy(data.dir) del dir_['entries'][0]['target'] - with self.assertRaisesRegex( - (TypeError, psycopg2.IntegrityError), 'target') as cm: - self.storage.directory_add([dir_]) + with pytest.raises((TypeError, psycopg2.IntegrityError), + match='target') as cm: + swh_storage.directory_add([dir_]) - if type(cm.exception) == psycopg2.IntegrityError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.NOT_NULL_VIOLATION) + if type(cm.value) == psycopg2.IntegrityError: + assert cm.value.pgcode == psycopg2.errorcodes.NOT_NULL_VIOLATION - def test_directory_add_twice(self): - actual_result = self.storage.directory_add([self.dir]) - self.assertEqual(actual_result, {'directory:add': 1}) + def test_directory_add_twice(self, swh_storage): + actual_result = swh_storage.directory_add([data.dir]) + assert actual_result == {'directory:add': 1} - self.assertEqual(list(self.journal_writer.objects), - [('directory', self.dir)]) + assert list(swh_storage.journal_writer.objects) \ + == [('directory', data.dir)] - actual_result = self.storage.directory_add([self.dir]) - self.assertEqual(actual_result, {'directory:add': 0}) + actual_result = swh_storage.directory_add([data.dir]) + assert actual_result == {'directory:add': 0} - self.assertEqual(list(self.journal_writer.objects), - [('directory', self.dir)]) + assert list(swh_storage.journal_writer.objects) \ + == [('directory', data.dir)] - def test_directory_get_recursive(self): - init_missing = list(self.storage.directory_missing([self.dir['id']])) - self.assertEqual([self.dir['id']], init_missing) + def test_directory_get_recursive(self, swh_storage): + init_missing = list(swh_storage.directory_missing([data.dir['id']])) + assert init_missing == [data.dir['id']] - actual_result = self.storage.directory_add( - [self.dir, self.dir2, self.dir3]) - self.assertEqual(actual_result, {'directory:add': 3}) + actual_result = swh_storage.directory_add( + [data.dir, data.dir2, data.dir3]) + assert actual_result == {'directory:add': 3} - self.assertEqual(list(self.journal_writer.objects), - [('directory', self.dir), - ('directory', self.dir2), - ('directory', self.dir3)]) + assert list(swh_storage.journal_writer.objects) == [ + ('directory', data.dir), + ('directory', data.dir2), + ('directory', data.dir3)] # List directory containing a file and an unknown subdirectory - actual_data = list(self.storage.directory_ls( - self.dir['id'], recursive=True)) - expected_data = list(self._transform_entries(self.dir)) - self.assertCountEqual(expected_data, actual_data) + actual_data = list(swh_storage.directory_ls( + data.dir['id'], recursive=True)) + expected_data = list(transform_entries(data.dir)) + assert sorted(expected_data, key=cmpdir) \ + == sorted(actual_data, key=cmpdir) # List directory containing a file and an unknown subdirectory - actual_data = list(self.storage.directory_ls( - self.dir2['id'], recursive=True)) - expected_data = list(self._transform_entries(self.dir2)) - self.assertCountEqual(expected_data, actual_data) + actual_data = list(swh_storage.directory_ls( + data.dir2['id'], recursive=True)) + expected_data = list(transform_entries(data.dir2)) + assert sorted(expected_data, key=cmpdir) \ + == sorted(actual_data, key=cmpdir) # List directory containing a known subdirectory, entries should # be both those of the directory and of the subdir - actual_data = list(self.storage.directory_ls( - self.dir3['id'], recursive=True)) + actual_data = list(swh_storage.directory_ls( + data.dir3['id'], recursive=True)) expected_data = list(itertools.chain( - self._transform_entries(self.dir3), - self._transform_entries(self.dir, prefix=b'subdir/'))) - self.assertCountEqual(expected_data, actual_data) + transform_entries(data.dir3), + transform_entries(data.dir, prefix=b'subdir/'))) + assert sorted(expected_data, key=cmpdir) \ + == sorted(actual_data, key=cmpdir) - def test_directory_get_non_recursive(self): - init_missing = list(self.storage.directory_missing([self.dir['id']])) - self.assertEqual([self.dir['id']], init_missing) + def test_directory_get_non_recursive(self, swh_storage): + init_missing = list(swh_storage.directory_missing([data.dir['id']])) + assert init_missing == [data.dir['id']] - actual_result = self.storage.directory_add( - [self.dir, self.dir2, self.dir3]) - self.assertEqual(actual_result, {'directory:add': 3}) + actual_result = swh_storage.directory_add( + [data.dir, data.dir2, data.dir3]) + assert actual_result == {'directory:add': 3} - self.assertEqual(list(self.journal_writer.objects), - [('directory', self.dir), - ('directory', self.dir2), - ('directory', self.dir3)]) + assert list(swh_storage.journal_writer.objects) == [ + ('directory', data.dir), + ('directory', data.dir2), + ('directory', data.dir3)] # List directory containing a file and an unknown subdirectory - actual_data = list(self.storage.directory_ls(self.dir['id'])) - expected_data = list(self._transform_entries(self.dir)) - self.assertCountEqual(expected_data, actual_data) + actual_data = list(swh_storage.directory_ls(data.dir['id'])) + expected_data = list(transform_entries(data.dir)) + assert sorted(expected_data, key=cmpdir) \ + == sorted(actual_data, key=cmpdir) # List directory contaiining a single file - actual_data = list(self.storage.directory_ls(self.dir2['id'])) - expected_data = list(self._transform_entries(self.dir2)) - self.assertCountEqual(expected_data, actual_data) + actual_data = list(swh_storage.directory_ls(data.dir2['id'])) + expected_data = list(transform_entries(data.dir2)) + assert sorted(expected_data, key=cmpdir) \ + == sorted(actual_data, key=cmpdir) # List directory containing a known subdirectory, entries should # only be those of the parent directory, not of the subdir - actual_data = list(self.storage.directory_ls(self.dir3['id'])) - expected_data = list(self._transform_entries(self.dir3)) - self.assertCountEqual(expected_data, actual_data) + actual_data = list(swh_storage.directory_ls(data.dir3['id'])) + expected_data = list(transform_entries(data.dir3)) + assert sorted(expected_data, key=cmpdir) \ + == sorted(actual_data, key=cmpdir) - def test_directory_entry_get_by_path(self): + def test_directory_entry_get_by_path(self, swh_storage): # given - init_missing = list(self.storage.directory_missing([self.dir3['id']])) - self.assertEqual([self.dir3['id']], init_missing) + init_missing = list(swh_storage.directory_missing([data.dir3['id']])) + assert [data.dir3['id']] == init_missing - actual_result = self.storage.directory_add([self.dir3, self.dir4]) - self.assertEqual(actual_result, {'directory:add': 2}) + actual_result = swh_storage.directory_add([data.dir3, data.dir4]) + assert actual_result == {'directory:add': 2} expected_entries = [ { - 'dir_id': self.dir3['id'], + 'dir_id': data.dir3['id'], 'name': b'foo', 'type': 'file', - 'target': self.cont['sha1_git'], + 'target': data.cont['sha1_git'], 'sha1': None, 'sha1_git': None, 'sha256': None, 'status': None, 'perms': from_disk.DentryPerms.content, 'length': None, }, { - 'dir_id': self.dir3['id'], + 'dir_id': data.dir3['id'], 'name': b'subdir', 'type': 'dir', - 'target': self.dir['id'], + 'target': data.dir['id'], 'sha1': None, 'sha1_git': None, 'sha256': None, 'status': None, 'perms': from_disk.DentryPerms.directory, 'length': None, }, { - 'dir_id': self.dir3['id'], + 'dir_id': data.dir3['id'], 'name': b'hello', 'type': 'file', 'target': b'12345678901234567890', 'sha1': None, 'sha1_git': None, 'sha256': None, 'status': None, 'perms': from_disk.DentryPerms.content, 'length': None, }, ] # when (all must be found here) - for entry, expected_entry in zip(self.dir3['entries'], - expected_entries): - actual_entry = self.storage.directory_entry_get_by_path( - self.dir3['id'], + for entry, expected_entry in zip( + data.dir3['entries'], expected_entries): + actual_entry = swh_storage.directory_entry_get_by_path( + data.dir3['id'], [entry['name']]) - self.assertEqual(actual_entry, expected_entry) + assert actual_entry == expected_entry # same, but deeper - for entry, expected_entry in zip(self.dir3['entries'], - expected_entries): - actual_entry = self.storage.directory_entry_get_by_path( - self.dir4['id'], + for entry, expected_entry in zip( + data.dir3['entries'], expected_entries): + actual_entry = swh_storage.directory_entry_get_by_path( + data.dir4['id'], [b'subdir1', entry['name']]) expected_entry = expected_entry.copy() expected_entry['name'] = b'subdir1/' + expected_entry['name'] - self.assertEqual(actual_entry, expected_entry) + assert actual_entry == expected_entry - # when (nothing should be found here since self.dir is not persisted.) - for entry in self.dir['entries']: - actual_entry = self.storage.directory_entry_get_by_path( - self.dir['id'], + # when (nothing should be found here since data.dir is not persisted.) + for entry in data.dir['entries']: + actual_entry = swh_storage.directory_entry_get_by_path( + data.dir['id'], [entry['name']]) - self.assertIsNone(actual_entry) + assert actual_entry is None + + def test_revision_add(self, swh_storage): + init_missing = swh_storage.revision_missing([data.revision['id']]) + assert list(init_missing) == [data.revision['id']] - def test_revision_add(self): - init_missing = self.storage.revision_missing([self.revision['id']]) - self.assertEqual([self.revision['id']], list(init_missing)) + actual_result = swh_storage.revision_add([data.revision]) + assert actual_result == {'revision:add': 1} - actual_result = self.storage.revision_add([self.revision]) - self.assertEqual(actual_result, {'revision:add': 1}) + end_missing = swh_storage.revision_missing([data.revision['id']]) + assert list(end_missing) == [] - end_missing = self.storage.revision_missing([self.revision['id']]) - self.assertEqual([], list(end_missing)) + assert list(swh_storage.journal_writer.objects) \ + == [('revision', data.revision)] - self.assertEqual(list(self.journal_writer.objects), - [('revision', self.revision)]) + # already there so nothing added + actual_result = swh_storage.revision_add([data.revision]) + assert actual_result == {'revision:add': 0} - def test_revision_add_validation(self): - rev = copy.deepcopy(self.revision) + def test_revision_add_validation(self, swh_storage): + rev = copy.deepcopy(data.revision) rev['date']['offset'] = 2**16 - with self.assertRaisesRegex( - (ValueError, psycopg2.DataError), 'offset') as cm: - self.storage.revision_add([rev]) + with pytest.raises((ValueError, psycopg2.DataError), + match='offset') as cm: + swh_storage.revision_add([rev]) - if type(cm.exception) == psycopg2.DataError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE) + if type(cm.value) == psycopg2.DataError: + assert cm.value.pgcode \ + == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE - rev = copy.deepcopy(self.revision) + rev = copy.deepcopy(data.revision) rev['committer_date']['offset'] = 2**16 - with self.assertRaisesRegex( - (ValueError, psycopg2.DataError), 'offset') as cm: - self.storage.revision_add([rev]) + with pytest.raises((ValueError, psycopg2.DataError), + match='offset') as cm: + swh_storage.revision_add([rev]) - if type(cm.exception) == psycopg2.DataError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE) + if type(cm.value) == psycopg2.DataError: + assert cm.value.pgcode \ + == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE - rev = copy.deepcopy(self.revision) + rev = copy.deepcopy(data.revision) rev['type'] = 'foobar' - with self.assertRaisesRegex( - (ValueError, psycopg2.DataError), '(?i)type') as cm: - self.storage.revision_add([rev]) + with pytest.raises((ValueError, psycopg2.DataError), + match='(?i)type') as cm: + swh_storage.revision_add([rev]) + + if type(cm.value) == psycopg2.DataError: + assert cm.value.pgcode == \ + psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION - if type(cm.exception) == psycopg2.DataError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION) + def test_revision_add_twice(self, swh_storage): + actual_result = swh_storage.revision_add([data.revision]) + assert actual_result == {'revision:add': 1} - def test_revision_add_twice(self): - actual_result = self.storage.revision_add([self.revision]) - self.assertEqual(actual_result, {'revision:add': 1}) + assert list(swh_storage.journal_writer.objects) \ + == [('revision', data.revision)] - self.assertEqual(list(self.journal_writer.objects), - [('revision', self.revision)]) + actual_result = swh_storage.revision_add( + [data.revision, data.revision2]) + assert actual_result == {'revision:add': 1} - actual_result = self.storage.revision_add( - [self.revision, self.revision2]) - self.assertEqual(actual_result, {'revision:add': 1}) + assert list(swh_storage.journal_writer.objects) \ + == [('revision', data.revision), + ('revision', data.revision2)] - self.assertEqual(list(self.journal_writer.objects), - [('revision', self.revision), - ('revision', self.revision2)]) + def test_revision_add_name_clash(self, swh_storage): + revision1 = data.revision + revision2 = data.revision2 - def test_revision_add_name_clash(self): - revision1 = self.revision.copy() - revision2 = self.revision2.copy() revision1['author'] = { 'fullname': b'John Doe ', 'name': b'John Doe', 'email': b'john.doe@example.com' } revision2['author'] = { 'fullname': b'John Doe ', 'name': b'John Doe ', 'email': b'john.doe@example.com ' } - actual_result = self.storage.revision_add([revision1, revision2]) - self.assertEqual(actual_result, {'revision:add': 2}) + actual_result = swh_storage.revision_add([revision1, revision2]) + assert actual_result == {'revision:add': 2} - def test_revision_log(self): + def test_revision_log(self, swh_storage): # given - # self.revision4 -is-child-of-> self.revision3 - self.storage.revision_add([self.revision3, - self.revision4]) + # data.revision4 -is-child-of-> data.revision3 + swh_storage.revision_add([data.revision3, + data.revision4]) # when - actual_results = list(self.storage.revision_log( - [self.revision4['id']])) + actual_results = list(swh_storage.revision_log( + [data.revision4['id']])) # hack: ids generated for actual_result in actual_results: if 'id' in actual_result['author']: del actual_result['author']['id'] if 'id' in actual_result['committer']: del actual_result['committer']['id'] - self.assertEqual(len(actual_results), 2) # rev4 -child-> rev3 - self.assertEqual(actual_results[0], - self.normalize_entity(self.revision4)) - self.assertEqual(actual_results[1], - self.normalize_entity(self.revision3)) + assert len(actual_results) == 2 # rev4 -child-> rev3 + assert actual_results[0] == normalize_entity(data.revision4) + assert actual_results[1] == normalize_entity(data.revision3) - self.assertEqual(list(self.journal_writer.objects), - [('revision', self.revision3), - ('revision', self.revision4)]) + assert list(swh_storage.journal_writer.objects) == [ + ('revision', data.revision3), + ('revision', data.revision4)] - def test_revision_log_with_limit(self): + def test_revision_log_with_limit(self, swh_storage): # given - # self.revision4 -is-child-of-> self.revision3 - self.storage.revision_add([self.revision3, - self.revision4]) - actual_results = list(self.storage.revision_log( - [self.revision4['id']], 1)) + # data.revision4 -is-child-of-> data.revision3 + swh_storage.revision_add([data.revision3, + data.revision4]) + actual_results = list(swh_storage.revision_log( + [data.revision4['id']], 1)) # hack: ids generated for actual_result in actual_results: if 'id' in actual_result['author']: del actual_result['author']['id'] if 'id' in actual_result['committer']: del actual_result['committer']['id'] - self.assertEqual(len(actual_results), 1) - self.assertEqual(actual_results[0], self.revision4) + assert len(actual_results) == 1 + assert actual_results[0] == data.revision4 - def test_revision_log_unknown_revision(self): - rev_log = list(self.storage.revision_log([self.revision['id']])) - self.assertEqual(rev_log, []) + def test_revision_log_unknown_revision(self, swh_storage): + rev_log = list(swh_storage.revision_log([data.revision['id']])) + assert rev_log == [] - @staticmethod - def _short_revision(revision): - return [revision['id'], revision['parents']] - - def test_revision_shortlog(self): + def test_revision_shortlog(self, swh_storage): # given - # self.revision4 -is-child-of-> self.revision3 - self.storage.revision_add([self.revision3, - self.revision4]) + # data.revision4 -is-child-of-> data.revision3 + swh_storage.revision_add([data.revision3, + data.revision4]) # when - actual_results = list(self.storage.revision_shortlog( - [self.revision4['id']])) + actual_results = list(swh_storage.revision_shortlog( + [data.revision4['id']])) - self.assertEqual(len(actual_results), 2) # rev4 -child-> rev3 - self.assertEqual(list(actual_results[0]), - self._short_revision(self.revision4)) - self.assertEqual(list(actual_results[1]), - self._short_revision(self.revision3)) + assert len(actual_results) == 2 # rev4 -child-> rev3 + assert list(actual_results[0]) == short_revision(data.revision4) + assert list(actual_results[1]) == short_revision(data.revision3) - def test_revision_shortlog_with_limit(self): + def test_revision_shortlog_with_limit(self, swh_storage): # given - # self.revision4 -is-child-of-> self.revision3 - self.storage.revision_add([self.revision3, - self.revision4]) - actual_results = list(self.storage.revision_shortlog( - [self.revision4['id']], 1)) + # data.revision4 -is-child-of-> data.revision3 + swh_storage.revision_add([data.revision3, + data.revision4]) + actual_results = list(swh_storage.revision_shortlog( + [data.revision4['id']], 1)) - self.assertEqual(len(actual_results), 1) - self.assertEqual(list(actual_results[0]), - self._short_revision(self.revision4)) + assert len(actual_results) == 1 + assert list(actual_results[0]) == short_revision(data.revision4) - def test_revision_get(self): - self.storage.revision_add([self.revision]) + def test_revision_get(self, swh_storage): + swh_storage.revision_add([data.revision]) - actual_revisions = list(self.storage.revision_get( - [self.revision['id'], self.revision2['id']])) + actual_revisions = list(swh_storage.revision_get( + [data.revision['id'], data.revision2['id']])) # when if 'id' in actual_revisions[0]['author']: del actual_revisions[0]['author']['id'] # hack: ids are generated if 'id' in actual_revisions[0]['committer']: del actual_revisions[0]['committer']['id'] - self.assertEqual(len(actual_revisions), 2) - self.assertEqual(actual_revisions[0], - self.normalize_entity(self.revision)) - self.assertIsNone(actual_revisions[1]) + assert len(actual_revisions) == 2 + assert actual_revisions[0] == normalize_entity(data.revision) + assert actual_revisions[1] is None + + def test_revision_get_no_parents(self, swh_storage): + swh_storage.revision_add([data.revision3]) + + get = list(swh_storage.revision_get([data.revision3['id']])) - def test_revision_get_no_parents(self): - self.storage.revision_add([self.revision3]) + assert len(get) == 1 + assert get[0]['parents'] == [] # no parents on this one - get = list(self.storage.revision_get([self.revision3['id']])) + def test_release_add(self, swh_storage): + init_missing = swh_storage.release_missing([data.release['id'], + data.release2['id']]) + assert [data.release['id'], data.release2['id']] == list(init_missing) - self.assertEqual(len(get), 1) - self.assertEqual(get[0]['parents'], []) # no parents on this one + actual_result = swh_storage.release_add([data.release, data.release2]) + assert actual_result == {'release:add': 2} - def test_release_add(self): - init_missing = self.storage.release_missing([self.release['id'], - self.release2['id']]) - self.assertEqual([self.release['id'], self.release2['id']], - list(init_missing)) + end_missing = swh_storage.release_missing([data.release['id'], + data.release2['id']]) + assert list(end_missing) == [] - actual_result = self.storage.release_add([self.release, self.release2]) - self.assertEqual(actual_result, {'release:add': 2}) + assert list(swh_storage.journal_writer.objects) == [ + ('release', data.release), + ('release', data.release2)] - end_missing = self.storage.release_missing([self.release['id'], - self.release2['id']]) - self.assertEqual([], list(end_missing)) + # already present so nothing added + actual_result = swh_storage.release_add([data.release, data.release2]) + assert actual_result == {'release:add': 0} - self.assertEqual(list(self.journal_writer.objects), - [('release', self.release), - ('release', self.release2)]) + def test_release_add_no_author_date(self, swh_storage): + release = data.release - def test_release_add_no_author_date(self): - release = self.release.copy() release['author'] = None release['date'] = None - actual_result = self.storage.release_add([release]) - self.assertEqual(actual_result, {'release:add': 1}) + actual_result = swh_storage.release_add([release]) + assert actual_result == {'release:add': 1} - end_missing = self.storage.release_missing([self.release['id']]) - self.assertEqual([], list(end_missing)) + end_missing = swh_storage.release_missing([data.release['id']]) + assert list(end_missing) == [] - self.assertEqual(list(self.journal_writer.objects), - [('release', release)]) + assert list(swh_storage.journal_writer.objects) \ + == [('release', release)] - def test_release_add_validation(self): - rel = copy.deepcopy(self.release) + def test_release_add_validation(self, swh_storage): + rel = copy.deepcopy(data.release) rel['date']['offset'] = 2**16 - with self.assertRaisesRegex( - (ValueError, psycopg2.DataError), 'offset') as cm: - self.storage.release_add([rel]) + with pytest.raises((ValueError, psycopg2.DataError), + match='offset') as cm: + swh_storage.release_add([rel]) - if type(cm.exception) == psycopg2.DataError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE) + if type(cm.value) == psycopg2.DataError: + assert cm.value.pgcode \ + == psycopg2.errorcodes.NUMERIC_VALUE_OUT_OF_RANGE - rel = copy.deepcopy(self.release) + rel = copy.deepcopy(data.release) rel['author'] = None - with self.assertRaisesRegex( - (ValueError, psycopg2.IntegrityError), 'date') as cm: - self.storage.release_add([rel]) + with pytest.raises((ValueError, psycopg2.IntegrityError), + match='date') as cm: + swh_storage.release_add([rel]) - if type(cm.exception) == psycopg2.IntegrityError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.CHECK_VIOLATION) + if type(cm.value) == psycopg2.IntegrityError: + assert cm.value.pgcode == psycopg2.errorcodes.CHECK_VIOLATION - def test_release_add_twice(self): - actual_result = self.storage.release_add([self.release]) - self.assertEqual(actual_result, {'release:add': 1}) + def test_release_add_twice(self, swh_storage): + actual_result = swh_storage.release_add([data.release]) + assert actual_result == {'release:add': 1} - self.assertEqual(list(self.journal_writer.objects), - [('release', self.release)]) + assert list(swh_storage.journal_writer.objects) \ + == [('release', data.release)] - actual_result = self.storage.release_add([self.release, self.release2]) - self.assertEqual(actual_result, {'release:add': 1}) + actual_result = swh_storage.release_add([data.release, data.release2]) + assert actual_result == {'release:add': 1} - self.assertEqual(list(self.journal_writer.objects), - [('release', self.release), - ('release', self.release2)]) + assert list(swh_storage.journal_writer.objects) \ + == [('release', data.release), + ('release', data.release2)] + + def test_release_add_name_clash(self, swh_storage): + release1 = data.release.copy() + release2 = data.release2.copy() - def test_release_add_name_clash(self): - release1 = self.release.copy() - release2 = self.release2.copy() release1['author'] = { 'fullname': b'John Doe ', 'name': b'John Doe', 'email': b'john.doe@example.com' } release2['author'] = { 'fullname': b'John Doe ', 'name': b'John Doe ', 'email': b'john.doe@example.com ' } - actual_result = self.storage.release_add([release1, release2]) - self.assertEqual(actual_result, {'release:add': 2}) + actual_result = swh_storage.release_add([release1, release2]) + assert actual_result == {'release:add': 2} - def test_release_get(self): + def test_release_get(self, swh_storage): # given - self.storage.release_add([self.release, self.release2]) + swh_storage.release_add([data.release, data.release2]) # when - actual_releases = list(self.storage.release_get([self.release['id'], - self.release2['id']])) + actual_releases = list(swh_storage.release_get([data.release['id'], + data.release2['id']])) # then for actual_release in actual_releases: if 'id' in actual_release['author']: del actual_release['author']['id'] # hack: ids are generated - self.assertEqual([self.normalize_entity(self.release), - self.normalize_entity(self.release2)], - [actual_releases[0], actual_releases[1]]) + assert [ + normalize_entity(data.release), normalize_entity(data.release2)] \ + == [actual_releases[0], actual_releases[1]] unknown_releases = \ - list(self.storage.release_get([self.release3['id']])) + list(swh_storage.release_get([data.release3['id']])) - self.assertIsNone(unknown_releases[0]) + assert unknown_releases[0] is None - def test_origin_add_one(self): - origin0 = self.storage.origin_get(self.origin) - self.assertIsNone(origin0) + def test_origin_add_one(self, swh_storage): + origin0 = swh_storage.origin_get(data.origin) + assert origin0 is None - id = self.storage.origin_add_one(self.origin) + id = swh_storage.origin_add_one(data.origin) - actual_origin = self.storage.origin_get({'url': self.origin['url']}) + actual_origin = swh_storage.origin_get({'url': data.origin['url']}) if self._test_origin_ids: - self.assertEqual(actual_origin['id'], id) - self.assertEqual(actual_origin['url'], self.origin['url']) + assert actual_origin['id'] == id + assert actual_origin['url'] == data.origin['url'] - id2 = self.storage.origin_add_one(self.origin) + id2 = swh_storage.origin_add_one(data.origin) - self.assertEqual(id, id2) + assert id == id2 - def test_origin_add(self): - origin0 = self.storage.origin_get([self.origin])[0] - self.assertIsNone(origin0) + def test_origin_add(self, swh_storage): + origin0 = swh_storage.origin_get([data.origin])[0] + assert origin0 is None - origin1, origin2 = self.storage.origin_add([self.origin, self.origin2]) + origin1, origin2 = swh_storage.origin_add([data.origin, data.origin2]) - actual_origin = self.storage.origin_get([{ - 'url': self.origin['url'], + actual_origin = swh_storage.origin_get([{ + 'url': data.origin['url'], }])[0] if self._test_origin_ids: - self.assertEqual(actual_origin['id'], origin1['id']) - self.assertEqual(actual_origin['url'], origin1['url']) + assert actual_origin['id'] == origin1['id'] + assert actual_origin['url'] == origin1['url'] - actual_origin2 = self.storage.origin_get([{ - 'url': self.origin2['url'], + actual_origin2 = swh_storage.origin_get([{ + 'url': data.origin2['url'], }])[0] if self._test_origin_ids: - self.assertEqual(actual_origin2['id'], origin2['id']) - self.assertEqual(actual_origin2['url'], origin2['url']) + assert actual_origin2['id'] == origin2['id'] + assert actual_origin2['url'] == origin2['url'] if 'id' in actual_origin: del actual_origin['id'] del actual_origin2['id'] - self.assertEqual(list(self.journal_writer.objects), - [('origin', actual_origin), - ('origin', actual_origin2)]) - - def test_origin_add_twice(self): - add1 = self.storage.origin_add([self.origin, self.origin2]) + assert list(swh_storage.journal_writer.objects) \ + == [('origin', actual_origin), + ('origin', actual_origin2)] - self.assertEqual(list(self.journal_writer.objects), - [('origin', self.origin), - ('origin', self.origin2)]) + def test_origin_add_twice(self, swh_storage): + add1 = swh_storage.origin_add([data.origin, data.origin2]) + assert list(swh_storage.journal_writer.objects) \ + == [('origin', data.origin), + ('origin', data.origin2)] - add2 = self.storage.origin_add([self.origin, self.origin2]) + add2 = swh_storage.origin_add([data.origin, data.origin2]) + assert list(swh_storage.journal_writer.objects) \ + == [('origin', data.origin), + ('origin', data.origin2)] - self.assertEqual(list(self.journal_writer.objects), - [('origin', self.origin), - ('origin', self.origin2)]) + assert add1 == add2 - self.assertEqual(add1, add2) + def test_origin_add_validation(self, swh_storage): + with pytest.raises((TypeError, KeyError), match='url'): + swh_storage.origin_add([{'type': 'git'}]) - def test_origin_add_validation(self): - with self.assertRaisesRegex((TypeError, KeyError), 'url'): - self.storage.origin_add([{'type': 'git'}]) - - def test_origin_get_legacy(self): - self.assertIsNone(self.storage.origin_get(self.origin)) - id = self.storage.origin_add_one(self.origin) + def test_origin_get_legacy(self, swh_storage): + assert swh_storage.origin_get(data.origin) is None + id = swh_storage.origin_add_one(data.origin) # lookup per url (returns id) - actual_origin0 = self.storage.origin_get( - {'url': self.origin['url']}) + actual_origin0 = swh_storage.origin_get( + {'url': data.origin['url']}) if self._test_origin_ids: - self.assertEqual(actual_origin0['id'], id) - self.assertEqual(actual_origin0['url'], self.origin['url']) + assert actual_origin0['id'] == id + assert actual_origin0['url'] == data.origin['url'] # lookup per id (returns dict) if self._test_origin_ids: - actual_origin1 = self.storage.origin_get({'id': id}) + actual_origin1 = swh_storage.origin_get({'id': id}) - self.assertEqual(actual_origin1, {'id': id, - 'type': self.origin['type'], - 'url': self.origin['url']}) + assert actual_origin1 == {'id': id, + 'type': data.origin['type'], + 'url': data.origin['url']} - def test_origin_get(self): - self.assertIsNone(self.storage.origin_get(self.origin)) - origin_id = self.storage.origin_add_one(self.origin) + def test_origin_get(self, swh_storage): + assert swh_storage.origin_get(data.origin) is None + origin_id = swh_storage.origin_add_one(data.origin) # lookup per url (returns id) - actual_origin0 = self.storage.origin_get( - [{'url': self.origin['url']}]) - self.assertEqual(len(actual_origin0), 1, actual_origin0) - if self._test_origin_ids: - self.assertEqual(actual_origin0[0]['id'], origin_id) - self.assertEqual(actual_origin0[0]['url'], self.origin['url']) + actual_origin0 = swh_storage.origin_get( + [{'url': data.origin['url']}]) + assert len(actual_origin0) == 1 + assert actual_origin0[0]['url'] == data.origin['url'] if self._test_origin_ids: # lookup per id (returns dict) - actual_origin1 = self.storage.origin_get([{'id': origin_id}]) + actual_origin1 = swh_storage.origin_get([{'id': origin_id}]) - self.assertEqual(len(actual_origin1), 1, actual_origin1) - self.assertEqual(actual_origin1[0], {'id': origin_id, - 'type': self.origin['type'], - 'url': self.origin['url']}) + assert len(actual_origin1) == 1 + assert actual_origin1[0] == {'id': origin_id, + 'type': data.origin['type'], + 'url': data.origin['url']} - def test_origin_get_consistency(self): - self.assertIsNone(self.storage.origin_get(self.origin)) - id = self.storage.origin_add_one(self.origin) + def test_origin_get_consistency(self, swh_storage): + assert swh_storage.origin_get(data.origin) is None + id = swh_storage.origin_add_one(data.origin) - with self.assertRaises(ValueError): - self.storage.origin_get([ - {'url': self.origin['url']}, + with pytest.raises(ValueError): + swh_storage.origin_get([ + {'url': data.origin['url']}, {'id': id}]) - def test_origin_search_single_result(self): - found_origins = list(self.storage.origin_search(self.origin['url'])) - self.assertEqual(len(found_origins), 0) + def test_origin_search_single_result(self, swh_storage): + found_origins = list(swh_storage.origin_search(data.origin['url'])) + assert len(found_origins) == 0 - found_origins = list(self.storage.origin_search(self.origin['url'], - regexp=True)) - self.assertEqual(len(found_origins), 0) + found_origins = list(swh_storage.origin_search(data.origin['url'], + regexp=True)) + assert len(found_origins) == 0 - self.storage.origin_add_one(self.origin) + swh_storage.origin_add_one(data.origin) origin_data = { - 'type': self.origin['type'], - 'url': self.origin['url']} - found_origins = list(self.storage.origin_search(self.origin['url'])) - self.assertEqual(len(found_origins), 1) + 'type': data.origin['type'], + 'url': data.origin['url']} + found_origins = list(swh_storage.origin_search(data.origin['url'])) + assert len(found_origins) == 1 if 'id' in found_origins[0]: del found_origins[0]['id'] - self.assertEqual(found_origins[0], origin_data) + assert found_origins[0] == origin_data - found_origins = list(self.storage.origin_search( - '.' + self.origin['url'][1:-1] + '.', regexp=True)) - self.assertEqual(len(found_origins), 1) + found_origins = list(swh_storage.origin_search( + '.' + data.origin['url'][1:-1] + '.', regexp=True)) + assert len(found_origins) == 1 if 'id' in found_origins[0]: del found_origins[0]['id'] - self.assertEqual(found_origins[0], origin_data) + assert found_origins[0] == origin_data - self.storage.origin_add_one(self.origin2) + swh_storage.origin_add_one(data.origin2) origin2_data = { - 'type': self.origin2['type'], - 'url': self.origin2['url']} - found_origins = list(self.storage.origin_search(self.origin2['url'])) - self.assertEqual(len(found_origins), 1) + 'type': data.origin2['type'], + 'url': data.origin2['url']} + found_origins = list(swh_storage.origin_search(data.origin2['url'])) + assert len(found_origins) == 1 if 'id' in found_origins[0]: del found_origins[0]['id'] - self.assertEqual(found_origins[0], origin2_data) + assert found_origins[0] == origin2_data - found_origins = list(self.storage.origin_search( - '.' + self.origin2['url'][1:-1] + '.', regexp=True)) - self.assertEqual(len(found_origins), 1) + found_origins = list(swh_storage.origin_search( + '.' + data.origin2['url'][1:-1] + '.', regexp=True)) + assert len(found_origins) == 1 if 'id' in found_origins[0]: del found_origins[0]['id'] - self.assertEqual(found_origins[0], origin2_data) + assert found_origins[0] == origin2_data - def test_origin_search_no_regexp(self): - self.storage.origin_add_one(self.origin) - self.storage.origin_add_one(self.origin2) + def test_origin_search_no_regexp(self, swh_storage): + swh_storage.origin_add_one(data.origin) + swh_storage.origin_add_one(data.origin2) - origin = self.storage.origin_get({'url': self.origin['url']}) - origin2 = self.storage.origin_get({'url': self.origin2['url']}) + origin = swh_storage.origin_get({'url': data.origin['url']}) + origin2 = swh_storage.origin_get({'url': data.origin2['url']}) # no pagination - - found_origins = list(self.storage.origin_search('/')) - self.assertEqual(len(found_origins), 2) + found_origins = list(swh_storage.origin_search('/')) + assert len(found_origins) == 2 # offset=0 - - found_origins0 = list(self.storage.origin_search('/', offset=0, limit=1)) # noqa - self.assertEqual(len(found_origins0), 1) - self.assertIn(found_origins0[0], [origin, origin2]) + found_origins0 = list(swh_storage.origin_search('/', offset=0, limit=1)) # noqa + assert len(found_origins0) == 1 + assert found_origins0[0] in [origin, origin2] # offset=1 - - found_origins1 = list(self.storage.origin_search('/', offset=1, limit=1)) # noqa - self.assertEqual(len(found_origins1), 1) - self.assertIn(found_origins1[0], [origin, origin2]) + found_origins1 = list(swh_storage.origin_search('/', offset=1, limit=1)) # noqa + assert len(found_origins1) == 1 + assert found_origins1[0] in [origin, origin2] # check both origins were returned + assert found_origins0 != found_origins1 - self.assertCountEqual(found_origins0 + found_origins1, - [origin, origin2]) + def test_origin_search_regexp_substring(self, swh_storage): + swh_storage.origin_add_one(data.origin) + swh_storage.origin_add_one(data.origin2) - def test_origin_search_regexp_substring(self): - self.storage.origin_add_one(self.origin) - self.storage.origin_add_one(self.origin2) - - origin = self.storage.origin_get({'url': self.origin['url']}) - origin2 = self.storage.origin_get({'url': self.origin2['url']}) + origin = swh_storage.origin_get({'url': data.origin['url']}) + origin2 = swh_storage.origin_get({'url': data.origin2['url']}) # no pagination - - found_origins = list(self.storage.origin_search('/', regexp=True)) - self.assertEqual(len(found_origins), 2) + found_origins = list(swh_storage.origin_search('/', regexp=True)) + assert len(found_origins) == 2 # offset=0 - - found_origins0 = list(self.storage.origin_search('/', offset=0, limit=1, regexp=True)) # noqa - self.assertEqual(len(found_origins0), 1) - self.assertIn(found_origins0[0], [origin, origin2]) + found_origins0 = list(swh_storage.origin_search('/', offset=0, limit=1, regexp=True)) # noqa + assert len(found_origins0) == 1 + assert found_origins0[0] in [origin, origin2] # offset=1 - - found_origins1 = list(self.storage.origin_search('/', offset=1, limit=1, regexp=True)) # noqa - self.assertEqual(len(found_origins1), 1) - self.assertIn(found_origins1[0], [origin, origin2]) + found_origins1 = list(swh_storage.origin_search('/', offset=1, limit=1, regexp=True)) # noqa + assert len(found_origins1) == 1 + assert found_origins1[0] in [origin, origin2] # check both origins were returned + assert found_origins0 != found_origins1 - self.assertCountEqual(found_origins0 + found_origins1, - [origin, origin2]) - - def test_origin_search_regexp_fullstring(self): - self.storage.origin_add_one(self.origin) - self.storage.origin_add_one(self.origin2) + def test_origin_search_regexp_fullstring(self, swh_storage): + swh_storage.origin_add_one(data.origin) + swh_storage.origin_add_one(data.origin2) - origin = self.storage.origin_get({'url': self.origin['url']}) - origin2 = self.storage.origin_get({'url': self.origin2['url']}) + origin = swh_storage.origin_get({'url': data.origin['url']}) + origin2 = swh_storage.origin_get({'url': data.origin2['url']}) # no pagination - - found_origins = list(self.storage.origin_search('.*/.*', regexp=True)) - self.assertEqual(len(found_origins), 2) + found_origins = list(swh_storage.origin_search('.*/.*', regexp=True)) + assert len(found_origins) == 2 # offset=0 - - found_origins0 = list(self.storage.origin_search('.*/.*', offset=0, limit=1, regexp=True)) # noqa - self.assertEqual(len(found_origins0), 1) - self.assertIn(found_origins0[0], [origin, origin2]) + found_origins0 = list(swh_storage.origin_search('.*/.*', offset=0, limit=1, regexp=True)) # noqa + assert len(found_origins0) == 1 + assert found_origins0[0] in [origin, origin2] # offset=1 - - found_origins1 = list(self.storage.origin_search('.*/.*', offset=1, limit=1, regexp=True)) # noqa - self.assertEqual(len(found_origins1), 1) - self.assertIn(found_origins1[0], [origin, origin2]) + found_origins1 = list(swh_storage.origin_search('.*/.*', offset=1, limit=1, regexp=True)) # noqa + assert len(found_origins1) == 1 + assert found_origins1[0] in [origin, origin2] # check both origins were returned + assert found_origins0 != found_origins1 - self.assertCountEqual( - found_origins0 + found_origins1, - [origin, origin2]) - - @given(strategies.booleans()) - def test_origin_visit_add(self, use_url): + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_add(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - # given - self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - - origin_id = self.storage.origin_add_one(self.origin2) - self.assertIsNotNone(origin_id) + origin_id = swh_storage.origin_add_one(data.origin2) + assert origin_id is not None - origin_id_or_url = self.origin2['url'] if use_url else origin_id + origin_id_or_url = data.origin2['url'] if use_url else origin_id # when - origin_visit1 = self.storage.origin_visit_add( + date_visit = datetime.datetime.now(datetime.timezone.utc) + origin_visit1 = swh_storage.origin_visit_add( origin_id_or_url, type='git', - date=self.date_visit2) + date=date_visit) - actual_origin_visits = list(self.storage.origin_visit_get( + actual_origin_visits = list(swh_storage.origin_visit_get( origin_id_or_url)) - self.assertEqual(actual_origin_visits, - [{ - 'origin': origin_id, - 'date': self.date_visit2, - 'visit': origin_visit1['visit'], - 'type': 'git', - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }]) - - expected_origin = self.origin2.copy() - data = { + assert { + 'origin': origin_id, + 'date': date_visit, + 'visit': origin_visit1['visit'], + 'type': 'git', + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + } in actual_origin_visits + + expected_origin = data.origin2 + origin_visit = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': date_visit, 'visit': origin_visit1['visit'], 'type': 'git', 'status': 'ongoing', 'metadata': None, 'snapshot': None, } - self.assertEqual(list(self.journal_writer.objects), - [('origin', expected_origin), - ('origin_visit', data)]) + objects = list(swh_storage.journal_writer.objects) + assert ('origin', expected_origin) in objects + assert ('origin_visit', origin_visit) in objects - def test_origin_visit_get__unknown_origin(self): - self.assertEqual([], list(self.storage.origin_visit_get('foo'))) + def test_origin_visit_get__unknown_origin(self, swh_storage): + assert [] == list(swh_storage.origin_visit_get('foo')) if self._test_origin_ids: - self.assertEqual([], list(self.storage.origin_visit_get(10))) + assert list(swh_storage.origin_visit_get(10)) == [] - @given(strategies.booleans()) - def test_origin_visit_add_default_type(self, use_url): + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_add_default_type(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - # given - self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - - origin_id = self.storage.origin_add_one(self.origin2) - origin_id_or_url = self.origin2['url'] if use_url else origin_id - self.assertIsNotNone(origin_id) + origin_id = swh_storage.origin_add_one(data.origin2) + origin_id_or_url = data.origin2['url'] if use_url else origin_id + assert origin_id is not None # when - origin_visit1 = self.storage.origin_visit_add( + date_visit = datetime.datetime.now(datetime.timezone.utc) + date_visit2 = date_visit + datetime.timedelta(minutes=1) + origin_visit1 = swh_storage.origin_visit_add( origin_id_or_url, - date=self.date_visit2) - origin_visit2 = self.storage.origin_visit_add( + date=date_visit) + origin_visit2 = swh_storage.origin_visit_add( origin_id_or_url, - date='2018-01-01 23:00:00+00') + date=date_visit2) # then - self.assertEqual(origin_visit1['origin'], origin_id) - self.assertIsNotNone(origin_visit1['visit']) + assert origin_visit1['origin'] == origin_id + assert origin_visit1['visit'] is not None - actual_origin_visits = list(self.storage.origin_visit_get( + actual_origin_visits = list(swh_storage.origin_visit_get( origin_id_or_url)) - self.assertEqual(actual_origin_visits, [ - { - 'origin': origin_id, - 'date': self.date_visit2, - 'visit': origin_visit1['visit'], - 'type': 'hg', - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }, - { - 'origin': origin_id, - 'date': self.date_visit3, - 'visit': origin_visit2['visit'], - 'type': 'hg', - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }, - ]) + expected_visits = [ + { + 'origin': origin_id, + 'date': date_visit, + 'visit': origin_visit1['visit'], + 'type': 'hg', + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }, + { + 'origin': origin_id, + 'date': date_visit2, + 'visit': origin_visit2['visit'], + 'type': 'hg', + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }, + ] + for visit in expected_visits: + assert visit in actual_origin_visits - expected_origin = self.origin2.copy() - data1 = { - 'origin': expected_origin, - 'date': self.date_visit2, - 'visit': origin_visit1['visit'], - 'type': 'hg', - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - } - data2 = { - 'origin': expected_origin, - 'date': self.date_visit3, - 'visit': origin_visit2['visit'], - 'type': 'hg', - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - } - self.assertEqual(list(self.journal_writer.objects), - [('origin', expected_origin), - ('origin_visit', data1), - ('origin_visit', data2)]) + objects = list(swh_storage.journal_writer.objects) + assert ('origin', data.origin2) in objects - def test_origin_visit_add_validation(self): - origin_id_or_url = self.storage.origin_add_one(self.origin2) + for visit in expected_visits: + visit['origin'] = data.origin2 + assert ('origin_visit', visit) in objects - with self.assertRaises((TypeError, psycopg2.ProgrammingError)) as cm: - self.storage.origin_visit_add(origin_id_or_url, date=[b'foo']) + def test_origin_visit_add_validation(self, swh_storage): + origin_id_or_url = swh_storage.origin_add_one(data.origin2) - if type(cm.exception) == psycopg2.ProgrammingError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.UNDEFINED_FUNCTION) + with pytest.raises((TypeError, psycopg2.ProgrammingError)) as cm: + swh_storage.origin_visit_add(origin_id_or_url, date=[b'foo']) - @given(strategies.booleans()) - def test_origin_visit_update(self, use_url): + if type(cm.value) == psycopg2.ProgrammingError: + assert cm.value.pgcode \ + == psycopg2.errorcodes.UNDEFINED_FUNCTION + + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_update(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - # given - origin_id = self.storage.origin_add_one(self.origin) - origin_id2 = self.storage.origin_add_one(self.origin2) - origin2_id_or_url = self.origin2['url'] if use_url else origin_id2 + swh_storage.origin_add_one(data.origin) + origin_url = data.origin['url'] - origin_id_or_url = self.origin['url'] if use_url else origin_id + date_visit = datetime.datetime.now(datetime.timezone.utc) + origin_visit1 = swh_storage.origin_visit_add( + origin_url, + date=date_visit) - origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, - date=self.date_visit2) - - origin_visit2 = self.storage.origin_visit_add( - origin_id_or_url, - date=self.date_visit3) + date_visit2 = date_visit + datetime.timedelta(minutes=1) + origin_visit2 = swh_storage.origin_visit_add( + origin_url, + date=date_visit2) - origin_visit3 = self.storage.origin_visit_add( - origin2_id_or_url, - date=self.date_visit3) + swh_storage.origin_add_one(data.origin2) + origin_url2 = data.origin2['url'] + origin_visit3 = swh_storage.origin_visit_add( + origin_url2, + date=date_visit2) # when visit1_metadata = { 'contents': 42, 'directories': 22, } - self.storage.origin_visit_update( - origin_id_or_url, + swh_storage.origin_visit_update( + origin_url, origin_visit1['visit'], status='full', metadata=visit1_metadata) - self.storage.origin_visit_update( - origin2_id_or_url, + swh_storage.origin_visit_update( + origin_url2, origin_visit3['visit'], status='partial') # then - actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) - self.assertEqual(actual_origin_visits, [{ + actual_origin_visits = list(swh_storage.origin_visit_get( + origin_url)) + expected_visits = [{ 'origin': origin_visit2['origin'], - 'date': self.date_visit2, + 'date': date_visit, 'visit': origin_visit1['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'full', 'metadata': visit1_metadata, 'snapshot': None, }, { 'origin': origin_visit2['origin'], - 'date': self.date_visit3, + 'date': date_visit2, 'visit': origin_visit2['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, - }]) + }] + for visit in expected_visits: + assert visit in actual_origin_visits - actual_origin_visits_bis = list(self.storage.origin_visit_get( - origin_id_or_url, + actual_origin_visits_bis = list(swh_storage.origin_visit_get( + origin_url, limit=1)) - self.assertEqual(actual_origin_visits_bis, - [{ - 'origin': origin_visit2['origin'], - 'date': self.date_visit2, - 'visit': origin_visit1['visit'], - 'type': self.origin['type'], - 'status': 'full', - 'metadata': visit1_metadata, - 'snapshot': None, - }]) - - actual_origin_visits_ter = list(self.storage.origin_visit_get( - origin_id_or_url, + assert actual_origin_visits_bis == [ + { + 'origin': origin_visit2['origin'], + 'date': date_visit, + 'visit': origin_visit1['visit'], + 'type': data.origin['type'], + 'status': 'full', + 'metadata': visit1_metadata, + 'snapshot': None, + }] + + actual_origin_visits_ter = list(swh_storage.origin_visit_get( + origin_url, last_visit=origin_visit1['visit'])) - self.assertEqual(actual_origin_visits_ter, - [{ - 'origin': origin_visit2['origin'], - 'date': self.date_visit3, - 'visit': origin_visit2['visit'], - 'type': self.origin['type'], - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }]) - - actual_origin_visits2 = list(self.storage.origin_visit_get( - origin2_id_or_url)) - self.assertEqual(actual_origin_visits2, - [{ - 'origin': origin_visit3['origin'], - 'date': self.date_visit3, - 'visit': origin_visit3['visit'], - 'type': self.origin2['type'], - 'status': 'partial', - 'metadata': None, - 'snapshot': None, - }]) - - expected_origin = self.origin.copy() - expected_origin2 = self.origin2.copy() + assert actual_origin_visits_ter == [ + { + 'origin': origin_visit2['origin'], + 'date': date_visit2, + 'visit': origin_visit2['visit'], + 'type': data.origin['type'], + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }] + + actual_origin_visits2 = list(swh_storage.origin_visit_get( + origin_url2)) + assert actual_origin_visits2 == [ + { + 'origin': origin_visit3['origin'], + 'date': date_visit2, + 'visit': origin_visit3['visit'], + 'type': data.origin2['type'], + 'status': 'partial', + 'metadata': None, + 'snapshot': None, + }] + + expected_origin = data.origin.copy() + expected_origin2 = data.origin2.copy() data1 = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': date_visit, 'visit': origin_visit1['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { 'origin': expected_origin, - 'date': self.date_visit3, + 'date': date_visit2, 'visit': origin_visit2['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data3 = { 'origin': expected_origin2, - 'date': self.date_visit3, + 'date': date_visit2, 'visit': origin_visit3['visit'], - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data4 = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': date_visit, 'visit': origin_visit1['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'metadata': visit1_metadata, 'status': 'full', 'snapshot': None, } data5 = { 'origin': expected_origin2, - 'date': self.date_visit3, + 'date': date_visit2, 'visit': origin_visit3['visit'], - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'partial', 'metadata': None, 'snapshot': None, } - self.assertEqual(list(self.journal_writer.objects), - [('origin', expected_origin), - ('origin', expected_origin2), - ('origin_visit', data1), - ('origin_visit', data2), - ('origin_visit', data3), - ('origin_visit', data4), - ('origin_visit', data5)]) - - def test_origin_visit_update_validation(self): - origin_id = self.storage.origin_add_one(self.origin) - visit = self.storage.origin_visit_add( + objects = list(swh_storage.journal_writer.objects) + assert ('origin', expected_origin) in objects + assert ('origin', expected_origin2) in objects + assert ('origin_visit', data1) in objects + assert ('origin_visit', data2) in objects + assert ('origin_visit', data3) in objects + assert ('origin_visit', data4) in objects + assert ('origin_visit', data5) in objects + + def test_origin_visit_update_validation(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + visit = swh_storage.origin_visit_add( origin_id, - date=self.date_visit2) + date=data.date_visit2) - with self.assertRaisesRegex( - (ValueError, psycopg2.DataError), 'status') as cm: - self.storage.origin_visit_update( + with pytest.raises((ValueError, psycopg2.DataError), + match='status') as cm: + swh_storage.origin_visit_update( origin_id, visit['visit'], status='foobar') - if type(cm.exception) == psycopg2.DataError: - self.assertEqual(cm.exception.pgcode, - psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION) + if type(cm.value) == psycopg2.DataError: + assert cm.value.pgcode == \ + psycopg2.errorcodes.INVALID_TEXT_REPRESENTATION - def test_origin_visit_find_by_date(self): + def test_origin_visit_find_by_date(self, swh_storage): # given - self.storage.origin_add_one(self.origin) + swh_storage.origin_add_one(data.origin) - self.storage.origin_visit_add( - self.origin['url'], - date=self.date_visit2) + swh_storage.origin_visit_add( + data.origin['url'], + date=data.date_visit2) - origin_visit2 = self.storage.origin_visit_add( - self.origin['url'], - date=self.date_visit3) + origin_visit2 = swh_storage.origin_visit_add( + data.origin['url'], + date=data.date_visit3) - origin_visit3 = self.storage.origin_visit_add( - self.origin['url'], - date=self.date_visit2) + origin_visit3 = swh_storage.origin_visit_add( + data.origin['url'], + date=data.date_visit2) # Simple case - visit = self.storage.origin_visit_find_by_date( - self.origin['url'], self.date_visit3) - self.assertEqual(visit['visit'], origin_visit2['visit']) + visit = swh_storage.origin_visit_find_by_date( + data.origin['url'], data.date_visit3) + assert visit['visit'] == origin_visit2['visit'] # There are two visits at the same date, the latest must be returned - visit = self.storage.origin_visit_find_by_date( - self.origin['url'], self.date_visit2) - self.assertEqual(visit['visit'], origin_visit3['visit']) + visit = swh_storage.origin_visit_find_by_date( + data.origin['url'], data.date_visit2) + assert visit['visit'] == origin_visit3['visit'] - def test_origin_visit_find_by_date__unknown_origin(self): - self.storage.origin_visit_find_by_date('foo', self.date_visit2) + def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): + swh_storage.origin_visit_find_by_date('foo', data.date_visit2) - @settings(deadline=None) - @given(strategies.booleans()) - def test_origin_visit_update_missing_snapshot(self, use_url): + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_update_missing_snapshot(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - # given - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id + origin_id = swh_storage.origin_add_one(data.origin) + origin_id_or_url = data.origin['url'] if use_url else origin_id - origin_visit = self.storage.origin_visit_add( + origin_visit = swh_storage.origin_visit_add( origin_id_or_url, - date=self.date_visit1) + date=data.date_visit1) # when - self.storage.origin_visit_update( + swh_storage.origin_visit_update( origin_id_or_url, origin_visit['visit'], - snapshot=self.snapshot['id']) + snapshot=data.snapshot['id']) # then - actual_origin_visit = self.storage.origin_visit_get_by( + actual_origin_visit = swh_storage.origin_visit_get_by( origin_id_or_url, origin_visit['visit']) - self.assertEqual(actual_origin_visit['snapshot'], self.snapshot['id']) + assert actual_origin_visit['snapshot'] == data.snapshot['id'] # when - self.storage.snapshot_add([self.snapshot]) - self.assertEqual(actual_origin_visit['snapshot'], self.snapshot['id']) + swh_storage.snapshot_add([data.snapshot]) + assert actual_origin_visit['snapshot'] == data.snapshot['id'] - @settings(deadline=None) - @given(strategies.booleans()) - def test_origin_visit_get_by(self, use_url): + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_get_by(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() + origin_id = swh_storage.origin_add_one(data.origin) + origin_id2 = swh_storage.origin_add_one(data.origin2) - origin_id = self.storage.origin_add_one(self.origin) - origin_id2 = self.storage.origin_add_one(self.origin2) + origin_id_or_url = data.origin['url'] if use_url else origin_id + origin2_id_or_url = data.origin2['url'] if use_url else origin_id2 - origin_id_or_url = self.origin['url'] if use_url else origin_id - origin2_id_or_url = self.origin2['url'] if use_url else origin_id2 - - origin_visit1 = self.storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin_id_or_url, - date=self.date_visit2) + date=data.date_visit2) - self.storage.snapshot_add([self.snapshot]) - self.storage.origin_visit_update( + swh_storage.snapshot_add([data.snapshot]) + swh_storage.origin_visit_update( origin_id_or_url, origin_visit1['visit'], - snapshot=self.snapshot['id']) + snapshot=data.snapshot['id']) # Add some other {origin, visit} entries - self.storage.origin_visit_add( + swh_storage.origin_visit_add( origin_id_or_url, - date=self.date_visit3) - self.storage.origin_visit_add( + date=data.date_visit3) + swh_storage.origin_visit_add( origin2_id_or_url, - date=self.date_visit3) + date=data.date_visit3) # when visit1_metadata = { 'contents': 42, 'directories': 22, } - self.storage.origin_visit_update( + swh_storage.origin_visit_update( origin_id_or_url, origin_visit1['visit'], status='full', metadata=visit1_metadata) expected_origin_visit = origin_visit1.copy() expected_origin_visit.update({ 'origin': origin_id, 'visit': origin_visit1['visit'], - 'date': self.date_visit2, - 'type': self.origin['type'], + 'date': data.date_visit2, + 'type': data.origin['type'], 'metadata': visit1_metadata, 'status': 'full', - 'snapshot': self.snapshot['id'], + 'snapshot': data.snapshot['id'], }) # when - actual_origin_visit1 = self.storage.origin_visit_get_by( + actual_origin_visit1 = swh_storage.origin_visit_get_by( origin_id_or_url, origin_visit1['visit']) # then - self.assertEqual(actual_origin_visit1, expected_origin_visit) + assert actual_origin_visit1 == expected_origin_visit - def test_origin_visit_get_by__unknown_origin(self): + def test_origin_visit_get_by__unknown_origin(self, swh_storage): if self._test_origin_ids: - self.assertIsNone(self.storage.origin_visit_get_by(2, 10)) - self.assertIsNone(self.storage.origin_visit_get_by('foo', 10)) + assert swh_storage.origin_visit_get_by(2, 10) is None + assert swh_storage.origin_visit_get_by('foo', 10) is None - @given(strategies.booleans()) - def test_origin_visit_upsert_new(self, use_url): + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_upsert_new(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - # given - self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - - origin_id = self.storage.origin_add_one(self.origin2) - origin_id_or_url = self.origin2['url'] if use_url else origin_id - self.assertIsNotNone(origin_id) + origin_id = swh_storage.origin_add_one(data.origin2) + origin_url = data.origin2['url'] + assert origin_id is not None # when - self.storage.origin_visit_upsert([ + swh_storage.origin_visit_upsert([ { - 'origin': self.origin2, - 'date': self.date_visit2, + 'origin': data.origin2, + 'date': data.date_visit2, 'visit': 123, - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, }, { - 'origin': self.origin2, + 'origin': data.origin2, 'date': '2018-01-01 23:00:00+00', 'visit': 1234, - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, }, ]) # then - actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) - self.assertEqual(actual_origin_visits, [ + actual_origin_visits = list(swh_storage.origin_visit_get( + origin_url)) + assert actual_origin_visits == [ { 'origin': origin_id, - 'date': self.date_visit2, + 'date': data.date_visit2, 'visit': 123, - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, }, { 'origin': origin_id, - 'date': self.date_visit3, + 'date': data.date_visit3, 'visit': 1234, - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, }, - ]) + ] - expected_origin = self.origin2.copy() + expected_origin = data.origin2 data1 = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': data.date_visit2, 'visit': 123, - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, } data2 = { 'origin': expected_origin, - 'date': self.date_visit3, + 'date': data.date_visit3, 'visit': 1234, - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, } - self.assertEqual(list(self.journal_writer.objects), - [('origin', expected_origin), - ('origin_visit', data1), - ('origin_visit', data2)]) - - @settings(deadline=None) - @given(strategies.booleans()) - def test_origin_visit_upsert_existing(self, use_url): + assert list(swh_storage.journal_writer.objects) == [ + ('origin', expected_origin), + ('origin_visit', data1), + ('origin_visit', data2)] + + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_upsert_existing(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - # given - self.assertIsNone(self.storage.origin_get([self.origin2])[0]) - - origin_id = self.storage.origin_add_one(self.origin2) - origin_id_or_url = self.origin2['url'] if use_url else origin_id - self.assertIsNotNone(origin_id) + origin_id = swh_storage.origin_add_one(data.origin2) + origin_url = data.origin2['url'] + assert origin_id is not None # when - origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, - date=self.date_visit2) - self.storage.origin_visit_upsert([{ - 'origin': self.origin2, - 'date': self.date_visit2, + origin_visit1 = swh_storage.origin_visit_add( + origin_url, + date=data.date_visit2) + swh_storage.origin_visit_upsert([{ + 'origin': data.origin2, + 'date': data.date_visit2, 'visit': origin_visit1['visit'], - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, }]) # then - self.assertEqual(origin_visit1['origin'], origin_id) - self.assertIsNotNone(origin_visit1['visit']) + assert origin_visit1['origin'] == origin_id + assert origin_visit1['visit'] is not None - actual_origin_visits = list(self.storage.origin_visit_get( - origin_id_or_url)) - self.assertEqual(actual_origin_visits, - [{ - 'origin': origin_id, - 'date': self.date_visit2, - 'visit': origin_visit1['visit'], - 'type': self.origin2['type'], - 'status': 'full', - 'metadata': None, - 'snapshot': None, - }]) - - expected_origin = self.origin2.copy() + actual_origin_visits = list(swh_storage.origin_visit_get( + origin_url)) + assert actual_origin_visits == [ + { + 'origin': origin_id, + 'date': data.date_visit2, + 'visit': origin_visit1['visit'], + 'type': data.origin2['type'], + 'status': 'full', + 'metadata': None, + 'snapshot': None, + }] + + expected_origin = data.origin2 data1 = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': data.date_visit2, 'visit': origin_visit1['visit'], - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': data.date_visit2, 'visit': origin_visit1['visit'], - 'type': self.origin2['type'], + 'type': data.origin2['type'], 'status': 'full', 'metadata': None, 'snapshot': None, } - self.assertEqual(list(self.journal_writer.objects), - [('origin', expected_origin), - ('origin_visit', data1), - ('origin_visit', data2)]) + assert list(swh_storage.journal_writer.objects) == [ + ('origin', expected_origin), + ('origin_visit', data1), + ('origin_visit', data2)] - def test_origin_visit_get_by_no_result(self): + def test_origin_visit_get_by_no_result(self, swh_storage): if self._test_origin_ids: - actual_origin_visit = self.storage.origin_visit_get_by( + actual_origin_visit = swh_storage.origin_visit_get_by( 10, 999) - self.assertIsNone(actual_origin_visit) + assert actual_origin_visit is None - self.storage.origin_add([self.origin]) - actual_origin_visit = self.storage.origin_visit_get_by( - self.origin['url'], 999) - self.assertIsNone(actual_origin_visit) + swh_storage.origin_add([data.origin]) + actual_origin_visit = swh_storage.origin_visit_get_by( + data.origin['url'], 999) + assert actual_origin_visit is None - @settings(deadline=None) # this test is very slow - @given(strategies.booleans()) - def test_origin_visit_get_latest(self, use_url): + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_visit_get_latest(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id - origin_url = self.origin['url'] - origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, - self.date_visit1) + swh_storage.origin_add_one(data.origin) + origin_url = data.origin['url'] + origin_visit1 = swh_storage.origin_visit_add( + origin_url, + data.date_visit1) visit1_id = origin_visit1['visit'] - origin_visit2 = self.storage.origin_visit_add( - origin_id_or_url, - self.date_visit2) + origin_visit2 = swh_storage.origin_visit_add( + origin_url, + data.date_visit2) visit2_id = origin_visit2['visit'] # Add a visit with the same date as the previous one - origin_visit3 = self.storage.origin_visit_add( - origin_id_or_url, - self.date_visit2) + origin_visit3 = swh_storage.origin_visit_add( + origin_url, + data.date_visit2) visit3_id = origin_visit3['visit'] - origin_visit1 = self.storage.origin_visit_get_by(origin_url, visit1_id) - origin_visit2 = self.storage.origin_visit_get_by(origin_url, visit2_id) - origin_visit3 = self.storage.origin_visit_get_by(origin_url, visit3_id) + origin_visit1 = swh_storage.origin_visit_get_by(origin_url, visit1_id) + origin_visit2 = swh_storage.origin_visit_get_by(origin_url, visit2_id) + origin_visit3 = swh_storage.origin_visit_get_by(origin_url, visit3_id) # Two visits, both with no snapshot - self.assertEqual( - origin_visit3, - self.storage.origin_visit_get_latest(origin_url)) - self.assertIsNone( - self.storage.origin_visit_get_latest(origin_url, - require_snapshot=True)) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert swh_storage.origin_visit_get_latest( + origin_url, require_snapshot=True) is None # Add snapshot to visit1; require_snapshot=True makes it return # visit1 and require_snapshot=False still returns visit2 - self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id_or_url, visit1_id, - snapshot=self.complete_snapshot['id']) - self.assertEqual( - {**origin_visit1, 'snapshot': self.complete_snapshot['id']}, - self.storage.origin_visit_get_latest( + swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_url, visit1_id, + snapshot=data.complete_snapshot['id']) + assert {**origin_visit1, 'snapshot': data.complete_snapshot['id']} \ + == swh_storage.origin_visit_get_latest( origin_url, require_snapshot=True) - ) - self.assertEqual( - origin_visit3, - self.storage.origin_visit_get_latest(origin_url) - ) + + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) # Status filter: all three visits are status=ongoing, so no visit # returned - self.assertIsNone( - self.storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) - ) + assert swh_storage.origin_visit_get_latest( + origin_url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - self.storage.origin_visit_update( - origin_id_or_url, + swh_storage.origin_visit_update( + origin_url, visit1_id, status='full') - self.assertEqual( - { - **origin_visit1, - 'snapshot': self.complete_snapshot['id'], - 'status': 'full'}, - self.storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']), - ) - self.assertEqual( - origin_visit3, - self.storage.origin_visit_get_latest(origin_url), - ) + assert { + **origin_visit1, + 'snapshot': data.complete_snapshot['id'], + 'status': 'full'} == swh_storage.origin_visit_get_latest( + origin_url, allowed_statuses=['full']) + + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) # Add snapshot to visit2 and check that the new snapshot is returned - self.storage.snapshot_add([self.empty_snapshot]) - self.storage.origin_visit_update( - origin_id_or_url, visit2_id, - snapshot=self.empty_snapshot['id']) - self.assertEqual( - {**origin_visit2, 'snapshot': self.empty_snapshot['id']}, - self.storage.origin_visit_get_latest( - origin_url, require_snapshot=True), - ) - self.assertEqual( - origin_visit3, - self.storage.origin_visit_get_latest(origin_url), - ) + swh_storage.snapshot_add([data.empty_snapshot]) + swh_storage.origin_visit_update( + origin_url, visit2_id, + snapshot=data.empty_snapshot['id']) + assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \ + swh_storage.origin_visit_get_latest( + origin_url, require_snapshot=True) + + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) # Check that the status filter is still working - self.assertEqual( - { - **origin_visit1, - 'snapshot': self.complete_snapshot['id'], - 'status': 'full'}, - self.storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']), - ) + assert { + **origin_visit1, + 'snapshot': data.complete_snapshot['id'], + 'status': 'full'} == swh_storage.origin_visit_get_latest( + origin_url, allowed_statuses=['full']) # Add snapshot to visit3 (same date as visit2) - self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id_or_url, visit3_id, snapshot=self.complete_snapshot['id']) - self.assertEqual( - { - **origin_visit1, - 'snapshot': self.complete_snapshot['id'], - 'status': 'full'}, - self.storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']), - ) - self.assertEqual( - { - **origin_visit1, - 'snapshot': self.complete_snapshot['id'], - 'status': 'full'}, - self.storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full'], require_snapshot=True), - ) - self.assertEqual( - {**origin_visit3, 'snapshot': self.complete_snapshot['id']}, - self.storage.origin_visit_get_latest( - origin_url), - ) - self.assertEqual( - {**origin_visit3, 'snapshot': self.complete_snapshot['id']}, - self.storage.origin_visit_get_latest( - origin_url, require_snapshot=True), - ) + swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_url, visit3_id, snapshot=data.complete_snapshot['id']) + assert { + **origin_visit1, + 'snapshot': data.complete_snapshot['id'], + 'status': 'full'} == swh_storage.origin_visit_get_latest( + origin_url, allowed_statuses=['full']) + assert { + **origin_visit1, + 'snapshot': data.complete_snapshot['id'], + 'status': 'full'} == swh_storage.origin_visit_get_latest( + origin_url, allowed_statuses=['full'], require_snapshot=True) + assert { + **origin_visit3, + 'snapshot': data.complete_snapshot['id'] + } == swh_storage.origin_visit_get_latest(origin_url) + + assert { + **origin_visit3, + 'snapshot': data.complete_snapshot['id'] + } == swh_storage.origin_visit_get_latest( + origin_url, require_snapshot=True) - def test_person_fullname_unicity(self): + def test_person_fullname_unicity(self, swh_storage): # given (person injection through revisions for example) - revision = self.revision + revision = data.revision # create a revision with same committer fullname but wo name and email - revision2 = copy.deepcopy(self.revision2) + revision2 = copy.deepcopy(data.revision2) revision2['committer'] = dict(revision['committer']) revision2['committer']['email'] = None revision2['committer']['name'] = None - self.storage.revision_add([revision]) - self.storage.revision_add([revision2]) + swh_storage.revision_add([revision]) + swh_storage.revision_add([revision2]) # when getting added revisions revisions = list( - self.storage.revision_get([revision['id'], revision2['id']])) + swh_storage.revision_get([revision['id'], revision2['id']])) # then # check committers are the same - self.assertEqual(revisions[0]['committer'], - revisions[1]['committer']) + assert revisions[0]['committer'] == revisions[1]['committer'] - def test_snapshot_add_get_empty(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + def test_snapshot_add_get_empty(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit_id = origin_visit1['visit'] - actual_result = self.storage.snapshot_add([self.empty_snapshot]) - self.assertEqual(actual_result, {'snapshot:add': 1}) + actual_result = swh_storage.snapshot_add([data.empty_snapshot]) + assert actual_result == {'snapshot:add': 1} - self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.empty_snapshot['id']) + swh_storage.origin_visit_update( + origin_id, visit_id, snapshot=data.empty_snapshot['id']) - by_id = self.storage.snapshot_get(self.empty_snapshot['id']) - self.assertEqual(by_id, {**self.empty_snapshot, 'next_branch': None}) + by_id = swh_storage.snapshot_get(data.empty_snapshot['id']) + assert by_id == {**data.empty_snapshot, 'next_branch': None} - by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id) - self.assertEqual(by_ov, {**self.empty_snapshot, 'next_branch': None}) + by_ov = swh_storage.snapshot_get_by_origin_visit(origin_id, visit_id) + assert by_ov == {**data.empty_snapshot, 'next_branch': None} - expected_origin = self.origin.copy() + expected_origin = data.origin.copy() data1 = { 'origin': expected_origin, - 'date': self.date_visit1, + 'date': data.date_visit1, 'visit': origin_visit1['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { 'origin': expected_origin, - 'date': self.date_visit1, + 'date': data.date_visit1, 'visit': origin_visit1['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, - 'snapshot': self.empty_snapshot['id'], - } - self.assertEqual(list(self.journal_writer.objects), - [('origin', expected_origin), - ('origin_visit', data1), - ('snapshot', self.empty_snapshot), - ('origin_visit', data2)]) - - def test_snapshot_add_get_complete(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + 'snapshot': data.empty_snapshot['id'], + } + assert list(swh_storage.journal_writer.objects) == \ + [('origin', expected_origin), + ('origin_visit', data1), + ('snapshot', data.empty_snapshot), + ('origin_visit', data2)] + + def test_snapshot_add_get_complete(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit_id = origin_visit1['visit'] - actual_result = self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) - self.assertEqual(actual_result, {'snapshot:add': 1}) + actual_result = swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_id, visit_id, snapshot=data.complete_snapshot['id']) + assert actual_result == {'snapshot:add': 1} - by_id = self.storage.snapshot_get(self.complete_snapshot['id']) - self.assertEqual(by_id, - {**self.complete_snapshot, 'next_branch': None}) + by_id = swh_storage.snapshot_get(data.complete_snapshot['id']) + assert by_id == {**data.complete_snapshot, 'next_branch': None} - by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id) - self.assertEqual(by_ov, - {**self.complete_snapshot, 'next_branch': None}) + by_ov = swh_storage.snapshot_get_by_origin_visit(origin_id, visit_id) + assert by_ov == {**data.complete_snapshot, 'next_branch': None} - def test_snapshot_add_many(self): - actual_result = self.storage.snapshot_add( - [self.snapshot, self.complete_snapshot]) - self.assertEqual(actual_result, {'snapshot:add': 2}) + def test_snapshot_add_many(self, swh_storage): + actual_result = swh_storage.snapshot_add( + [data.snapshot, data.complete_snapshot]) + assert actual_result == {'snapshot:add': 2} - self.assertEqual( - {**self.complete_snapshot, 'next_branch': None}, - self.storage.snapshot_get(self.complete_snapshot['id'])) + assert {**data.complete_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get(data.complete_snapshot['id']) - self.assertEqual( - {**self.snapshot, 'next_branch': None}, - self.storage.snapshot_get(self.snapshot['id'])) + assert {**data.snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get(data.snapshot['id']) - def test_snapshot_add_many_incremental(self): - actual_result = self.storage.snapshot_add([self.complete_snapshot]) - self.assertEqual(actual_result, {'snapshot:add': 1}) + def test_snapshot_add_many_incremental(self, swh_storage): + actual_result = swh_storage.snapshot_add([data.complete_snapshot]) + assert actual_result == {'snapshot:add': 1} - actual_result2 = self.storage.snapshot_add( - [self.snapshot, self.complete_snapshot]) - self.assertEqual(actual_result2, {'snapshot:add': 1}) + actual_result2 = swh_storage.snapshot_add( + [data.snapshot, data.complete_snapshot]) + assert actual_result2 == {'snapshot:add': 1} - self.assertEqual( - {**self.complete_snapshot, 'next_branch': None}, - self.storage.snapshot_get(self.complete_snapshot['id'])) + assert {**data.complete_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get(data.complete_snapshot['id']) - self.assertEqual( - {**self.snapshot, 'next_branch': None}, - self.storage.snapshot_get(self.snapshot['id'])) + assert {**data.snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get(data.snapshot['id']) - def test_snapshot_add_twice(self): - actual_result = self.storage.snapshot_add([self.empty_snapshot]) - self.assertEqual(actual_result, {'snapshot:add': 1}) + def test_snapshot_add_twice(self, swh_storage): + actual_result = swh_storage.snapshot_add([data.empty_snapshot]) + assert actual_result == {'snapshot:add': 1} - self.assertEqual(list(self.journal_writer.objects), - [('snapshot', self.empty_snapshot)]) + assert list(swh_storage.journal_writer.objects) \ + == [('snapshot', data.empty_snapshot)] - actual_result = self.storage.snapshot_add([self.snapshot]) - self.assertEqual(actual_result, {'snapshot:add': 1}) + actual_result = swh_storage.snapshot_add([data.snapshot]) + assert actual_result == {'snapshot:add': 1} - self.assertEqual(list(self.journal_writer.objects), - [('snapshot', self.empty_snapshot), - ('snapshot', self.snapshot)]) + assert list(swh_storage.journal_writer.objects) \ + == [('snapshot', data.empty_snapshot), + ('snapshot', data.snapshot)] - def test_snapshot_add_validation(self): - snap = copy.deepcopy(self.snapshot) + def test_snapshot_add_validation(self, swh_storage): + snap = copy.deepcopy(data.snapshot) snap['branches'][b'foo'] = {'target_type': 'revision'} - with self.assertRaisesRegex(KeyError, 'target'): - self.storage.snapshot_add([snap]) + with pytest.raises(KeyError, match='target'): + swh_storage.snapshot_add([snap]) - snap = copy.deepcopy(self.snapshot) + snap = copy.deepcopy(data.snapshot) snap['branches'][b'foo'] = {'target': b'\x42'*20} - with self.assertRaisesRegex(KeyError, 'target_type'): - self.storage.snapshot_add([snap]) + with pytest.raises(KeyError, match='target_type'): + swh_storage.snapshot_add([snap]) - def test_snapshot_add_count_branches(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + def test_snapshot_add_count_branches(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit_id = origin_visit1['visit'] - actual_result = self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) - self.assertEqual(actual_result, {'snapshot:add': 1}) + actual_result = swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_id, visit_id, snapshot=data.complete_snapshot['id']) + assert actual_result == {'snapshot:add': 1} - snp_id = self.complete_snapshot['id'] - snp_size = self.storage.snapshot_count_branches(snp_id) + snp_id = data.complete_snapshot['id'] + snp_size = swh_storage.snapshot_count_branches(snp_id) expected_snp_size = { 'alias': 1, 'content': 1, 'directory': 2, 'release': 1, 'revision': 1, 'snapshot': 1, None: 1 } + assert snp_size == expected_snp_size - self.assertEqual(snp_size, expected_snp_size) - - def test_snapshot_add_get_paginated(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + def test_snapshot_add_get_paginated(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit_id = origin_visit1['visit'] - self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( + swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( origin_id, visit_id, - snapshot=self.complete_snapshot['id']) + snapshot=data.complete_snapshot['id']) - snp_id = self.complete_snapshot['id'] - branches = self.complete_snapshot['branches'] + snp_id = data.complete_snapshot['id'] + branches = data.complete_snapshot['branches'] branch_names = list(sorted(branches)) # Test branch_from - - snapshot = self.storage.snapshot_get_branches(snp_id, - branches_from=b'release') + snapshot = swh_storage.snapshot_get_branches( + snp_id, branches_from=b'release') rel_idx = branch_names.index(b'release') expected_snapshot = { 'id': snp_id, 'branches': { name: branches[name] for name in branch_names[rel_idx:] }, 'next_branch': None, } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot # Test branches_count - - snapshot = self.storage.snapshot_get_branches(snp_id, - branches_count=1) + snapshot = swh_storage.snapshot_get_branches( + snp_id, branches_count=1) expected_snapshot = { 'id': snp_id, 'branches': { branch_names[0]: branches[branch_names[0]], }, 'next_branch': b'content', } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot # test branch_from + branches_count - snapshot = self.storage.snapshot_get_branches( + snapshot = swh_storage.snapshot_get_branches( snp_id, branches_from=b'directory', branches_count=3) dir_idx = branch_names.index(b'directory') expected_snapshot = { 'id': snp_id, 'branches': { name: branches[name] for name in branch_names[dir_idx:dir_idx + 3] }, 'next_branch': branch_names[dir_idx + 3], } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot - def test_snapshot_add_get_filtered(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + def test_snapshot_add_get_filtered(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit_id = origin_visit1['visit'] - self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) + swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_id, visit_id, snapshot=data.complete_snapshot['id']) - snp_id = self.complete_snapshot['id'] - branches = self.complete_snapshot['branches'] + snp_id = data.complete_snapshot['id'] + branches = data.complete_snapshot['branches'] - snapshot = self.storage.snapshot_get_branches( + snapshot = swh_storage.snapshot_get_branches( snp_id, target_types=['release', 'revision']) expected_snapshot = { 'id': snp_id, 'branches': { name: tgt for name, tgt in branches.items() if tgt and tgt['target_type'] in ['release', 'revision'] }, 'next_branch': None, } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot - snapshot = self.storage.snapshot_get_branches(snp_id, - target_types=['alias']) + snapshot = swh_storage.snapshot_get_branches( + snp_id, target_types=['alias']) expected_snapshot = { 'id': snp_id, 'branches': { name: tgt for name, tgt in branches.items() if tgt and tgt['target_type'] == 'alias' }, 'next_branch': None, } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot - def test_snapshot_add_get_filtered_and_paginated(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + def test_snapshot_add_get_filtered_and_paginated(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit_id = origin_visit1['visit'] - self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.complete_snapshot['id']) + swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_id, visit_id, snapshot=data.complete_snapshot['id']) - snp_id = self.complete_snapshot['id'] - branches = self.complete_snapshot['branches'] + snp_id = data.complete_snapshot['id'] + branches = data.complete_snapshot['branches'] branch_names = list(sorted(branches)) # Test branch_from - snapshot = self.storage.snapshot_get_branches( + snapshot = swh_storage.snapshot_get_branches( snp_id, target_types=['directory', 'release'], branches_from=b'directory2') expected_snapshot = { 'id': snp_id, 'branches': { name: branches[name] for name in (b'directory2', b'release') }, 'next_branch': None, } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot # Test branches_count - snapshot = self.storage.snapshot_get_branches( + snapshot = swh_storage.snapshot_get_branches( snp_id, target_types=['directory', 'release'], branches_count=1) expected_snapshot = { 'id': snp_id, 'branches': { b'directory': branches[b'directory'] }, 'next_branch': b'directory2', } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot # Test branches_count - snapshot = self.storage.snapshot_get_branches( + snapshot = swh_storage.snapshot_get_branches( snp_id, target_types=['directory', 'release'], branches_count=2) expected_snapshot = { 'id': snp_id, 'branches': { name: branches[name] for name in (b'directory', b'directory2') }, 'next_branch': b'release', } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot # test branch_from + branches_count - snapshot = self.storage.snapshot_get_branches( + snapshot = swh_storage.snapshot_get_branches( snp_id, target_types=['directory', 'release'], branches_from=b'directory2', branches_count=1) dir_idx = branch_names.index(b'directory2') expected_snapshot = { 'id': snp_id, 'branches': { branch_names[dir_idx]: branches[branch_names[dir_idx]], }, 'next_branch': b'release', } - self.assertEqual(snapshot, expected_snapshot) + assert snapshot == expected_snapshot - def test_snapshot_add_get(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + def test_snapshot_add_get(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit_id = origin_visit1['visit'] - self.storage.snapshot_add([self.snapshot]) - self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.snapshot['id']) + swh_storage.snapshot_add([data.snapshot]) + swh_storage.origin_visit_update( + origin_id, visit_id, snapshot=data.snapshot['id']) - by_id = self.storage.snapshot_get(self.snapshot['id']) - self.assertEqual(by_id, {**self.snapshot, 'next_branch': None}) + by_id = swh_storage.snapshot_get(data.snapshot['id']) + assert by_id == {**data.snapshot, 'next_branch': None} - by_ov = self.storage.snapshot_get_by_origin_visit(origin_id, visit_id) - self.assertEqual(by_ov, {**self.snapshot, 'next_branch': None}) + by_ov = swh_storage.snapshot_get_by_origin_visit(origin_id, visit_id) + assert by_ov == {**data.snapshot, 'next_branch': None} - origin_visit_info = self.storage.origin_visit_get_by(origin_id, - visit_id) - self.assertEqual(origin_visit_info['snapshot'], self.snapshot['id']) + origin_visit_info = swh_storage.origin_visit_get_by( + origin_id, visit_id) + assert origin_visit_info['snapshot'] == data.snapshot['id'] - def test_snapshot_add_nonexistent_visit(self): - origin_id = self.storage.origin_add_one(self.origin) + def test_snapshot_add_nonexistent_visit(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) visit_id = 54164461156 - self.journal_writer.objects[:] = [] + swh_storage.journal_writer.objects[:] = [] - self.storage.snapshot_add([self.snapshot]) + swh_storage.snapshot_add([data.snapshot]) - with self.assertRaises(ValueError): - self.storage.origin_visit_update( - origin_id, visit_id, snapshot=self.snapshot['id']) + with pytest.raises(ValueError): + swh_storage.origin_visit_update( + origin_id, visit_id, snapshot=data.snapshot['id']) - self.assertEqual(list(self.journal_writer.objects), [ - ('snapshot', self.snapshot)]) + assert list(swh_storage.journal_writer.objects) == [ + ('snapshot', data.snapshot)] - def test_snapshot_add_twice__by_origin_visit(self): - origin_id = self.storage.origin_add_one(self.origin) - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + def test_snapshot_add_twice__by_origin_visit(self, swh_storage): + origin_id = swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit1_id = origin_visit1['visit'] - self.storage.snapshot_add([self.snapshot]) - self.storage.origin_visit_update( - origin_id, visit1_id, snapshot=self.snapshot['id']) + swh_storage.snapshot_add([data.snapshot]) + swh_storage.origin_visit_update( + origin_id, visit1_id, snapshot=data.snapshot['id']) - by_ov1 = self.storage.snapshot_get_by_origin_visit(origin_id, - visit1_id) - self.assertEqual(by_ov1, {**self.snapshot, 'next_branch': None}) + by_ov1 = swh_storage.snapshot_get_by_origin_visit( + origin_id, visit1_id) + assert by_ov1 == {**data.snapshot, 'next_branch': None} - origin_visit2 = self.storage.origin_visit_add(origin_id, - self.date_visit2) + origin_visit2 = swh_storage.origin_visit_add( + origin_id, data.date_visit2) visit2_id = origin_visit2['visit'] - self.storage.snapshot_add([self.snapshot]) - self.storage.origin_visit_update( - origin_id, visit2_id, snapshot=self.snapshot['id']) + swh_storage.snapshot_add([data.snapshot]) + swh_storage.origin_visit_update( + origin_id, visit2_id, snapshot=data.snapshot['id']) - by_ov2 = self.storage.snapshot_get_by_origin_visit(origin_id, - visit2_id) - self.assertEqual(by_ov2, {**self.snapshot, 'next_branch': None}) + by_ov2 = swh_storage.snapshot_get_by_origin_visit( + origin_id, visit2_id) + assert by_ov2 == {**data.snapshot, 'next_branch': None} - expected_origin = self.origin.copy() + expected_origin = data.origin.copy() data1 = { 'origin': expected_origin, - 'date': self.date_visit1, + 'date': data.date_visit1, 'visit': origin_visit1['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { 'origin': expected_origin, - 'date': self.date_visit1, + 'date': data.date_visit1, 'visit': origin_visit1['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, - 'snapshot': self.snapshot['id'], + 'snapshot': data.snapshot['id'], } data3 = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': data.date_visit2, 'visit': origin_visit2['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data4 = { 'origin': expected_origin, - 'date': self.date_visit2, + 'date': data.date_visit2, 'visit': origin_visit2['visit'], - 'type': self.origin['type'], + 'type': data.origin['type'], 'status': 'ongoing', 'metadata': None, - 'snapshot': self.snapshot['id'], - } - self.assertEqual(list(self.journal_writer.objects), - [('origin', expected_origin), - ('origin_visit', data1), - ('snapshot', self.snapshot), - ('origin_visit', data2), - ('origin_visit', data3), - ('origin_visit', data4)]) - - @settings(deadline=None) # this test is very slow - @given(strategies.booleans()) - def test_snapshot_get_latest(self, use_url): + 'snapshot': data.snapshot['id'], + } + assert list(swh_storage.journal_writer.objects) \ + == [('origin', expected_origin), + ('origin_visit', data1), + ('snapshot', data.snapshot), + ('origin_visit', data2), + ('origin_visit', data3), + ('origin_visit', data4)] + + @pytest.mark.parametrize('use_url', [True, False]) + def test_snapshot_get_latest(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id - origin_visit1 = self.storage.origin_visit_add(origin_id, - self.date_visit1) + origin_id = swh_storage.origin_add_one(data.origin) + origin_url = data.origin['url'] + origin_visit1 = swh_storage.origin_visit_add( + origin_id, data.date_visit1) visit1_id = origin_visit1['visit'] - origin_visit2 = self.storage.origin_visit_add(origin_id, - self.date_visit2) + origin_visit2 = swh_storage.origin_visit_add( + origin_id, data.date_visit2) visit2_id = origin_visit2['visit'] # Add a visit with the same date as the previous one - origin_visit3 = self.storage.origin_visit_add(origin_id, - self.date_visit2) + origin_visit3 = swh_storage.origin_visit_add( + origin_id, data.date_visit2) visit3_id = origin_visit3['visit'] # Two visits, both with no snapshot: latest snapshot is None - self.assertIsNone(self.storage.snapshot_get_latest( - origin_id_or_url)) + assert swh_storage.snapshot_get_latest(origin_url) is None # Add snapshot to visit1, latest snapshot = visit 1 snapshot - self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id, visit1_id, snapshot=self.complete_snapshot['id']) - self.assertEqual({**self.complete_snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest( - origin_id_or_url)) + swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_id, visit1_id, snapshot=data.complete_snapshot['id']) + assert {**data.complete_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get_latest(origin_url) # Status filter: all three visits are status=ongoing, so no snapshot # returned - self.assertIsNone( - self.storage.snapshot_get_latest( - origin_id_or_url, - allowed_statuses=['full']) - ) + assert swh_storage.snapshot_get_latest( + origin_url, + allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - self.storage.origin_visit_update(origin_id, visit1_id, status='full') - self.assertEqual( - {**self.complete_snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest( - origin_id_or_url, - allowed_statuses=['full']), - ) + swh_storage.origin_visit_update(origin_id, visit1_id, status='full') + assert {**data.complete_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get_latest( + origin_url, + allowed_statuses=['full']) # Add snapshot to visit2 and check that the new snapshot is returned - self.storage.snapshot_add([self.empty_snapshot]) - self.storage.origin_visit_update( - origin_id, visit2_id, snapshot=self.empty_snapshot['id']) - self.assertEqual({**self.empty_snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest(origin_id)) + swh_storage.snapshot_add([data.empty_snapshot]) + swh_storage.origin_visit_update( + origin_id, visit2_id, snapshot=data.empty_snapshot['id']) + assert {**data.empty_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get_latest(origin_id) # Check that the status filter is still working - self.assertEqual( - {**self.complete_snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest( - origin_id_or_url, - allowed_statuses=['full']), - ) + assert {**data.complete_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get_latest( + origin_url, + allowed_statuses=['full']) # Add snapshot to visit3 (same date as visit2) and check that # the new snapshot is returned - self.storage.snapshot_add([self.complete_snapshot]) - self.storage.origin_visit_update( - origin_id, visit3_id, snapshot=self.complete_snapshot['id']) - self.assertEqual({**self.complete_snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest( - origin_id_or_url)) - - @given(strategies.booleans()) - def test_snapshot_get_latest__missing_snapshot(self, use_url): + swh_storage.snapshot_add([data.complete_snapshot]) + swh_storage.origin_visit_update( + origin_id, visit3_id, snapshot=data.complete_snapshot['id']) + assert {**data.complete_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get_latest(origin_url) + + @pytest.mark.parametrize('use_url', [True, False]) + def test_snapshot_get_latest__missing_snapshot(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - # Origin does not exist - self.assertIsNone(self.storage.snapshot_get_latest( - self.origin['url'] if use_url else 999)) + origin_url = data.origin['url'] + assert swh_storage.snapshot_get_latest(origin_url) is None - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id - origin_visit1 = self.storage.origin_visit_add( - origin_id_or_url, - self.date_visit1) + swh_storage.origin_add_one(data.origin) + origin_visit1 = swh_storage.origin_visit_add( + origin_url, + data.date_visit1) visit1_id = origin_visit1['visit'] - origin_visit2 = self.storage.origin_visit_add( - origin_id_or_url, - self.date_visit2) + origin_visit2 = swh_storage.origin_visit_add( + origin_url, + data.date_visit2) visit2_id = origin_visit2['visit'] # Two visits, both with no snapshot: latest snapshot is None - self.assertIsNone(self.storage.snapshot_get_latest( - origin_id_or_url)) + assert swh_storage.snapshot_get_latest(origin_url) is None # Add unknown snapshot to visit1, check that the inconsistency is # detected - self.storage.origin_visit_update( - origin_id_or_url, - visit1_id, snapshot=self.complete_snapshot['id']) - with self.assertRaises(ValueError): - self.storage.snapshot_get_latest( - origin_id_or_url) + swh_storage.origin_visit_update( + origin_url, + visit1_id, snapshot=data.complete_snapshot['id']) + with pytest.raises(ValueError): + swh_storage.snapshot_get_latest( + origin_url) # Status filter: both visits are status=ongoing, so no snapshot # returned - self.assertIsNone( - self.storage.snapshot_get_latest( - origin_id_or_url, - allowed_statuses=['full']) - ) + assert swh_storage.snapshot_get_latest( + origin_url, + allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - self.storage.origin_visit_update( - origin_id_or_url, + swh_storage.origin_visit_update( + origin_url, visit1_id, status='full') - with self.assertRaises(ValueError): - self.storage.snapshot_get_latest( - origin_id_or_url, + with pytest.raises(ValueError): + swh_storage.snapshot_get_latest( + origin_url, allowed_statuses=['full']), # Actually add the snapshot and check status filter again - self.storage.snapshot_add([self.complete_snapshot]) - self.assertEqual( - {**self.complete_snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest( - origin_id_or_url) - ) + swh_storage.snapshot_add([data.complete_snapshot]) + assert {**data.complete_snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get_latest(origin_url) # Add unknown snapshot to visit2 and check that the inconsistency # is detected - self.storage.origin_visit_update( - origin_id_or_url, - visit2_id, snapshot=self.snapshot['id']) - with self.assertRaises(ValueError): - self.storage.snapshot_get_latest( - origin_id_or_url) + swh_storage.origin_visit_update( + origin_url, + visit2_id, snapshot=data.snapshot['id']) + with pytest.raises(ValueError): + swh_storage.snapshot_get_latest( + origin_url) # Actually add that snapshot and check that the new one is returned - self.storage.snapshot_add([self.snapshot]) - self.assertEqual( - {**self.snapshot, 'next_branch': None}, - self.storage.snapshot_get_latest( - origin_id_or_url) - ) + swh_storage.snapshot_add([data.snapshot]) + assert{**data.snapshot, 'next_branch': None} \ + == swh_storage.snapshot_get_latest(origin_url) - def test_stat_counters(self): + def test_stat_counters(self, swh_storage): expected_keys = ['content', 'directory', 'origin', 'revision'] # Initially, all counters are 0 - self.storage.refresh_stat_counters() - counters = self.storage.stat_counters() - self.assertTrue(set(expected_keys) <= set(counters)) + swh_storage.refresh_stat_counters() + counters = swh_storage.stat_counters() + assert set(expected_keys) <= set(counters) for key in expected_keys: - self.assertEqual(counters[key], 0) + assert counters[key] == 0 # Add a content. Only the content counter should increase. - self.storage.content_add([self.cont]) + swh_storage.content_add([data.cont]) - self.storage.refresh_stat_counters() - counters = self.storage.stat_counters() + swh_storage.refresh_stat_counters() + counters = swh_storage.stat_counters() - self.assertTrue(set(expected_keys) <= set(counters)) + assert set(expected_keys) <= set(counters) for key in expected_keys: if key != 'content': - self.assertEqual(counters[key], 0) - self.assertEqual(counters['content'], 1) + assert counters[key] == 0 + assert counters['content'] == 1 # Add other objects. Check their counter increased as well. - self.storage.origin_add_one(self.origin2) - origin_visit1 = self.storage.origin_visit_add( - self.origin2['url'], date=self.date_visit2) - self.storage.snapshot_add([self.snapshot]) - self.storage.origin_visit_update( - self.origin2['url'], origin_visit1['visit'], - snapshot=self.snapshot['id']) - self.storage.directory_add([self.dir]) - self.storage.revision_add([self.revision]) - self.storage.release_add([self.release]) - - self.storage.refresh_stat_counters() - counters = self.storage.stat_counters() - self.assertEqual(counters['content'], 1) - self.assertEqual(counters['directory'], 1) - self.assertEqual(counters['snapshot'], 1) - self.assertEqual(counters['origin'], 1) - self.assertEqual(counters['origin_visit'], 1) - self.assertEqual(counters['revision'], 1) - self.assertEqual(counters['release'], 1) - self.assertEqual(counters['snapshot'], 1) + swh_storage.origin_add_one(data.origin2) + origin_visit1 = swh_storage.origin_visit_add( + data.origin2['url'], date=data.date_visit2) + swh_storage.snapshot_add([data.snapshot]) + swh_storage.origin_visit_update( + data.origin2['url'], origin_visit1['visit'], + snapshot=data.snapshot['id']) + swh_storage.directory_add([data.dir]) + swh_storage.revision_add([data.revision]) + swh_storage.release_add([data.release]) + + swh_storage.refresh_stat_counters() + counters = swh_storage.stat_counters() + assert counters['content'] == 1 + assert counters['directory'] == 1 + assert counters['snapshot'] == 1 + assert counters['origin'] == 1 + assert counters['origin_visit'] == 1 + assert counters['revision'] == 1 + assert counters['release'] == 1 + assert counters['snapshot'] == 1 if 'person' in counters: - self.assertEqual(counters['person'], 3) + assert counters['person'] == 3 - def test_content_find_ctime(self): - cont = self.cont.copy() + def test_content_find_ctime(self, swh_storage): + cont = data.cont.copy() del cont['data'] now = datetime.datetime.now(tz=datetime.timezone.utc) cont['ctime'] = now - self.storage.content_add_metadata([cont]) + swh_storage.content_add_metadata([cont]) - actually_present = self.storage.content_find({'sha1': cont['sha1']}) + actually_present = swh_storage.content_find({'sha1': cont['sha1']}) # check ctime up to one second dt = actually_present[0]['ctime'] - now - self.assertLessEqual(abs(dt.total_seconds()), 1, dt) + assert abs(dt.total_seconds()) <= 1 del actually_present[0]['ctime'] - self.assertEqual(actually_present[0], { + assert actually_present[0] == { 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' - }) + } - def test_content_find_with_present_content(self): + def test_content_find_with_present_content(self, swh_storage): # 1. with something to find - cont = self.cont - self.storage.content_add([cont, self.cont2]) + cont = data.cont + swh_storage.content_add([cont, data.cont2]) - actually_present = self.storage.content_find( + actually_present = swh_storage.content_find( {'sha1': cont['sha1']} ) - self.assertEqual(1, len(actually_present)) + assert 1 == len(actually_present) actually_present[0].pop('ctime') - self.assertEqual(actually_present[0], { + assert actually_present[0] == { 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' - }) + } # 2. with something to find - actually_present = self.storage.content_find( + actually_present = swh_storage.content_find( {'sha1_git': cont['sha1_git']}) - self.assertEqual(1, len(actually_present)) + assert 1 == len(actually_present) actually_present[0].pop('ctime') - self.assertEqual(actually_present[0], { + assert actually_present[0] == { 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' - }) + } # 3. with something to find - actually_present = self.storage.content_find( + actually_present = swh_storage.content_find( {'sha256': cont['sha256']}) - self.assertEqual(1, len(actually_present)) + assert 1 == len(actually_present) actually_present[0].pop('ctime') - self.assertEqual(actually_present[0], { + assert actually_present[0] == { 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' - }) + } # 4. with something to find - actually_present = self.storage.content_find({ + actually_present = swh_storage.content_find({ 'sha1': cont['sha1'], 'sha1_git': cont['sha1_git'], 'sha256': cont['sha256'], 'blake2s256': cont['blake2s256'], }) - self.assertEqual(1, len(actually_present)) + assert 1 == len(actually_present) actually_present[0].pop('ctime') - self.assertEqual(actually_present[0], { + assert actually_present[0] == { 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' - }) + } - def test_content_find_with_non_present_content(self): + def test_content_find_with_non_present_content(self, swh_storage): # 1. with something that does not exist - missing_cont = self.missing_cont + missing_cont = data.missing_cont - actually_present = self.storage.content_find( + actually_present = swh_storage.content_find( {'sha1': missing_cont['sha1']}) - self.assertEqual(actually_present, []) + assert actually_present == [] # 2. with something that does not exist - actually_present = self.storage.content_find( + actually_present = swh_storage.content_find( {'sha1_git': missing_cont['sha1_git']}) - self.assertEqual(actually_present, []) + assert actually_present == [] # 3. with something that does not exist - actually_present = self.storage.content_find( + actually_present = swh_storage.content_find( {'sha256': missing_cont['sha256']}) - self.assertEqual(actually_present, []) + assert actually_present == [] - def test_content_find_with_duplicate_input(self): - cont1 = self.cont + def test_content_find_with_duplicate_input(self, swh_storage): + cont1 = data.cont duplicate_cont = cont1.copy() # Create fake data with colliding sha256 and blake2s256 sha1_array = bytearray(duplicate_cont['sha1']) sha1_array[0] += 1 duplicate_cont['sha1'] = bytes(sha1_array) sha1git_array = bytearray(duplicate_cont['sha1_git']) sha1git_array[0] += 1 duplicate_cont['sha1_git'] = bytes(sha1git_array) # Inject the data - self.storage.content_add([cont1, duplicate_cont]) + swh_storage.content_add([cont1, duplicate_cont]) finder = {'blake2s256': duplicate_cont['blake2s256'], 'sha256': duplicate_cont['sha256']} - actual_result = list(self.storage.content_find(finder)) + actual_result = list(swh_storage.content_find(finder)) cont1.pop('data') duplicate_cont.pop('data') actual_result[0].pop('ctime') actual_result[1].pop('ctime') expected_result = [ cont1, duplicate_cont ] - self.assertCountEqual(expected_result, actual_result) + for result in expected_result: + assert result in actual_result - def test_content_find_with_duplicate_sha256(self): - cont1 = self.cont + def test_content_find_with_duplicate_sha256(self, swh_storage): + cont1 = data.cont duplicate_cont = cont1.copy() - # Create fake data with colliding sha256 and blake2s256 - sha1_array = bytearray(duplicate_cont['sha1']) - sha1_array[0] += 1 - duplicate_cont['sha1'] = bytes(sha1_array) - sha1git_array = bytearray(duplicate_cont['sha1_git']) - sha1git_array[0] += 1 - duplicate_cont['sha1_git'] = bytes(sha1git_array) - blake2s256_array = bytearray(duplicate_cont['blake2s256']) - blake2s256_array[0] += 1 - duplicate_cont['blake2s256'] = bytes(blake2s256_array) - self.storage.content_add([cont1, duplicate_cont]) + # Create fake data with colliding sha256 + for hashalgo in ('sha1', 'sha1_git', 'blake2s256'): + value = bytearray(duplicate_cont[hashalgo]) + value[0] += 1 + duplicate_cont[hashalgo] = bytes(value) + swh_storage.content_add([cont1, duplicate_cont]) + finder = { 'sha256': duplicate_cont['sha256'] } - actual_result = list(self.storage.content_find(finder)) + actual_result = list(swh_storage.content_find(finder)) + assert len(actual_result) == 2 cont1.pop('data') duplicate_cont.pop('data') actual_result[0].pop('ctime') actual_result[1].pop('ctime') expected_result = [ cont1, duplicate_cont ] - self.assertCountEqual(expected_result, actual_result) + assert expected_result == sorted(actual_result, + key=lambda x: x['sha1']) + # Find with both sha256 and blake2s256 finder = { 'sha256': duplicate_cont['sha256'], 'blake2s256': duplicate_cont['blake2s256'] } - actual_result = list(self.storage.content_find(finder)) - + actual_result = list(swh_storage.content_find(finder)) + assert len(actual_result) == 1 actual_result[0].pop('ctime') - expected_result = [ - duplicate_cont - ] - self.assertCountEqual(expected_result, actual_result) + expected_result = [duplicate_cont] + assert actual_result[0] == duplicate_cont - def test_content_find_with_duplicate_blake2s256(self): - cont1 = self.cont + def test_content_find_with_duplicate_blake2s256(self, swh_storage): + cont1 = data.cont duplicate_cont = cont1.copy() # Create fake data with colliding sha256 and blake2s256 sha1_array = bytearray(duplicate_cont['sha1']) sha1_array[0] += 1 duplicate_cont['sha1'] = bytes(sha1_array) sha1git_array = bytearray(duplicate_cont['sha1_git']) sha1git_array[0] += 1 duplicate_cont['sha1_git'] = bytes(sha1git_array) sha256_array = bytearray(duplicate_cont['sha256']) sha256_array[0] += 1 duplicate_cont['sha256'] = bytes(sha256_array) - self.storage.content_add([cont1, duplicate_cont]) + swh_storage.content_add([cont1, duplicate_cont]) finder = { 'blake2s256': duplicate_cont['blake2s256'] } - actual_result = list(self.storage.content_find(finder)) + actual_result = list(swh_storage.content_find(finder)) cont1.pop('data') duplicate_cont.pop('data') actual_result[0].pop('ctime') actual_result[1].pop('ctime') expected_result = [ cont1, duplicate_cont ] - self.assertCountEqual(expected_result, actual_result) + for result in expected_result: + assert result in actual_result + # Find with both sha256 and blake2s256 finder = { 'sha256': duplicate_cont['sha256'], 'blake2s256': duplicate_cont['blake2s256'] } - actual_result = list(self.storage.content_find(finder)) + actual_result = list(swh_storage.content_find(finder)) actual_result[0].pop('ctime') expected_result = [ duplicate_cont ] - self.assertCountEqual(expected_result, actual_result) + assert expected_result == actual_result - def test_content_find_bad_input(self): + def test_content_find_bad_input(self, swh_storage): # 1. with bad input - with self.assertRaises(ValueError): - self.storage.content_find({}) # empty is bad + with pytest.raises(ValueError): + swh_storage.content_find({}) # empty is bad # 2. with bad input - with self.assertRaises(ValueError): - self.storage.content_find( + with pytest.raises(ValueError): + swh_storage.content_find( {'unknown-sha1': 'something'}) # not the right key - def test_object_find_by_sha1_git(self): + def test_object_find_by_sha1_git(self, swh_storage): sha1_gits = [b'00000000000000000000'] expected = { b'00000000000000000000': [], } - self.storage.content_add([self.cont]) - sha1_gits.append(self.cont['sha1_git']) - expected[self.cont['sha1_git']] = [{ - 'sha1_git': self.cont['sha1_git'], + swh_storage.content_add([data.cont]) + sha1_gits.append(data.cont['sha1_git']) + expected[data.cont['sha1_git']] = [{ + 'sha1_git': data.cont['sha1_git'], 'type': 'content', - 'id': self.cont['sha1'], + 'id': data.cont['sha1'], }] - self.storage.directory_add([self.dir]) - sha1_gits.append(self.dir['id']) - expected[self.dir['id']] = [{ - 'sha1_git': self.dir['id'], + swh_storage.directory_add([data.dir]) + sha1_gits.append(data.dir['id']) + expected[data.dir['id']] = [{ + 'sha1_git': data.dir['id'], 'type': 'directory', - 'id': self.dir['id'], + 'id': data.dir['id'], }] - self.storage.revision_add([self.revision]) - sha1_gits.append(self.revision['id']) - expected[self.revision['id']] = [{ - 'sha1_git': self.revision['id'], + swh_storage.revision_add([data.revision]) + sha1_gits.append(data.revision['id']) + expected[data.revision['id']] = [{ + 'sha1_git': data.revision['id'], 'type': 'revision', - 'id': self.revision['id'], + 'id': data.revision['id'], }] - self.storage.release_add([self.release]) - sha1_gits.append(self.release['id']) - expected[self.release['id']] = [{ - 'sha1_git': self.release['id'], + swh_storage.release_add([data.release]) + sha1_gits.append(data.release['id']) + expected[data.release['id']] = [{ + 'sha1_git': data.release['id'], 'type': 'release', - 'id': self.release['id'], + 'id': data.release['id'], }] - ret = self.storage.object_find_by_sha1_git(sha1_gits) + ret = swh_storage.object_find_by_sha1_git(sha1_gits) for val in ret.values(): for obj in val: if 'object_id' in obj: del obj['object_id'] - self.assertEqual(expected, ret) + assert expected == ret - def test_tool_add(self): + def test_tool_add(self, swh_storage): tool = { 'name': 'some-unknown-tool', 'version': 'some-version', 'configuration': {"debian-package": "some-package"}, } - actual_tool = self.storage.tool_get(tool) - self.assertIsNone(actual_tool) # does not exist + actual_tool = swh_storage.tool_get(tool) + assert actual_tool is None # does not exist # add it - actual_tools = self.storage.tool_add([tool]) + actual_tools = swh_storage.tool_add([tool]) - self.assertEqual(len(actual_tools), 1) + assert len(actual_tools) == 1 actual_tool = actual_tools[0] - self.assertIsNotNone(actual_tool) # now it exists + assert actual_tool is not None # now it exists new_id = actual_tool.pop('id') - self.assertEqual(actual_tool, tool) + assert actual_tool == tool - actual_tools2 = self.storage.tool_add([tool]) + actual_tools2 = swh_storage.tool_add([tool]) actual_tool2 = actual_tools2[0] - self.assertIsNotNone(actual_tool2) # now it exists + assert actual_tool2 is not None # now it exists new_id2 = actual_tool2.pop('id') - self.assertEqual(new_id, new_id2) - self.assertEqual(actual_tool, actual_tool2) + assert new_id == new_id2 + assert actual_tool == actual_tool2 - def test_tool_add_multiple(self): + def test_tool_add_multiple(self, swh_storage): tool = { 'name': 'some-unknown-tool', 'version': 'some-version', 'configuration': {"debian-package": "some-package"}, } - actual_tools = list(self.storage.tool_add([tool])) - self.assertEqual(len(actual_tools), 1) + actual_tools = list(swh_storage.tool_add([tool])) + assert len(actual_tools) == 1 new_tools = [tool, { 'name': 'yet-another-tool', 'version': 'version', 'configuration': {}, }] - actual_tools = self.storage.tool_add(new_tools) - self.assertEqual(len(actual_tools), 2) + actual_tools = swh_storage.tool_add(new_tools) + assert len(actual_tools) == 2 # order not guaranteed, so we iterate over results to check for tool in actual_tools: _id = tool.pop('id') - self.assertIsNotNone(_id) - self.assertIn(tool, new_tools) + assert _id is not None + assert tool in new_tools - def test_tool_get_missing(self): + def test_tool_get_missing(self, swh_storage): tool = { 'name': 'unknown-tool', 'version': '3.1.0rc2-31-ga2cbb8c', 'configuration': {"command_line": "nomossa "}, } - actual_tool = self.storage.tool_get(tool) + actual_tool = swh_storage.tool_get(tool) - self.assertIsNone(actual_tool) + assert actual_tool is None - def test_tool_metadata_get_missing_context(self): + def test_tool_metadata_get_missing_context(self, swh_storage): tool = { 'name': 'swh-metadata-translator', 'version': '0.0.1', 'configuration': {"context": "unknown-context"}, } - actual_tool = self.storage.tool_get(tool) + actual_tool = swh_storage.tool_get(tool) - self.assertIsNone(actual_tool) + assert actual_tool is None - def test_tool_metadata_get(self): + def test_tool_metadata_get(self, swh_storage): tool = { 'name': 'swh-metadata-translator', 'version': '0.0.1', 'configuration': {"type": "local", "context": "npm"}, } - - tools = self.storage.tool_add([tool]) - expected_tool = tools[0] + expected_tool = swh_storage.tool_add([tool])[0] # when - actual_tool = self.storage.tool_get(tool) + actual_tool = swh_storage.tool_get(tool) # then - self.assertEqual(expected_tool, actual_tool) + assert expected_tool == actual_tool - def test_metadata_provider_get(self): + def test_metadata_provider_get(self, swh_storage): # given - no_provider = self.storage.metadata_provider_get(6459456445615) - self.assertIsNone(no_provider) + no_provider = swh_storage.metadata_provider_get(6459456445615) + assert no_provider is None # when - provider_id = self.storage.metadata_provider_add( - self.provider['name'], - self.provider['type'], - self.provider['url'], - self.provider['metadata']) + provider_id = swh_storage.metadata_provider_add( + data.provider['name'], + data.provider['type'], + data.provider['url'], + data.provider['metadata']) - actual_provider = self.storage.metadata_provider_get(provider_id) + actual_provider = swh_storage.metadata_provider_get(provider_id) expected_provider = { - 'provider_name': self.provider['name'], - 'provider_url': self.provider['url'] + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] } # then del actual_provider['id'] - self.assertTrue(actual_provider, expected_provider) + assert actual_provider, expected_provider - def test_metadata_provider_get_by(self): + def test_metadata_provider_get_by(self, swh_storage): # given - no_provider = self.storage.metadata_provider_get_by({ - 'provider_name': self.provider['name'], - 'provider_url': self.provider['url'] + no_provider = swh_storage.metadata_provider_get_by({ + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] }) - self.assertIsNone(no_provider) + assert no_provider is None # when - provider_id = self.storage.metadata_provider_add( - self.provider['name'], - self.provider['type'], - self.provider['url'], - self.provider['metadata']) - - actual_provider = self.storage.metadata_provider_get_by({ - 'provider_name': self.provider['name'], - 'provider_url': self.provider['url'] + provider_id = swh_storage.metadata_provider_add( + data.provider['name'], + data.provider['type'], + data.provider['url'], + data.provider['metadata']) + + actual_provider = swh_storage.metadata_provider_get_by({ + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] }) # then - self.assertTrue(provider_id, actual_provider['id']) + assert provider_id, actual_provider['id'] + + @pytest.mark.parametrize('use_url', [True, False]) + def test_origin_metadata_add(self, swh_storage, use_url): + if not self._test_origin_ids: + pytest.skip('requires origin id') - @given(strategies.booleans()) - def test_origin_metadata_add(self, use_url): - self.reset_storage() # given - origin = self.storage.origin_add([self.origin])[0] - origin_id = origin['id'] - if use_url: - origin = origin['url'] - else: - origin = origin['id'] - origin_metadata0 = list(self.storage.origin_metadata_get_by( - origin)) - self.assertEqual(len(origin_metadata0), 0, origin_metadata0) + origin = swh_storage.origin_add([data.origin])[0] - tools = self.storage.tool_add([self.metadata_tool]) + tools = swh_storage.tool_add([data.metadata_tool]) tool = tools[0] - self.storage.metadata_provider_add( - self.provider['name'], - self.provider['type'], - self.provider['url'], - self.provider['metadata']) - provider = self.storage.metadata_provider_get_by({ - 'provider_name': self.provider['name'], - 'provider_url': self.provider['url'] + swh_storage.metadata_provider_add( + data.provider['name'], + data.provider['type'], + data.provider['url'], + data.provider['metadata']) + provider = swh_storage.metadata_provider_get_by({ + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] }) # when adding for the same origin 2 metadatas - self.storage.origin_metadata_add( + origin = origin['url' if use_url else 'id'] + + n_om = len(list(swh_storage.origin_metadata_get_by(origin))) + swh_storage.origin_metadata_add( origin, - self.origin_metadata['discovery_date'], + data.origin_metadata['discovery_date'], provider['id'], tool['id'], - self.origin_metadata['metadata']) - self.storage.origin_metadata_add( + data.origin_metadata['metadata']) + swh_storage.origin_metadata_add( origin, '2015-01-01 23:00:00+00', provider['id'], tool['id'], - self.origin_metadata2['metadata']) - actual_om = list(self.storage.origin_metadata_get_by( - origin)) + data.origin_metadata2['metadata']) + n_actual_om = len(list(swh_storage.origin_metadata_get_by(origin))) # then - self.assertCountEqual( - [item['origin_id'] for item in actual_om], - [origin_id, origin_id]) + assert n_actual_om == n_om + 2 + + def test_origin_metadata_get(self, swh_storage): + if not self._test_origin_ids: + pytest.skip('requires origin id') - def test_origin_metadata_get(self): # given - origin_id = self.storage.origin_add([self.origin])[0]['id'] - origin_id2 = self.storage.origin_add([self.origin2])[0]['id'] - - self.storage.metadata_provider_add(self.provider['name'], - self.provider['type'], - self.provider['url'], - self.provider['metadata']) - provider = self.storage.metadata_provider_get_by({ - 'provider_name': self.provider['name'], - 'provider_url': self.provider['url'] - }) - tool = self.storage.tool_add([self.metadata_tool])[0] + origin_id = swh_storage.origin_add([data.origin])[0]['id'] + origin_id2 = swh_storage.origin_add([data.origin2])[0]['id'] + + swh_storage.metadata_provider_add(data.provider['name'], + data.provider['type'], + data.provider['url'], + data.provider['metadata']) + provider = swh_storage.metadata_provider_get_by({ + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] + }) + tool = swh_storage.tool_add([data.metadata_tool])[0] # when adding for the same origin 2 metadatas - self.storage.origin_metadata_add( + swh_storage.origin_metadata_add( origin_id, - self.origin_metadata['discovery_date'], + data.origin_metadata['discovery_date'], provider['id'], tool['id'], - self.origin_metadata['metadata']) - self.storage.origin_metadata_add( + data.origin_metadata['metadata']) + swh_storage.origin_metadata_add( origin_id2, - self.origin_metadata2['discovery_date'], + data.origin_metadata2['discovery_date'], provider['id'], tool['id'], - self.origin_metadata2['metadata']) - self.storage.origin_metadata_add( + data.origin_metadata2['metadata']) + swh_storage.origin_metadata_add( origin_id, - self.origin_metadata2['discovery_date'], + data.origin_metadata2['discovery_date'], provider['id'], tool['id'], - self.origin_metadata2['metadata']) - all_metadatas = list(self.storage.origin_metadata_get_by( + data.origin_metadata2['metadata']) + all_metadatas = list(swh_storage.origin_metadata_get_by( origin_id)) - metadatas_for_origin2 = list(self.storage.origin_metadata_get_by( + metadatas_for_origin2 = list(swh_storage.origin_metadata_get_by( origin_id2)) expected_results = [{ 'origin_id': origin_id, 'discovery_date': datetime.datetime( 2017, 1, 1, 23, 0, tzinfo=datetime.timezone.utc), 'metadata': { 'name': 'test_origin_metadata', 'version': '0.0.1' }, 'provider_id': provider['id'], 'provider_name': 'hal', 'provider_type': 'deposit-client', 'provider_url': 'http:///hal/inria', 'tool_id': tool['id'] }, { 'origin_id': origin_id, 'discovery_date': datetime.datetime( 2015, 1, 1, 23, 0, tzinfo=datetime.timezone.utc), 'metadata': { 'name': 'test_origin_metadata', 'version': '0.0.1' }, 'provider_id': provider['id'], 'provider_name': 'hal', 'provider_type': 'deposit-client', 'provider_url': 'http:///hal/inria', 'tool_id': tool['id'] }] # then - self.assertEqual(len(all_metadatas), 2) - self.assertEqual(len(metadatas_for_origin2), 1) - self.assertCountEqual(all_metadatas, expected_results) + assert len(all_metadatas) == 2 + assert len(metadatas_for_origin2) == 1 + assert all_metadatas == expected_results - def test_metadata_provider_add(self): + def test_metadata_provider_add(self, swh_storage): provider = { 'provider_name': 'swMATH', 'provider_type': 'registry', 'provider_url': 'http://www.swmath.org/', 'metadata': { 'email': 'contact@swmath.org', 'license': 'All rights reserved' } } - provider['id'] = provider_id = self.storage.metadata_provider_add( + provider['id'] = provider_id = swh_storage.metadata_provider_add( **provider) - self.assertEqual( - provider, - self.storage.metadata_provider_get_by({ - 'provider_name': 'swMATH', - 'provider_url': 'http://www.swmath.org/' - })) - self.assertEqual( - provider, - self.storage.metadata_provider_get(provider_id)) - - def test_origin_metadata_get_by_provider_type(self): + assert provider == swh_storage.metadata_provider_get_by( + {'provider_name': 'swMATH', + 'provider_url': 'http://www.swmath.org/'}) + assert provider == swh_storage.metadata_provider_get(provider_id) + + def test_origin_metadata_get_by_provider_type(self, swh_storage): # given - origin_id = self.storage.origin_add([self.origin])[0]['id'] - origin_id2 = self.storage.origin_add([self.origin2])[0]['id'] - provider1_id = self.storage.metadata_provider_add( - self.provider['name'], - self.provider['type'], - self.provider['url'], - self.provider['metadata']) - provider1 = self.storage.metadata_provider_get_by({ - 'provider_name': self.provider['name'], - 'provider_url': self.provider['url'] + if not self._test_origin_ids: + pytest.skip('reauires origin id') + + origin_id = swh_storage.origin_add([data.origin])[0]['id'] + origin_id2 = swh_storage.origin_add([data.origin2])[0]['id'] + provider1_id = swh_storage.metadata_provider_add( + data.provider['name'], + data.provider['type'], + data.provider['url'], + data.provider['metadata']) + provider1 = swh_storage.metadata_provider_get_by({ + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] }) - self.assertEqual(provider1, - self.storage.metadata_provider_get(provider1_id)) + assert provider1 == swh_storage.metadata_provider_get(provider1_id) - provider2_id = self.storage.metadata_provider_add( + provider2_id = swh_storage.metadata_provider_add( 'swMATH', 'registry', 'http://www.swmath.org/', {'email': 'contact@swmath.org', 'license': 'All rights reserved'}) - provider2 = self.storage.metadata_provider_get_by({ + provider2 = swh_storage.metadata_provider_get_by({ 'provider_name': 'swMATH', 'provider_url': 'http://www.swmath.org/' }) - self.assertEqual(provider2, - self.storage.metadata_provider_get(provider2_id)) + assert provider2 == swh_storage.metadata_provider_get(provider2_id) # using the only tool now inserted in the data.sql, but for this # provider should be a crawler tool (not yet implemented) - tool = self.storage.tool_add([self.metadata_tool])[0] + tool = swh_storage.tool_add([data.metadata_tool])[0] # when adding for the same origin 2 metadatas - self.storage.origin_metadata_add( + swh_storage.origin_metadata_add( origin_id, - self.origin_metadata['discovery_date'], + data.origin_metadata['discovery_date'], provider1['id'], tool['id'], - self.origin_metadata['metadata']) - self.storage.origin_metadata_add( + data.origin_metadata['metadata']) + swh_storage.origin_metadata_add( origin_id2, - self.origin_metadata2['discovery_date'], + data.origin_metadata2['discovery_date'], provider2['id'], tool['id'], - self.origin_metadata2['metadata']) + data.origin_metadata2['metadata']) provider_type = 'registry' - m_by_provider = list(self.storage.origin_metadata_get_by( + m_by_provider = list(swh_storage.origin_metadata_get_by( origin_id2, provider_type)) for item in m_by_provider: if 'id' in item: del item['id'] expected_results = [{ 'origin_id': origin_id2, 'discovery_date': datetime.datetime( 2017, 1, 1, 23, 0, tzinfo=datetime.timezone.utc), 'metadata': { 'name': 'test_origin_metadata', 'version': '0.0.1' }, 'provider_id': provider2['id'], 'provider_name': 'swMATH', 'provider_type': provider_type, 'provider_url': 'http://www.swmath.org/', 'tool_id': tool['id'] }] # then - self.assertEqual(len(m_by_provider), 1) - self.assertEqual(m_by_provider, expected_results) + assert len(m_by_provider) == 1 + assert m_by_provider == expected_results -class CommonPropTestStorage: +class TestStorageCommonProp: _test_origin_ids = True def assert_contents_ok(self, expected_contents, actual_contents, keys_to_check={'sha1', 'data'}): """Assert that a given list of contents matches on a given set of keys. """ for k in keys_to_check: - expected_list = sorted([c[k] for c in expected_contents]) - actual_list = sorted([c[k] for c in actual_contents]) - self.assertEqual(actual_list, expected_list) - - @given(gen_contents(min_size=1, max_size=4)) - def test_generate_content_get(self, contents): - self.reset_storage() - # add contents to storage - self.storage.content_add(contents) + expected_list = set([c.get(k) for c in expected_contents]) + actual_list = set([c.get(k) for c in actual_contents]) + assert actual_list == expected_list, k + def test_generate_content_get(self, swh_storage, swh_contents): + contents_with_data = [c for c in swh_contents + if c['status'] != 'absent'] # input the list of sha1s we want from storage - get_sha1s = [c['sha1'] for c in contents] + get_sha1s = [c['sha1'] for c in contents_with_data] # retrieve contents - actual_contents = list(self.storage.content_get(get_sha1s)) - - self.assert_contents_ok(contents, actual_contents) - - @given(gen_contents(min_size=1, max_size=4)) - def test_generate_content_get_metadata(self, contents): - self.reset_storage() - # add contents to storage - self.storage.content_add(contents) + actual_contents = list(swh_storage.content_get(get_sha1s)) + assert None not in actual_contents + self.assert_contents_ok(contents_with_data, actual_contents) + def test_generate_content_get_metadata(self, swh_storage, swh_contents): # input the list of sha1s we want from storage - get_sha1s = [c['sha1'] for c in contents] + expected_contents = [c for c in swh_contents + if c['status'] != 'absent'] + get_sha1s = [c['sha1'] for c in expected_contents] # retrieve contents - actual_contents = list(self.storage.content_get_metadata(get_sha1s)) + actual_contents = list(swh_storage.content_get_metadata(get_sha1s)) - self.assertEqual(len(actual_contents), len(contents)) + assert len(actual_contents) == len(get_sha1s) - # will check that all contents are retrieved correctly - one_content = contents[0] - # content_get_metadata does not return data - keys_to_check = set(one_content.keys()) - {'data'} - self.assert_contents_ok(contents, actual_contents, + keys_to_check = {'length', 'status', + 'sha1', 'sha1_git', 'sha256', 'blake2s256'} + self.assert_contents_ok(expected_contents, actual_contents, keys_to_check=keys_to_check) - @given(gen_contents(), - strategies.binary(min_size=20, max_size=20), - strategies.binary(min_size=20, max_size=20)) - def test_generate_content_get_range(self, contents, start, end): + def test_generate_content_get_range(self, swh_storage, swh_contents): """content_get_range paginates results if limit exceeded""" - self.reset_storage() # add contents to storage - self.storage.content_add(contents) + present_contents = [c for c in swh_contents + if c['status'] != 'absent'] - actual_result = self.storage.content_get_range(start, end) + get_sha1s = sorted([c['sha1'] for c in swh_contents + if c['status'] != 'absent']) + start = get_sha1s[2] + end = get_sha1s[-2] + actual_result = swh_storage.content_get_range(start, end) + + assert actual_result['next'] is None actual_contents = actual_result['contents'] - actual_next = actual_result['next'] + expected_contents = [c for c in present_contents + if start <= c['sha1'] <= end] + if expected_contents: + self.assert_contents_ok( + expected_contents, actual_contents, ['sha1']) + else: + assert actual_contents == [] + + def test_generate_content_get_range_full(self, swh_storage, swh_contents): + """content_get_range for a full range returns all available contents""" + present_contents = [c for c in swh_contents + if c['status'] != 'absent'] - self.assertEqual(actual_next, None) + start = b'0' * 40 + end = b'f' * 40 + actual_result = swh_storage.content_get_range(start, end) + assert actual_result['next'] is None - expected_contents = [c for c in contents + actual_contents = actual_result['contents'] + expected_contents = [c for c in present_contents if start <= c['sha1'] <= end] if expected_contents: - keys_to_check = set(contents[0].keys()) - {'data'} - self.assert_contents_ok(expected_contents, actual_contents, - keys_to_check) + self.assert_contents_ok( + expected_contents, actual_contents, ['sha1']) else: - self.assertEqual(actual_contents, []) + assert actual_contents == [] - def test_generate_content_get_range_limit_none(self): + def test_generate_content_get_range_empty(self, swh_storage, swh_contents): + """content_get_range for an empty range returns nothing""" + start = b'0' * 40 + end = b'f' * 40 + actual_result = swh_storage.content_get_range(end, start) + assert actual_result['next'] is None + assert len(actual_result['contents']) == 0 + + def test_generate_content_get_range_limit_none(self, swh_storage): """content_get_range call with wrong limit input should fail""" - with self.assertRaises(ValueError) as e: - self.storage.content_get_range(start=None, end=None, limit=None) + with pytest.raises(ValueError) as e: + swh_storage.content_get_range(start=None, end=None, limit=None) - self.assertEqual(e.exception.args, ( - 'Development error: limit should not be None',)) + assert e.value.args == ('Development error: limit should not be None',) - @given(gen_contents(min_size=1, max_size=4)) - def test_generate_content_get_range_no_limit(self, contents): + def test_generate_content_get_range_no_limit( + self, swh_storage, swh_contents): """content_get_range returns contents within range provided""" - self.reset_storage() # add contents to storage - self.storage.content_add(contents) - # input the list of sha1s we want from storage - get_sha1s = sorted([c['sha1'] for c in contents]) + get_sha1s = sorted([c['sha1'] for c in swh_contents + if c['status'] != 'absent']) start = get_sha1s[0] end = get_sha1s[-1] # retrieve contents - actual_result = self.storage.content_get_range(start, end) + actual_result = swh_storage.content_get_range(start, end) actual_contents = actual_result['contents'] - actual_next = actual_result['next'] - - self.assertEqual(len(contents), len(actual_contents)) - self.assertIsNone(actual_next) + assert actual_result['next'] is None + assert len(actual_contents) == len(get_sha1s) - one_content = contents[0] - keys_to_check = set(one_content.keys()) - {'data'} - self.assert_contents_ok(contents, actual_contents, keys_to_check) + expected_contents = [c for c in swh_contents + if c['status'] != 'absent'] + self.assert_contents_ok( + expected_contents, actual_contents, ['sha1']) - @given(gen_contents(min_size=4, max_size=4)) - def test_generate_content_get_range_limit(self, contents): + def test_generate_content_get_range_limit(self, swh_storage, swh_contents): """content_get_range paginates results if limit exceeded""" - self.reset_storage() - contents_map = {c['sha1']: c for c in contents} - - # add contents to storage - self.storage.content_add(contents) + contents_map = {c['sha1']: c for c in swh_contents} # input the list of sha1s we want from storage - get_sha1s = sorted([c['sha1'] for c in contents]) + get_sha1s = sorted([c['sha1'] for c in swh_contents + if c['status'] != 'absent']) start = get_sha1s[0] end = get_sha1s[-1] - # retrieve contents limited to 3 results - limited_results = len(contents) - 1 - actual_result = self.storage.content_get_range(start, end, - limit=limited_results) + # retrieve contents limited to n-1 results + limited_results = len(get_sha1s) - 1 + actual_result = swh_storage.content_get_range( + start, end, limit=limited_results) actual_contents = actual_result['contents'] - actual_next = actual_result['next'] - - self.assertEqual(limited_results, len(actual_contents)) - self.assertIsNotNone(actual_next) - self.assertEqual(actual_next, get_sha1s[-1]) + assert actual_result['next'] == get_sha1s[-1] + assert len(actual_contents) == limited_results expected_contents = [contents_map[sha1] for sha1 in get_sha1s[:-1]] - keys_to_check = set(contents[0].keys()) - {'data'} - self.assert_contents_ok(expected_contents, actual_contents, - keys_to_check) + self.assert_contents_ok( + expected_contents, actual_contents, ['sha1']) # retrieve next part - actual_results2 = self.storage.content_get_range(start=end, end=end) + actual_results2 = swh_storage.content_get_range(start=end, end=end) + assert actual_results2['next'] is None actual_contents2 = actual_results2['contents'] - actual_next2 = actual_results2['next'] - - self.assertEqual(1, len(actual_contents2)) - self.assertIsNone(actual_next2) + assert len(actual_contents2) == 1 - self.assert_contents_ok([contents_map[actual_next]], actual_contents2, + self.assert_contents_ok( + [contents_map[get_sha1s[-1]]], actual_contents2, ['sha1']) - keys_to_check) - - def test_origin_get_invalid_id_legacy(self): + def test_origin_get_invalid_id_legacy(self, swh_storage): if self._test_origin_ids: invalid_origin_id = 1 - origin_info = self.storage.origin_get({'id': invalid_origin_id}) - self.assertIsNone(origin_info) + origin_info = swh_storage.origin_get({'id': invalid_origin_id}) + assert origin_info is None - origin_visits = list(self.storage.origin_visit_get( + origin_visits = list(swh_storage.origin_visit_get( invalid_origin_id)) - self.assertEqual(origin_visits, []) + assert origin_visits == [] - def test_origin_get_invalid_id(self): + def test_origin_get_invalid_id(self, swh_storage): if self._test_origin_ids: - origin_info = self.storage.origin_get([{'id': 1}, {'id': 2}]) - self.assertEqual(origin_info, [None, None]) + origin_info = swh_storage.origin_get([{'id': 1}, {'id': 2}]) + assert origin_info == [None, None] - origin_visits = list(self.storage.origin_visit_get(1)) - self.assertEqual(origin_visits, []) + origin_visits = list(swh_storage.origin_visit_get(1)) + assert origin_visits == [] - @given(strategies.lists(origins().map(lambda x: x.to_dict()), - unique_by=lambda x: x['url'], - min_size=6, max_size=15)) - def test_origin_get_range(self, new_origins): - self.reset_storage() - nb_origins = len(new_origins) + def test_origin_get_range(self, swh_storage, swh_origins): + if not self._test_origin_ids: + pytest.skip('requires origin id') + + actual_origins = list( + swh_storage.origin_get_range(origin_from=0, + origin_count=0)) + assert len(actual_origins) == 0 - self.storage.origin_add(new_origins) + actual_origins = list( + swh_storage.origin_get_range(origin_from=0, + origin_count=1)) + assert len(actual_origins) == 1 + assert actual_origins[0]['id'] == 1 - origin_from = random.randint(1, nb_origins-1) - origin_count = random.randint(1, nb_origins - origin_from) + actual_origins = list( + swh_storage.origin_get_range(origin_from=1, + origin_count=1)) + assert len(actual_origins) == 1 + assert actual_origins[0]['id'] == 1 actual_origins = list( - self.storage.origin_get_range(origin_from=origin_from, - origin_count=origin_count)) + swh_storage.origin_get_range(origin_from=1, + origin_count=10)) + assert len(actual_origins) == 10 + assert actual_origins[0]['id'] == 1 + assert actual_origins[-1]['id'] == 10 - for origin in actual_origins: - del origin['id'] + actual_origins = list( + swh_storage.origin_get_range(origin_from=1, + origin_count=20)) + assert len(actual_origins) == 20 + assert actual_origins[0]['id'] == 1 + assert actual_origins[-1]['id'] == 20 - for origin in actual_origins: - self.assertIn(origin, new_origins) + actual_origins = list( + swh_storage.origin_get_range(origin_from=1, + origin_count=21)) + assert len(actual_origins) == 20 + assert actual_origins[0]['id'] == 1 + assert actual_origins[-1]['id'] == 20 - origin_from = -1 - origin_count = 5 - origins = list( - self.storage.origin_get_range(origin_from=origin_from, - origin_count=origin_count)) - self.assertEqual(len(origins), origin_count) + actual_origins = list( + swh_storage.origin_get_range(origin_from=11, + origin_count=0)) + assert len(actual_origins) == 0 - origin_from = 10000 - origins = list( - self.storage.origin_get_range(origin_from=origin_from, - origin_count=origin_count)) - self.assertEqual(len(origins), 0) + actual_origins = list( + swh_storage.origin_get_range(origin_from=11, + origin_count=10)) + assert len(actual_origins) == 10 + assert actual_origins[0]['id'] == 11 + assert actual_origins[-1]['id'] == 20 - def test_origin_count(self): + actual_origins = list( + swh_storage.origin_get_range(origin_from=11, + origin_count=11)) + assert len(actual_origins) == 10 + assert actual_origins[0]['id'] == 11 + assert actual_origins[-1]['id'] == 20 + def test_origin_count(self, swh_storage): new_origins = [ { 'type': 'git', 'url': 'https://github.com/user1/repo1' }, { 'type': 'git', 'url': 'https://github.com/user2/repo1' }, { 'type': 'git', 'url': 'https://github.com/user3/repo1' }, { 'type': 'git', 'url': 'https://gitlab.com/user1/repo1' }, { 'type': 'git', 'url': 'https://gitlab.com/user2/repo1' } ] - self.storage.origin_add(new_origins) + swh_storage.origin_add(new_origins) - self.assertEqual(self.storage.origin_count('github'), 3) - self.assertEqual(self.storage.origin_count('gitlab'), 2) - self.assertEqual( - self.storage.origin_count('.*user.*', regexp=True), 5) - self.assertEqual( - self.storage.origin_count('.*user.*', regexp=False), 0) - self.assertEqual( - self.storage.origin_count('.*user1.*', regexp=True), 2) - self.assertEqual( - self.storage.origin_count('.*user1.*', regexp=False), 0) + assert swh_storage.origin_count('github') == 3 + assert swh_storage.origin_count('gitlab') == 2 + assert swh_storage.origin_count('.*user.*', regexp=True) == 5 + assert swh_storage.origin_count('.*user.*', regexp=False) == 0 + assert swh_storage.origin_count('.*user1.*', regexp=True) == 2 + assert swh_storage.origin_count('.*user1.*', regexp=False) == 0 @settings(suppress_health_check=[HealthCheck.too_slow]) @given(strategies.lists(objects(), max_size=2)) - def test_add_arbitrary(self, objects): - self.reset_storage() + def test_add_arbitrary(self, swh_storage, objects): for (obj_type, obj) in objects: obj = obj.to_dict() if obj_type == 'origin_visit': - origin_id = self.storage.origin_add_one(obj.pop('origin')) + origin_id = swh_storage.origin_add_one(obj.pop('origin')) if 'visit' in obj: del obj['visit'] - self.storage.origin_visit_add( + swh_storage.origin_visit_add( origin_id, obj['date'], obj['type']) else: - method = getattr(self.storage, obj_type + '_add') + method = getattr(swh_storage, obj_type + '_add') try: method([obj]) except HashCollision: pass @pytest.mark.db -class TestLocalStorage(CommonTestStorage, StorageTestDbFixture, - unittest.TestCase): +class TestLocalStorage: """Test the local storage""" + _test_origin_ids = True # Can only be tested with local storage as you can't mock # datetimes for the remote server - @given(strategies.booleans()) - def test_fetch_history(self, use_url): + @pytest.mark.parametrize('use_url', [True, False]) + def test_fetch_history(self, swh_storage, use_url): if not self._test_origin_ids and not use_url: return - self.reset_storage() - origin_id = self.storage.origin_add_one(self.origin) - origin_id_or_url = self.origin['url'] if use_url else origin_id + origin_id = swh_storage.origin_add_one(data.origin) + origin_id_or_url = data.origin['url'] if use_url else origin_id with patch('datetime.datetime'): - datetime.datetime.now.return_value = self.fetch_history_date - fetch_history_id = self.storage.fetch_history_start( + datetime.datetime.now.return_value = data.fetch_history_date + fetch_history_id = swh_storage.fetch_history_start( origin_id_or_url) datetime.datetime.now.assert_called_with(tz=datetime.timezone.utc) with patch('datetime.datetime'): - datetime.datetime.now.return_value = self.fetch_history_end - self.storage.fetch_history_end(fetch_history_id, - self.fetch_history_data) + datetime.datetime.now.return_value = data.fetch_history_end + swh_storage.fetch_history_end(fetch_history_id, + data.fetch_history_data) - fetch_history = self.storage.fetch_history_get(fetch_history_id) - expected_fetch_history = self.fetch_history_data.copy() + fetch_history = swh_storage.fetch_history_get(fetch_history_id) + expected_fetch_history = data.fetch_history_data.copy() expected_fetch_history['id'] = fetch_history_id expected_fetch_history['origin'] = origin_id - expected_fetch_history['date'] = self.fetch_history_date - expected_fetch_history['duration'] = self.fetch_history_duration + expected_fetch_history['date'] = data.fetch_history_date + expected_fetch_history['duration'] = data.fetch_history_duration - self.assertEqual(expected_fetch_history, fetch_history) + assert expected_fetch_history == fetch_history # This test is only relevant on the local storage, with an actual # objstorage raising an exception - def test_content_add_objstorage_exception(self): - self.storage.objstorage.add = Mock( + def test_content_add_objstorage_exception(self, swh_storage): + swh_storage.objstorage.add = Mock( side_effect=Exception('mocked broken objstorage') ) - with self.assertRaises(Exception) as e: - self.storage.content_add([self.cont]) + with pytest.raises(Exception) as e: + swh_storage.content_add([data.cont]) - self.assertEqual(e.exception.args, ('mocked broken objstorage',)) - missing = list(self.storage.content_missing([self.cont])) - self.assertEqual(missing, [self.cont['sha1']]) + assert e.value.args == ('mocked broken objstorage',) + missing = list(swh_storage.content_missing([data.cont])) + assert missing == [data.cont['sha1']] @pytest.mark.db -class TestStorageRaceConditions(TestStorageData, StorageTestDbFixture, - unittest.TestCase): +class TestStorageRaceConditions: @pytest.mark.xfail - def test_content_add_race(self): + def test_content_add_race(self, swh_storage): results = queue.Queue() def thread(): try: - with self.db_transaction() as (db, cur): - ret = self.storage.content_add([self.cont], db=db, - cur=cur) + with db_transaction(swh_storage) as (db, cur): + ret = swh_storage.content_add([data.cont], db=db, + cur=cur) results.put((threading.get_ident(), 'data', ret)) except Exception as e: results.put((threading.get_ident(), 'exc', e)) t1 = threading.Thread(target=thread) t2 = threading.Thread(target=thread) t1.start() # this avoids the race condition # import time # time.sleep(1) t2.start() t1.join() t2.join() r1 = results.get(block=False) r2 = results.get(block=False) with pytest.raises(queue.Empty): results.get(block=False) assert r1[0] != r2[0] assert r1[1] == 'data', 'Got exception %r in Thread%s' % (r1[2], r1[0]) assert r2[1] == 'data', 'Got exception %r in Thread%s' % (r2[2], r2[0]) @pytest.mark.db -@pytest.mark.property_based -class PropTestLocalStorage(CommonPropTestStorage, StorageTestDbFixture, - unittest.TestCase): - pass - - -class AlteringSchemaTest(TestStorageData, StorageTestDbFixture, - unittest.TestCase): +class TestAlteringSchema: """This class is dedicated for the rare case where the schema needs to be altered dynamically. Otherwise, the tests could be blocking when ran altogether. """ - def test_content_update(self): - self.storage.journal_writer = None # TODO, not supported + def test_content_update(self, swh_storage): + swh_storage.journal_writer = None # TODO, not supported - cont = copy.deepcopy(self.cont) + cont = copy.deepcopy(data.cont) - self.storage.content_add([cont]) + swh_storage.content_add([cont]) # alter the sha1_git for example cont['sha1_git'] = hash_to_bytes( '3a60a5275d0333bf13468e8b3dcab90f4046e654') - self.storage.content_update([cont], keys=['sha1_git']) + swh_storage.content_update([cont], keys=['sha1_git']) - with self.db_transaction() as (_, cur): + with db_transaction(swh_storage) as (_, cur): cur.execute('SELECT sha1, sha1_git, sha256, length, status' ' FROM content WHERE sha1 = %s', (cont['sha1'],)) datum = cur.fetchone() - self.assertEqual( - (datum[0], datum[1], datum[2], - datum[3], datum[4]), - (cont['sha1'], cont['sha1_git'], cont['sha256'], - cont['length'], 'visible')) + assert datum == (cont['sha1'], cont['sha1_git'], cont['sha256'], + cont['length'], 'visible') - def test_content_update_with_new_cols(self): - self.storage.journal_writer = None # TODO, not supported + def test_content_update_with_new_cols(self, swh_storage): + swh_storage.journal_writer = None # TODO, not supported - with self.db_transaction() as (db, cur): + with db_transaction(swh_storage) as (_, cur): cur.execute("""alter table content add column test text default null, add column test2 text default null""") - cont = copy.deepcopy(self.cont2) - self.storage.content_add([cont]) + cont = copy.deepcopy(data.cont2) + swh_storage.content_add([cont]) cont['test'] = 'value-1' cont['test2'] = 'value-2' - self.storage.content_update([cont], keys=['test', 'test2']) - with self.db_transaction() as (_, cur): + swh_storage.content_update([cont], keys=['test', 'test2']) + with db_transaction(swh_storage) as (_, cur): cur.execute( '''SELECT sha1, sha1_git, sha256, length, status, test, test2 FROM content WHERE sha1 = %s''', (cont['sha1'],)) datum = cur.fetchone() - self.assertEqual( - (datum[0], datum[1], datum[2], - datum[3], datum[4], datum[5], datum[6]), - (cont['sha1'], cont['sha1_git'], cont['sha256'], - cont['length'], 'visible', cont['test'], cont['test2'])) + assert datum == (cont['sha1'], cont['sha1_git'], cont['sha256'], + cont['length'], 'visible', + cont['test'], cont['test2']) - with self.db_transaction() as (_, cur): + with db_transaction(swh_storage) as (_, cur): cur.execute("""alter table content drop column test, drop column test2""") diff --git a/tox.ini b/tox.ini index 1923226d..78178f46 100644 --- a/tox.ini +++ b/tox.ini @@ -1,50 +1,48 @@ [tox] envlist=flake8,mypy,py3-no-origin-ids,py3 [testenv:py3] deps = .[testing] .[listener] pytest-cov - pifpaf commands = - pifpaf run postgresql -- pytest --hypothesis-profile=fast \ + pytest --hypothesis-profile=fast \ --cov={envsitepackagesdir}/swh/storage \ {envsitepackagesdir}/swh/storage \ --cov-branch {posargs} [testenv:py3-slow] deps = .[testing] .[listener] pytest-cov - pifpaf commands = - pifpaf run postgresql -- pytest --hypothesis-profile=slow \ + pytest --hypothesis-profile=slow \ --cov={envsitepackagesdir}/swh/storage \ {envsitepackagesdir}/swh/storage \ --cov-branch {posargs} [testenv:py3-no-origin-ids] deps = .[testing] pytest-cov setenv = SWH_STORAGE_IN_MEMORY_ENABLE_ORIGIN_IDS=false commands = pytest --hypothesis-profile=fast {posargs} {envsitepackagesdir}/swh/storage/tests/test_in_memory.py [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 [testenv:mypy] skip_install = true deps = .[testing] mypy commands = mypy swh