diff --git a/swh/storage/tests/conftest.py b/swh/storage/tests/conftest.py
index 0777f170..602305f5 100644
--- a/swh/storage/tests/conftest.py
+++ b/swh/storage/tests/conftest.py
@@ -1,265 +1,223 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import glob
import pytest

from typing import Dict, Union

from pytest_postgresql import factories
from pytest_postgresql.janitor import DatabaseJanitor, psycopg2, Version

from os import path, environ
from hypothesis import settings

import swh.storage

from swh.core.utils import numfile_sortkey as sortkey
from swh.model.tests.generate_testdata import gen_contents, gen_origins

SQL_DIR = path.join(path.dirname(swh.storage.__file__), 'sql')

environ['LC_ALL'] = 'C.UTF-8'

DUMP_FILES = path.join(SQL_DIR, '*.sql')

# define test profiles; full documentation is at:
# https://hypothesis.readthedocs.io/en/latest/settings.html#settings-profiles
settings.register_profile("fast", max_examples=5, deadline=5000)
settings.register_profile("slow", max_examples=20, deadline=5000)


@pytest.fixture
def swh_storage(postgresql_proc, swh_storage_postgresql):
    storage_config = {
        'cls': 'local',
        'db': 'postgresql://{user}@{host}:{port}/{dbname}'.format(
            host=postgresql_proc.host,
            port=postgresql_proc.port,
            user='postgres',
            dbname='tests'),
        'objstorage': {
            'cls': 'memory',
            'args': {}
        },
        'journal_writer': {
            'cls': 'memory',
        },
    }
    storage = swh.storage.get_storage(**storage_config)
    return storage


@pytest.fixture
def swh_contents(swh_storage):
    contents = gen_contents(n=20)
    swh_storage.content_add(contents)
    return contents


@pytest.fixture
def swh_origins(swh_storage):
    origins = gen_origins(n=100)
    swh_storage.origin_add(origins)
    return origins


# the postgresql_fact factory fixture below is mostly a copy of the code
# from pytest-postgresql. We need a custom version here to be able to
# specify our own DatabaseJanitor subclass.
def postgresql_fact(process_fixture_name, db_name=None,
                    dump_files=DUMP_FILES):
    @pytest.fixture
    def postgresql_factory(request):
        """Fixture factory for PostgreSQL.

        :param FixtureRequest request: fixture request object
        :rtype: psycopg2.connection
        :returns: postgresql client
        """
        config = factories.get_config(request)
        if not psycopg2:
            raise ImportError('No module named psycopg2. Please install it.')
        proc_fixture = request.getfixturevalue(process_fixture_name)

        pg_host = proc_fixture.host
        pg_port = proc_fixture.port
        pg_user = proc_fixture.user
        pg_options = proc_fixture.options
        pg_db = db_name or config['dbname']

        with SwhDatabaseJanitor(
                pg_user, pg_host, pg_port, pg_db, proc_fixture.version,
                dump_files=dump_files
        ):
            connection = psycopg2.connect(
                dbname=pg_db,
                user=pg_user,
                host=pg_host,
                port=pg_port,
                options=pg_options
            )
            yield connection
            connection.close()

    return postgresql_factory


swh_storage_postgresql = postgresql_fact('postgresql_proc')
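
For illustration (not part of the patch itself): a test consuming the generated fixture could look like the sketch below. It relies only on what the factory above guarantees, namely that the fixture yields a live psycopg2 connection to the initialized test database; the test name and query are hypothetical.

def test_schema_was_loaded(swh_storage_postgresql):
    # illustrative sketch: the janitor has already executed the *.sql
    # dump files, so the public schema should contain some tables
    with swh_storage_postgresql.cursor() as cur:
        cur.execute(
            "SELECT table_name FROM information_schema.tables "
            "WHERE table_schema = 'public'")
        assert cur.fetchall()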

# This version of the DatabaseJanitor implements a different setup/teardown
# behavior than the stock one: instead of dropping, creating and
# initializing the database for each test, it creates and initializes the
# db only once, then truncates the tables (and resets sequences) between
# tests. This is needed to keep test performance acceptable.
class SwhDatabaseJanitor(DatabaseJanitor):
    def __init__(
            self,
            user: str,
            host: str,
            port: str,
            db_name: str,
            version: Union[str, float, Version],
            dump_files: str = DUMP_FILES
    ) -> None:
        super().__init__(user, host, port, db_name, version)
        self.dump_files = sorted(glob.glob(dump_files), key=sortkey)

    def db_setup(self):
        with psycopg2.connect(
            dbname=self.db_name,
            user=self.user,
            host=self.host,
            port=self.port,
        ) as cnx:
            with cnx.cursor() as cur:
                for fname in self.dump_files:
                    with open(fname) as fobj:
                        sql = fobj.read().replace('concurrently', '').strip()
                        if sql:
                            cur.execute(sql)
            cnx.commit()

    def db_reset(self):
        with psycopg2.connect(
            dbname=self.db_name,
            user=self.user,
            host=self.host,
            port=self.port,
        ) as cnx:
            with cnx.cursor() as cur:
                cur.execute(
                    "SELECT table_name FROM information_schema.tables "
                    "WHERE table_schema = %s", ('public',))
                tables = set(table for (table,) in cur.fetchall())
                for table in tables:
                    cur.execute('truncate table %s cascade' % table)

                cur.execute(
                    "SELECT sequence_name FROM information_schema.sequences "
                    "WHERE sequence_schema = %s", ('public',))
                seqs = set(seq for (seq,) in cur.fetchall())
                for seq in seqs:
                    cur.execute('ALTER SEQUENCE %s RESTART;' % seq)
            cnx.commit()

    def init(self):
        with self.cursor() as cur:
            cur.execute(
                "SELECT COUNT(1) FROM pg_database WHERE datname=%s;",
                (self.db_name,))
            db_exists = cur.fetchone()[0] == 1
            if db_exists:
                cur.execute(
                    'UPDATE pg_database SET datallowconn=true '
                    'WHERE datname = %s;',
                    (self.db_name,))

        if db_exists:
            self.db_reset()
        else:
            with self.cursor() as cur:
                cur.execute('CREATE DATABASE "{}";'.format(self.db_name))
            self.db_setup()

    def drop(self):
        pid_column = 'pid'
        with self.cursor() as cur:
            cur.execute(
                'UPDATE pg_database SET datallowconn=false '
                'WHERE datname = %s;', (self.db_name,))
            cur.execute(
                'SELECT pg_terminate_backend(pg_stat_activity.{}) '
                'FROM pg_stat_activity '
                'WHERE pg_stat_activity.datname = %s;'.format(pid_column),
                (self.db_name,))
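
Since SwhDatabaseJanitor keeps DatabaseJanitor's context-manager behavior (init() on enter, drop() on exit), it can also be driven outside the fixture factory. A minimal sketch, with placeholder connection parameters:

# illustrative only; user/host/port/version below are placeholders
with SwhDatabaseJanitor('postgres', '127.0.0.1', 5432, 'tests', '11',
                        dump_files=DUMP_FILES):
    # the 'tests' database now exists with the schema loaded, or has
    # just been reset by truncation if it already existed
    pass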

@pytest.fixture
def sample_data() -> Dict:
    """Pre-defined sample storage object data to manipulate

    Returns:
-        Dict of data (keys: content, directory, revision, person)
+        Dict of data (keys: content, directory, revision, release, person)

    """
-    sample_content = {
-        'blake2s256': b'\xbf?\x05\xed\xc1U\xd2\xc5\x168Xm\x93\xde}f(HO@\xd0\xacn\x04\x1e\x9a\xb9\xfa\xbf\xcc\x08\xc7',  # noqa
-        'sha1': b'g\x15y+\xcb][\\\n\xf28\xb2\x0c_P[\xc8\x89Hk',
-        'sha1_git': b'\xf2\xae\xfa\xba\xfa\xa6B\x9b^\xf9Z\xf5\x14\x0cna\xb0\xef\x8b',  # noqa
-        'sha256': b"\x87\x022\xedZN\x84\xe8za\xf8'(oA\xc9k\xb1\x80c\x80\xe7J\x06\xea\xd2\xd5\xbeB\x19\xb8\xce",  # noqa
-        'length': 48,
-        'data': b'temp file for testing content storage conversion',
-        'status': 'visible',
-    }
-
-    sample_content2 = {
-        'blake2s256': b'\xbf?\x05\xed\xc1U\xd2\xc5\x168Xm\x93\xde}f(HO@\xd0\xacn\x04\x1e\x9a\xb9\xfa\xbf\xcc\x08\xc7',  # noqa
-        'sha1': b'f\x15y+\xcb][\\\n\xf28\xb2\x0c_P[\xc8\x89Hk',
-        'sha1_git': b'\xc2\xae\xfa\xba\xfa\xa6B\x9b^\xf9Z\xf5\x14\x0cna\xb0\xef\x8b',  # noqa
-        'sha256': b"\x77\x022\xedZN\x84\xe8za\xf8'(oA\xc9k\xb1\x80c\x80\xe7J\x06\xea\xd2\xd5\xbeB\x19\xb8\xce",  # noqa
-        'length': 50,
-        'data': b'temp file for testing content storage conversion 2',
-        'status': 'visible',
-    }
-
-    sample_directory = {
-        'id': b'f\x15y+\xcb][\\\n\xf28\xb2\x0c_P[\xc8\x89Hk',
-        'entries': []
-    }
-
-    sample_person = {
-        'name': b'John Doe',
-        'email': b'john.doe@institute.org',
-        'fullname': b'John Doe <john.doe@institute.org>'
-    }
-
-    sample_revision = {
-        'id': b'f\x15y+\xcb][\\\n\xf28\xb2\x0c_P[\xc8\x89Hk',
-        'message': b'something',
-        'author': sample_person,
-        'committer': sample_person,
-        'date': 1567591673,
-        'committer_date': 1567591673,
-        'type': 'tar',
-        'directory': b'\xc2\xae\xfa\xba\xfa\xa6B\x9b^\xf9Z\xf5\x14\x0cna\xb0\xef\x8b',  # noqa
-        'synthetic': False,
-        'metadata': {},
-        'parents': [],
-    }
+    from .storage_data import data

    return {
-        'content': [sample_content, sample_content2],
-        'person': [sample_person],
-        'directory': [sample_directory],
-        'revision': [sample_revision],
+        'content': [data.cont, data.cont2],
+        'person': [data.person],
+        'directory': [data.dir2],
+        'revision': [data.revision],
+        'release': [data.release, data.release2, data.release3],
    }
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
index febd0c7a..6df3c5de 100644
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -1,516 +1,522 @@
# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import datetime

from swh.model.hashutil import hash_to_bytes
from swh.model import from_disk


class StorageData:
    def __getattr__(self, key):
        v = globals()[key]
        if hasattr(v, 'copy'):
            return v.copy()
        return v


data = StorageData()
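
Note how StorageData.__getattr__ resolves attribute lookups against the module globals and hands back a fresh .copy() whenever the value supports it (a shallow copy, so nested dicts are still shared). That lets tests mutate what they receive without corrupting the shared samples. A quick illustration of that contract (hypothetical snippet, not part of the patch):

c = data.cont            # fresh shallow copy of the `cont` dict below
c['status'] = 'hidden'   # mutating the copy...
assert data.cont['status'] == 'visible'  # ...leaves the module data intact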

cont = {
    'data': b'42\n',
    'length': 3,
    'sha1': hash_to_bytes(
        '34973274ccef6ab4dfaaf86599792fa9c3fe4689'),
    'sha1_git': hash_to_bytes(
        'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'),
    'sha256': hash_to_bytes(
        '673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a'),
    'blake2s256': hash_to_bytes(
        'd5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d'),
    'status': 'visible',
}

cont2 = {
    'data': b'4242\n',
    'length': 5,
    'sha1': hash_to_bytes(
        '61c2b3a30496d329e21af70dd2d7e097046d07b7'),
    'sha1_git': hash_to_bytes(
        '36fade77193cb6d2bd826161a0979d64c28ab4fa'),
    'sha256': hash_to_bytes(
        '859f0b154fdb2d630f45e1ecae4a862915435e663248bb8461d914696fc047cd'),
    'blake2s256': hash_to_bytes(
        '849c20fad132b7c2d62c15de310adfe87be94a379941bed295e8141c6219810d'),
    'status': 'visible',
}

cont3 = {
    'data': b'424242\n',
    'length': 7,
    'sha1': hash_to_bytes(
        '3e21cc4942a4234c9e5edd8a9cacd1670fe59f13'),
    'sha1_git': hash_to_bytes(
        'c932c7649c6dfa4b82327d121215116909eb3bea'),
    'sha256': hash_to_bytes(
        '92fb72daf8c6818288a35137b72155f507e5de8d892712ab96277aaed8cf8a36'),
    'blake2s256': hash_to_bytes(
        '76d0346f44e5a27f6bafdd9c2befd304aff83780f93121d801ab6a1d4769db11'),
    'status': 'visible',
}

contents = (cont, cont2, cont3)

missing_cont = {
    'data': b'missing\n',
    'length': 8,
    'sha1': hash_to_bytes(
        'f9c24e2abb82063a3ba2c44efd2d3c797f28ac90'),
    'sha1_git': hash_to_bytes(
        '33e45d56f88993aae6a0198013efa80716fd8919'),
    'sha256': hash_to_bytes(
        '6bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a'),
    'blake2s256': hash_to_bytes(
        '306856b8fd879edb7b6f1aeaaf8db9bbecc993cd7f776c333ac3a782fa5c6eba'),
    'status': 'absent',
}

skipped_cont = {
    'length': 1024 * 1024 * 200,
    'sha1_git': hash_to_bytes(
        '33e45d56f88993aae6a0198013efa80716fd8920'),
    'sha1': hash_to_bytes(
        '43e45d56f88993aae6a0198013efa80716fd8920'),
    'sha256': hash_to_bytes(
        '7bbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a'),
    'blake2s256': hash_to_bytes(
        'ade18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b'),
    'reason': 'Content too long',
    'status': 'absent',
    'origin': 'file:///dev/zero',
}

skipped_cont2 = {
    'length': 1024 * 1024 * 300,
    'sha1_git': hash_to_bytes(
        '44e45d56f88993aae6a0198013efa80716fd8921'),
    'sha1': hash_to_bytes(
        '54e45d56f88993aae6a0198013efa80716fd8920'),
    'sha256': hash_to_bytes(
        '8cbd052ab054ef222c1c87be60cd191addedd24cc882d1f5f7f7be61dc61bb3a'),
    'blake2s256': hash_to_bytes(
        '9ce18b1adecb33f891ca36664da676e12c772cc193778aac9a137b8dc5834b9b'),
    'reason': 'Content too long',
    'status': 'absent',
}

dir = {
    'id': hash_to_bytes(
        '340133423253310030f531e632a733ff37c3a930'),
    'entries': [
        {
            'name': b'foo',
            'type': 'file',
            'target': hash_to_bytes(  # cont
                'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'),
            'perms': from_disk.DentryPerms.content,
        },
        {
            'name': b'bar\xc3',
            'type': 'dir',
            'target': b'12345678901234567890',
            'perms': from_disk.DentryPerms.directory,
        },
    ],
}

dir2 = {
    'id': hash_to_bytes(
        '340133423253310030f531e632a733ff37c3a935'),
    'entries': [
        {
            'name': b'oof',
            'type': 'file',
            'target': hash_to_bytes(  # cont2
                '36fade77193cb6d2bd826161a0979d64c28ab4fa'),
            'perms': from_disk.DentryPerms.content,
        }
    ],
}

dir3 = {
    'id': hash_to_bytes('33e45d56f88993aae6a0198013efa80716fd8921'),
    'entries': [
        {
            'name': b'foo',
            'type': 'file',
            'target': hash_to_bytes(  # cont
                'd81cc0710eb6cf9efd5b920a8453e1e07157b6cd'),
            'perms': from_disk.DentryPerms.content,
        },
        {
            'name': b'subdir',
            'type': 'dir',
            'target': hash_to_bytes(  # dir
                '340133423253310030f531e632a733ff37c3a930'),
            'perms': from_disk.DentryPerms.directory,
        },
        {
            'name': b'hello',
            'type': 'file',
            'target': b'12345678901234567890',
            'perms': from_disk.DentryPerms.content,
        },
    ],
}

dir4 = {
    'id': hash_to_bytes('33e45d56f88993aae6a0198013efa80716fd8922'),
    'entries': [
        {
            'name': b'subdir1',
            'type': 'dir',
            'target': hash_to_bytes(
                '33e45d56f88993aae6a0198013efa80716fd8921'),  # dir3
            'perms': from_disk.DentryPerms.directory,
        },
    ]
}

directories = (dir, dir2, dir3, dir4)

minus_offset = datetime.timezone(datetime.timedelta(minutes=-120))
plus_offset = datetime.timezone(datetime.timedelta(minutes=120))

revision = {
    'id': b'56789012345678901234',
    'message': b'hello',
    'author': {
        'name': b'Nicolas Dandrimont',
        'email': b'nicolas@example.com',
        'fullname': b'Nicolas Dandrimont <nicolas@example.com>',
    },
    'date': {
        'timestamp': 1234567890,
        'offset': 120,
        'negative_utc': None,
    },
    'committer': {
        'name': b'St\xc3fano Zacchiroli',
        'email': b'stefano@example.com',
        'fullname': b'St\xc3fano Zacchiroli <stefano@example.com>'
    },
    'committer_date': {
        'timestamp': 1123456789,
        'offset': 0,
        'negative_utc': True,
    },
    'parents': [b'01234567890123456789', b'23434512345123456789'],
    'type': 'git',
    'directory': hash_to_bytes(  # dir
        '340133423253310030f531e632a733ff37c3a930'),
    'metadata': {
        'checksums': {
            'sha1': 'tarball-sha1',
            'sha256': 'tarball-sha256',
        },
        'signed-off-by': 'some-dude',
        'extra_headers': [
            ['gpgsig', b'test123'],
            ['mergetags', [b'foo\\bar', b'\x22\xaf\x89\x80\x01\x00']],
        ],
    },
    'synthetic': True
}

revision2 = {
    'id': b'87659012345678904321',
    'message': b'hello again',
    'author': {
        'name': b'Roberto Dicosmo',
        'email': b'roberto@example.com',
        'fullname': b'Roberto Dicosmo <roberto@example.com>',
    },
    'date': {
        'timestamp': {
            'seconds': 1234567843,
            'microseconds': 220000,
        },
        'offset': -720,
        'negative_utc': None,
    },
    'committer': {
        'name': b'tony',
        'email': b'ar@dumont.fr',
        'fullname': b'tony <ar@dumont.fr>',
    },
    'committer_date': {
        'timestamp': 1123456789,
        'offset': 0,
        'negative_utc': False,
    },
    'parents': [b'01234567890123456789'],
    'type': 'git',
    'directory': hash_to_bytes(  # dir2
        '340133423253310030f531e632a733ff37c3a935'),
    'metadata': None,
    'synthetic': False
}

revision3 = {
    'id': hash_to_bytes('7026b7c1a2af56521e951c01ed20f255fa054238'),
    'message': b'a simple revision with no parents this time',
    'author': {
        'name': b'Roberto Dicosmo',
        'email': b'roberto@example.com',
        'fullname': b'Roberto Dicosmo <roberto@example.com>',
    },
    'date': {
        'timestamp': {
            'seconds': 1234567843,
            'microseconds': 220000,
        },
        'offset': -720,
        'negative_utc': None,
    },
    'committer': {
        'name': b'tony',
        'email': b'ar@dumont.fr',
        'fullname': b'tony <ar@dumont.fr>',
    },
    'committer_date': {
        'timestamp': 1127351742,
        'offset': 0,
        'negative_utc': False,
    },
    'parents': [],
    'type': 'git',
    'directory': hash_to_bytes(  # dir2
        '340133423253310030f531e632a733ff37c3a935'),
    'metadata': None,
    'synthetic': True
}

revision4 = {
    'id': hash_to_bytes('368a48fe15b7db2383775f97c6b247011b3f14f4'),
    'message': b'parent of self.revision2',
    'author': {
        'name': b'me',
        'email': b'me@soft.heri',
        'fullname': b'me <me@soft.heri>',
    },
    'date': {
        'timestamp': {
            'seconds': 1244567843,
            'microseconds': 220000,
        },
        'offset': -720,
        'negative_utc': None,
    },
    'committer': {
        'name': b'committer-dude',
        'email': b'committer@dude.com',
        'fullname': b'committer-dude <committer@dude.com>',
    },
    'committer_date': {
        'timestamp': {
            'seconds': 1244567843,
            'microseconds': 220000,
        },
        'offset': -720,
        'negative_utc': None,
    },
    'parents': [hash_to_bytes(  # revision3
        '7026b7c1a2af56521e951c01ed20f255fa054238')],
    'type': 'git',
    'directory': hash_to_bytes(  # dir
        '340133423253310030f531e632a733ff37c3a930'),
    'metadata': None,
    'synthetic': False
}

revisions = (revision, revision2, revision3, revision4)

origin = {
    'url': 'file:///dev/null',
}

origin2 = {
    'url': 'file:///dev/zero',
}

origins = (origin, origin2)

provider = {
    'name': 'hal',
    'type': 'deposit-client',
    'url': 'http:///hal/inria',
    'metadata': {
        'location': 'France'
    }
}

metadata_tool = {
    'name': 'swh-deposit',
    'version': '0.0.1',
    'configuration': {
        'sword_version': '2'
    }
}

date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0,
                                tzinfo=datetime.timezone.utc)
type_visit1 = 'git'

date_visit2 = datetime.datetime(2017, 1, 1, 23, 0, 0,
                                tzinfo=datetime.timezone.utc)
type_visit2 = 'hg'

date_visit3 = datetime.datetime(2018, 1, 1, 23, 0, 0,
                                tzinfo=datetime.timezone.utc)
type_visit3 = 'deb'

release = {
    'id': b'87659012345678901234',
    'name': b'v0.0.1',
    'author': {
        'name': b'olasd',
        'email': b'nic@olasd.fr',
        'fullname': b'olasd <nic@olasd.fr>',
    },
    'date': {
        'timestamp': 1234567890,
        'offset': 42,
        'negative_utc': None,
    },
    'target': b'43210987654321098765',
    'target_type': 'revision',
    'message': b'synthetic release',
    'synthetic': True,
}

release2 = {
    'id': b'56789012348765901234',
    'name': b'v0.0.2',
    'author': {
        'name': b'tony',
        'email': b'ar@dumont.fr',
        'fullname': b'tony <ar@dumont.fr>',
    },
    'date': {
        'timestamp': 1634366813,
        'offset': -120,
        'negative_utc': None,
    },
    'target': b'432109\xa9765432\xc309\x00765',
    'target_type': 'revision',
    'message': b'v0.0.2\nMisc performance improvements + bug fixes',
    'synthetic': False
}

release3 = {
    'id': b'87659012345678904321',
    'name': b'v0.0.2',
    'author': {
        'name': b'tony',
        'email': b'tony@ardumont.fr',
        'fullname': b'tony <tony@ardumont.fr>',
    },
    'date': {
        'timestamp': 1634336813,
        'offset': 0,
        'negative_utc': False,
    },
    'target': b'87659012345678904321',  # revision2
    'target_type': 'revision',
    'message': b'yet another synthetic release',
    'synthetic': True,
}

releases = (release, release2, release3)

snapshot = {
    'id': hash_to_bytes('2498dbf535f882bc7f9a18fb16c9ad27fda7bab7'),
    'branches': {
        b'master': {
            'target': b'56789012345678901234',  # revision
            'target_type': 'revision',
        },
    },
}

empty_snapshot = {
    'id': hash_to_bytes('1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
    'branches': {},
}

complete_snapshot = {
    'id': hash_to_bytes('6e65b86363953b780d92b0a928f3e8fcdd10db36'),
    'branches': {
        b'directory': {
            'target': hash_to_bytes(
                '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'),
            'target_type': 'directory',
        },
        b'directory2': {
            'target': hash_to_bytes(
                '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8'),
            'target_type': 'directory',
        },
        b'content': {
            'target': hash_to_bytes(
                'fe95a46679d128ff167b7c55df5d02356c5a1ae1'),
            'target_type': 'content',
        },
        b'alias': {
            'target': b'revision',
            'target_type': 'alias',
        },
        b'revision': {
            'target': hash_to_bytes(
                'aafb16d69fd30ff58afdd69036a26047f3aebdc6'),
            'target_type': 'revision',
        },
        b'release': {
            'target': hash_to_bytes(
                '7045404f3d1c54e6473c71bbb716529fbad4be24'),
            'target_type': 'release',
        },
        b'snapshot': {
            'target': hash_to_bytes(
                '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'),
            'target_type': 'snapshot',
        },
        b'dangling': None,
    }
}

origin_metadata = {
    'origin': origin,
    'discovery_date': datetime.datetime(2015, 1, 1, 23, 0, 0,
                                        tzinfo=datetime.timezone.utc),
    'provider': provider,
    'tool': 'swh-deposit',
    'metadata': {
        'name': 'test_origin_metadata',
        'version': '0.0.1'
    }
}

origin_metadata2 = {
    'origin': origin,
    'discovery_date': datetime.datetime(2017, 1, 1, 23, 0, 0,
                                        tzinfo=datetime.timezone.utc),
    'provider': provider,
    'tool': 'swh-deposit',
    'metadata': {
        'name': 'test_origin_metadata',
        'version': '0.0.1'
    }
}
+
+person = {
+    'name': b'John Doe',
+    'email': b'john.doe@institute.org',
+    'fullname': b'John Doe <john.doe@institute.org>'
+}
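
Background for the next hunk: BufferingProxyStorage accumulates objects and only forwards them to the wrapped storage once the per-type min_batch_size threshold is reached, or when flush() is called, which is exactly what the tests below exercise. A minimal usage sketch, assuming only the constructor form shown in those tests (my_contents is a placeholder):

storage = BufferingProxyStorage(
    storage={'cls': 'memory'},
    min_batch_size={'content': 10},
)
s = storage.content_add(my_contents)  # {} while below the threshold
s = storage.flush()                   # force-write whatever is buffered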
diff --git a/swh/storage/tests/test_buffer.py b/swh/storage/tests/test_buffer.py
index 1f35f999..8c566088 100644
--- a/swh/storage/tests/test_buffer.py
+++ b/swh/storage/tests/test_buffer.py
@@ -1,179 +1,179 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.storage.buffer import BufferingProxyStorage


def test_buffering_proxy_storage_content_threshold_not_hit(sample_data):
    contents = sample_data['content']
    storage = BufferingProxyStorage(
        storage={'cls': 'memory'},
        min_batch_size={
            'content': 10,
        }
    )
    s = storage.content_add([contents[0], contents[1]])
    assert s == {}

    # contents have not been written to storage
    missing_contents = storage.content_missing(
        [contents[0], contents[1]])
    assert set(missing_contents) == set(
        [contents[0]['sha1'], contents[1]['sha1']])

    s = storage.flush()
    assert s == {
        'content:add': 1 + 1,
        'content:add:bytes': contents[0]['length'] + contents[1]['length'],
        'skipped_content:add': 0
    }

    missing_contents = storage.content_missing(
        [contents[0], contents[1]])
    assert list(missing_contents) == []


def test_buffering_proxy_storage_content_threshold_nb_hit(sample_data):
    contents = sample_data['content']
    storage = BufferingProxyStorage(
        storage={'cls': 'memory'},
        min_batch_size={
            'content': 1,
        }
    )

    s = storage.content_add([contents[0]])
    assert s == {
        'content:add': 1,
        'content:add:bytes': contents[0]['length'],
        'skipped_content:add': 0
    }

    missing_contents = storage.content_missing([contents[0]])
    assert list(missing_contents) == []

    s = storage.flush()
    assert s == {}


def test_buffering_proxy_storage_content_threshold_bytes_hit(sample_data):
    contents = sample_data['content']
-    content_bytes_min_batch_size = 20
+    content_bytes_min_batch_size = 2
    storage = BufferingProxyStorage(
        storage={'cls': 'memory'},
        min_batch_size={
            'content': 10,
            'content_bytes': content_bytes_min_batch_size,
        }
    )
    assert contents[0]['length'] > content_bytes_min_batch_size

    s = storage.content_add([contents[0]])
    assert s == {
        'content:add': 1,
        'content:add:bytes': contents[0]['length'],
        'skipped_content:add': 0
    }

    missing_contents = storage.content_missing([contents[0]])
    assert list(missing_contents) == []

    s = storage.flush()
    assert s == {}


def test_buffering_proxy_storage_directory_threshold_not_hit(sample_data):
    directories = sample_data['directory']
    storage = BufferingProxyStorage(
        storage={'cls': 'memory'},
        min_batch_size={
            'directory': 10,
        }
    )
    s = storage.directory_add([directories[0]])
    assert s == {}

    directory_id = directories[0]['id']
    missing_directories = storage.directory_missing(
        [directory_id])
    assert list(missing_directories) == [directory_id]

    s = storage.flush()
    assert s == {
        'directory:add': 1,
    }

    missing_directories = storage.directory_missing(
        [directory_id])
    assert list(missing_directories) == []


def test_buffering_proxy_storage_directory_threshold_hit(sample_data):
    directories = sample_data['directory']
    storage = BufferingProxyStorage(
        storage={'cls': 'memory'},
        min_batch_size={
            'directory': 1,
        }
    )
    s = storage.directory_add([directories[0]])
    assert s == {
        'directory:add': 1,
    }

    missing_directories = storage.directory_missing(
        [directories[0]['id']])
    assert list(missing_directories) == []

    s = storage.flush()
    assert s == {}


def test_buffering_proxy_storage_revision_threshold_not_hit(sample_data):
    revisions = sample_data['revision']
    storage = BufferingProxyStorage(
        storage={'cls': 'memory'},
        min_batch_size={
            'revision': 10,
        }
    )
    s = storage.revision_add([revisions[0]])
    assert s == {}

    revision_id = revisions[0]['id']
    missing_revisions = storage.revision_missing(
        [revision_id])
    assert list(missing_revisions) == [revision_id]

    s = storage.flush()
    assert s == {
        'revision:add': 1,
    }

    missing_revisions = storage.revision_missing(
        [revision_id])
    assert list(missing_revisions) == []


def test_buffering_proxy_storage_revision_threshold_hit(sample_data):
    revisions = sample_data['revision']
    storage = BufferingProxyStorage(
        storage={'cls': 'memory'},
        min_batch_size={
            'revision': 1,
        }
    )
    s = storage.revision_add([revisions[0]])
    assert s == {
        'revision:add': 1,
    }

    missing_revisions = storage.revision_missing(
        [revisions[0]['id']])
    assert list(missing_revisions) == []

    s = storage.flush()
    assert s == {}
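
Background for the last hunk: FilteringProxyStorage filters out objects the backend already holds before forwarding the write, so re-adding the same object reports zero additions. In sketch form (some_content is a placeholder):

storage = FilteringProxyStorage(storage={'cls': 'memory'})
storage.content_add([some_content])  # => {'content:add': 1, ...}
storage.content_add([some_content])  # => {'content:add': 0, ...}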
diff --git a/swh/storage/tests/test_filter.py b/swh/storage/tests/test_filter.py
index 83fc5862..8ec68e71 100644
--- a/swh/storage/tests/test_filter.py
+++ b/swh/storage/tests/test_filter.py
@@ -1,74 +1,74 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.storage.filter import FilteringProxyStorage


def test_filtering_proxy_storage_content(sample_data):
    sample_content = sample_data['content'][0]
    storage = FilteringProxyStorage(storage={'cls': 'memory'})

    content = next(storage.content_get([sample_content['sha1']]))
    assert not content

    s = storage.content_add([sample_content])
    assert s == {
        'content:add': 1,
-        'content:add:bytes': 48,
+        'content:add:bytes': sample_content['length'],
        'skipped_content:add': 0
    }

    content = next(storage.content_get([sample_content['sha1']]))
    assert content is not None

    s = storage.content_add([sample_content])
    assert s == {
        'content:add': 0,
        'content:add:bytes': 0,
        'skipped_content:add': 0
    }


def test_filtering_proxy_storage_revision(sample_data):
    sample_revision = sample_data['revision'][0]
    storage = FilteringProxyStorage(storage={'cls': 'memory'})

    revision = next(storage.revision_get([sample_revision['id']]))
    assert not revision

    s = storage.revision_add([sample_revision])
    assert s == {
        'revision:add': 1,
    }

    revision = next(storage.revision_get([sample_revision['id']]))
    assert revision is not None

    s = storage.revision_add([sample_revision])
    assert s == {
        'revision:add': 0,
    }


def test_filtering_proxy_storage_directory(sample_data):
    sample_directory = sample_data['directory'][0]
    storage = FilteringProxyStorage(storage={'cls': 'memory'})

    directory = next(storage.directory_missing([sample_directory['id']]))
    assert directory

    s = storage.directory_add([sample_directory])
    assert s == {
        'directory:add': 1,
    }

    directory = list(storage.directory_missing([sample_directory['id']]))
    assert not directory

    s = storage.directory_add([sample_directory])
    assert s == {
        'directory:add': 0,
    }