Changeset View
Standalone View
swh/storage/tests/conftest.py
- This file was added.
from os import path, environ | |||||
import glob | |||||
import pytest | |||||
from pytest_postgresql import factories | |||||
from pytest_postgresql.janitor import DatabaseJanitor, psycopg2 | |||||
from hypothesis import strategies | |||||
from swh.model.hypothesis_strategies import origins, contents | |||||
from swh.core.utils import numfile_sortkey as sortkey | |||||
import swh.storage | |||||
SQL_DIR = path.join(path.dirname(swh.storage.__file__), 'sql') | |||||
environ['LC_ALL'] = 'C.UTF-8' | |||||
DUMP_FILES = path.join(SQL_DIR, '*.sql') | |||||
ardumont: Don't we want a fixture `sqldir` similar to the datadir we introduced in swh-core for the sql… | |||||
@pytest.fixture
def swh_storage(postgresql_proc, swh_storage_postgresql, tmp_path):
    """Instantiate a local swh storage for tests.

    The storage is backed by the test postgres database (managed by the
    swh_storage_postgresql fixture), a pathslicing objstorage rooted in a
    per-test temporary directory, and an in-memory journal writer.
    """
    objstorage_root = tmp_path / 'objstorage'
    objstorage_root.mkdir()
    db_url = 'postgresql://{user}@{host}:{port}/{dbname}'.format(
        host=postgresql_proc.host,
        port=postgresql_proc.port,
        user='postgres',
        dbname='tests')
    return swh.storage.get_storage(
        cls='local',
        args={
            'db': db_url,
            'objstorage': {
                'cls': 'pathslicing',
                'args': {
                    'root': str(objstorage_root),
                    'slicing': '0:1/1:5',
                },
            },
            'journal_writer': {
                'cls': 'memory',
            },
        },
    )
def gen_origins(n=20):
    """Generate a list of `n` origin dicts with unique urls.

    Hypothesis is used here only as a convenient data generator: a single
    ``.example()`` call produces one fixed-size dataset, no property-based
    exploration is involved.
    """
    strategy = strategies.lists(
        origins().map(lambda origin: origin.to_dict()),
        unique_by=lambda d: d['url'],
        min_size=n,
        max_size=n,
    )
    return strategy.example()
def gen_contents(n=20):
    """Generate a list of `n` content dicts, unique by (sha1, sha1_git).

    As with gen_origins, Hypothesis only serves as a dataset generator
    via a single ``.example()`` call.
    """
    strategy = strategies.lists(
        contents().map(lambda content: content.to_dict()),
        unique_by=lambda d: (d['sha1'], d['sha1_git']),
        min_size=n,
        max_size=n,
    )
    return strategy.example()
Not Done Inline Actionsshould be named origin_lists and contents_lists. vlorentz: should be named `origin_lists` and `contents_lists`. | |||||
Not Done Inline Actionsnot done vlorentz: not done | |||||
Done Inline ActionsI do prefer gen_xxx since it's a function that generates these objects. xxx_lists seems less clear to me. douardda: I do prefer `gen_xxx` since it's a function that generates these objects. `xxx_lists` seems… | |||||
Not Done Inline Actionsbut it doesn't follow Hypothesis' naming convention. vlorentz: but it doesn't follow Hypothesis' naming convention. | |||||
@pytest.fixture
def swh_contents():
    """Expose the gen_contents generator function itself.

    Note: this returns the function, not a generated list; tests call it
    to produce content dicts on demand.
    """
    return gen_contents
@pytest.fixture
def swh_origins(swh_storage):
    """Add 20 generated origins to the test storage and return them.

    Reuses gen_origins() instead of duplicating the Hypothesis strategy
    definition inline, keeping the generation logic in one place.
    """
    new_origins = gen_origins(n=20)
    return swh_storage.origin_add(new_origins)
# The postgresql_fact factory below is mostly a copy of the factory code
# from pytest-postgresql. We need a custom version here to be able to
# specify our own DatabaseJanitor subclass (SwhDatabaseJanitor, defined
# below), which initializes the database only once and truncates tables
# between tests instead of dropping/recreating the database every time.
def postgresql_fact(process_fixture_name, db_name=None):
    """Build a postgresql connection fixture bound to the given process
    fixture, using SwhDatabaseJanitor for setup/teardown.

    :param str process_fixture_name: name of the postgresql process fixture
    :param str db_name: database name (defaults to the pytest-postgresql
        configured ``dbname``)
    """
    @pytest.fixture
    def postgresql_factory(request):
        """Fixture factory for PostgreSQL.

        :param FixtureRequest request: fixture request object
        :rtype: psycopg2.connection
        :returns: postgresql client
        """
        config = factories.get_config(request)
        if not psycopg2:
            raise ImportError(
                'No module named psycopg2. Please install it.'
            )
        proc_fixture = request.getfixturevalue(process_fixture_name)

        pg_host = proc_fixture.host
        pg_port = proc_fixture.port
        pg_user = proc_fixture.user
        pg_options = proc_fixture.options
        pg_db = db_name or config['dbname']

        with SwhDatabaseJanitor(
            pg_user, pg_host, pg_port, pg_db, proc_fixture.version
        ):
            connection = psycopg2.connect(
                dbname=pg_db,
                user=pg_user,
                host=pg_host,
                port=pg_port,
                options=pg_options
            )
            yield connection
            connection.close()

    return postgresql_factory
swh_storage_postgresql = postgresql_fact('postgresql_proc') | |||||
# This version of the DatabaseJanitor implements a different setup/teardown
# behavior than the stock one: instead of dropping, creating and
# initializing the database for each test, it creates and initializes the
# db only once, then truncates the tables (and resets sequences) between
# tests. This is needed to keep test performance acceptable.
class SwhDatabaseJanitor(DatabaseJanitor):
    """DatabaseJanitor variant that creates and populates the database only
    once, then merely empties tables and resets sequences between tests.
    """

    def db_setup(self):
        """Populate the database by replaying every SQL dump file, in
        numeric filename order.
        """
        with psycopg2.connect(
                dbname=self.db_name,
                user=self.user,
                host=self.host,
                port=self.port,
        ) as cnx:
            with cnx.cursor() as cur:
                dump_files = sorted(glob.glob(DUMP_FILES), key=sortkey)
                for fname in dump_files:
                    with open(fname) as fobj:
                        # 'create index concurrently' cannot run inside a
                        # transaction block, so strip the keyword
                        sql = fobj.read().replace('concurrently', '')
                        cur.execute(sql)
            cnx.commit()

    def db_reset(self):
        """Truncate every table and restart every sequence of the public
        schema, leaving the schema itself in place.
        """
        with psycopg2.connect(
                dbname=self.db_name,
                user=self.user,
                host=self.host,
                port=self.port,
        ) as cnx:
            with cnx.cursor() as cur:
                cur.execute(
                    "SELECT table_name FROM information_schema.tables "
                    "WHERE table_schema = %s", ('public',))
                for table in {name for (name,) in cur.fetchall()}:
                    cur.execute('truncate table %s cascade' % table)
                cur.execute(
                    "SELECT sequence_name FROM information_schema.sequences "
                    "WHERE sequence_schema = %s", ('public',))
                for seq in {name for (name,) in cur.fetchall()}:
                    cur.execute('ALTER SEQUENCE %s RESTART;' % seq)
            cnx.commit()

    def init(self):
        """Create and populate the database on first use; on subsequent
        uses, re-allow connections and reset its content instead.
        """
        with self.cursor() as cur:
            cur.execute(
                "SELECT COUNT(1) FROM pg_database WHERE datname=%s;",
                (self.db_name,))
            db_exists = cur.fetchone()[0] == 1
            if db_exists:
                cur.execute(
                    'UPDATE pg_database SET datallowconn=true '
                    'WHERE datname = %s;',
                    (self.db_name,))

        if db_exists:
            self.db_reset()
        else:
            with self.cursor() as cur:
                cur.execute('CREATE DATABASE "{}";'.format(self.db_name))
            self.db_setup()

    def drop(self):
        """Disallow new connections and terminate remaining backends.

        NOTE(review): unlike the parent implementation, this does not drop
        the database, so it can be reused by the next test session.
        """
        pid_column = 'pid'
        with self.cursor() as cur:
            cur.execute(
                'UPDATE pg_database SET datallowconn=false '
                'WHERE datname = %s;', (self.db_name,))
            cur.execute(
                'SELECT pg_terminate_backend(pg_stat_activity.{})'
                'FROM pg_stat_activity '
                'WHERE pg_stat_activity.datname = %s;'.format(pid_column),
                (self.db_name,))
Don't we want a fixture sqldir similar to the datadir we introduced in swh-core for the sql files?