Changeset View
Standalone View
swh/storage/tests/conftest.py
- This file was added.
from os import path, environ | |||||
import glob | |||||
import pytest | |||||
from pytest_postgresql import factories | |||||
from pytest_postgresql.janitor import DatabaseJanitor, psycopg2 | |||||
from hypothesis import strategies | |||||
from swh.model.hypothesis_strategies import origins, contents | |||||
from swh.core.utils import numfile_sortkey as sortkey | |||||
import swh.storage | |||||
SQL_DIR = path.join(path.dirname(swh.storage.__file__), 'sql') | |||||
environ['LC_ALL'] = 'C.UTF-8' | |||||
DUMP_FILES = path.join(SQL_DIR, '*.sql') | |||||
ardumont: Don't we want a fixture `sqldir` similar to the datadir we introduced in swh-core for the sql… | |||||
@pytest.fixture
def swh_storage(postgresql_proc, swh_storage_postgresql, tmp_path):
    """Instantiate a local swh storage for tests.

    The storage is backed by the test postgres database (managed by the
    swh_storage_postgresql fixture), a pathslicing objstorage rooted in a
    per-test temporary directory, and an in-memory journal writer.
    """
    objstorage_root = tmp_path / 'objstorage'
    objstorage_root.mkdir()
    db_url = 'postgresql://{user}@{host}:{port}/{dbname}'.format(
        host=postgresql_proc.host,
        port=postgresql_proc.port,
        user='postgres',
        dbname='tests')
    return swh.storage.get_storage(
        cls='local',
        args={
            'db': db_url,
            'objstorage': {
                'cls': 'pathslicing',
                'args': {
                    'root': str(objstorage_root),
                    'slicing': '0:1/1:5',
                },
            },
            'journal_writer': {
                'cls': 'memory',
            },
        },
    )
def gen_origins(n=20):
    """Generate a list of `n` origin dicts with unique urls.

    Hypothesis is used here only as a convenient data generator: a single
    ``.example()`` call produces one fixed-size dataset, no property-based
    exploration is involved.
    """
    strategy = strategies.lists(
        origins().map(lambda origin: origin.to_dict()),
        unique_by=lambda d: d['url'],
        min_size=n,
        max_size=n,
    )
    return strategy.example()
def gen_contents(n=20):
    """Generate a list of `n` content dicts, unique by (sha1, sha1_git).

    As with gen_origins, Hypothesis only serves as a dataset generator
    via a single ``.example()`` call.
    """
    strategy = strategies.lists(
        contents().map(lambda content: content.to_dict()),
        unique_by=lambda d: (d['sha1'], d['sha1_git']),
        min_size=n,
        max_size=n,
    )
    return strategy.example()
Not Done Inline Actionsshould be named origin_lists and contents_lists. vlorentz: should be named `origin_lists` and `contents_lists`. | |||||
Not Done Inline Actionsnot done vlorentz: not done | |||||
Done Inline ActionsI do prefer gen_xxx since it's a function that generates these objects. xxx_lists seems less clear to me. douardda: I do prefer `gen_xxx` since it's a function that generates these objects. `xxx_lists` seems… | |||||
Not Done Inline Actionsbut it doesn't follow Hypothesis' naming convention. vlorentz: but it doesn't follow Hypothesis' naming convention. | |||||
@pytest.fixture
def swh_contents():
    """Expose the gen_contents generator function itself.

    Note: this returns the function, not a generated list; tests call it
    to produce content dicts on demand.
    """
    return gen_contents
@pytest.fixture
def swh_origins(swh_storage):
    """Add 20 generated origins to the test storage and return them.

    Reuses gen_origins() instead of duplicating the Hypothesis strategy
    definition inline, keeping the generation logic in one place.
    """
    new_origins = gen_origins(n=20)
    return swh_storage.origin_add(new_origins)
# The postgresql_fact factory below is mostly a copy of the factory code
# from pytest-postgresql. We need a custom version here to be able to
# specify our own DatabaseJanitor subclass (SwhDatabaseJanitor, defined
# below), which initializes the database only once and truncates tables
# between tests instead of dropping/recreating the database every time.
def postgresql_fact(process_fixture_name, db_name=None):
    """Build a postgresql connection fixture bound to the given process
    fixture, using SwhDatabaseJanitor for setup/teardown.

    :param str process_fixture_name: name of the postgresql process fixture
    :param str db_name: database name (defaults to the pytest-postgresql
        configured ``dbname``)
    """
    @pytest.fixture
    def postgresql_factory(request):
        """Fixture factory for PostgreSQL.

        :param FixtureRequest request: fixture request object
        :rtype: psycopg2.connection
        :returns: postgresql client
        """
        config = factories.get_config(request)
        if not psycopg2:
            raise ImportError(
                'No module named psycopg2. Please install it.'
            )
        proc_fixture = request.getfixturevalue(process_fixture_name)

        pg_host = proc_fixture.host
        pg_port = proc_fixture.port
        pg_user = proc_fixture.user
        pg_options = proc_fixture.options
        pg_db = db_name or config['dbname']

        with SwhDatabaseJanitor(
            pg_user, pg_host, pg_port, pg_db, proc_fixture.version
        ):
            connection = psycopg2.connect(
                dbname=pg_db,
                user=pg_user,
                host=pg_host,
                port=pg_port,
                options=pg_options
            )
            yield connection
            connection.close()

    return postgresql_factory
swh_storage_postgresql = postgresql_fact('postgresql_proc') | |||||
# This version of the DatabaseJanitor implements a different setup/teardown
# behavior than the stock one: instead of dropping, creating and
# initializing the database for each test, it creates and initializes the
# db only once, then truncates the tables (and resets sequences) between
# tests. This is needed to keep test performance acceptable.
class SwhDatabaseJanitor(DatabaseJanitor):
    """DatabaseJanitor variant that creates and populates the database only
    once, then merely empties tables and resets sequences between tests.
    """

    def db_setup(self):
        """Populate the database by replaying every SQL dump file, in
        numeric filename order.
        """
        with psycopg2.connect(
                dbname=self.db_name,
                user=self.user,
                host=self.host,
                port=self.port,
        ) as cnx:
            with cnx.cursor() as cur:
                dump_files = sorted(glob.glob(DUMP_FILES), key=sortkey)
                for fname in dump_files:
                    with open(fname) as fobj:
                        # 'create index concurrently' cannot run inside a
                        # transaction block, so strip the keyword
                        sql = fobj.read().replace('concurrently', '')
                        cur.execute(sql)
            cnx.commit()

    def db_reset(self):
        """Truncate every table and restart every sequence of the public
        schema, leaving the schema itself in place.
        """
        with psycopg2.connect(
                dbname=self.db_name,
                user=self.user,
                host=self.host,
                port=self.port,
        ) as cnx:
            with cnx.cursor() as cur:
                cur.execute(
                    "SELECT table_name FROM information_schema.tables "
                    "WHERE table_schema = %s", ('public',))
                for table in {name for (name,) in cur.fetchall()}:
                    cur.execute('truncate table %s cascade' % table)
                cur.execute(
                    "SELECT sequence_name FROM information_schema.sequences "
                    "WHERE sequence_schema = %s", ('public',))
                for seq in {name for (name,) in cur.fetchall()}:
                    cur.execute('ALTER SEQUENCE %s RESTART;' % seq)
            cnx.commit()

    def init(self):
        """Create and populate the database on first use; on subsequent
        uses, re-allow connections and reset its content instead.
        """
        with self.cursor() as cur:
            cur.execute(
                "SELECT COUNT(1) FROM pg_database WHERE datname=%s;",
                (self.db_name,))
            db_exists = cur.fetchone()[0] == 1
            if db_exists:
                cur.execute(
                    'UPDATE pg_database SET datallowconn=true '
                    'WHERE datname = %s;',
                    (self.db_name,))

        if db_exists:
            self.db_reset()
        else:
            with self.cursor() as cur:
                cur.execute('CREATE DATABASE "{}";'.format(self.db_name))
            self.db_setup()

    def drop(self):
        """Disallow new connections and terminate remaining backends.

        NOTE(review): unlike the parent implementation, this does not drop
        the database, so it can be reused by the next test session.
        """
        pid_column = 'pid'
        with self.cursor() as cur:
            cur.execute(
                'UPDATE pg_database SET datallowconn=false '
                'WHERE datname = %s;', (self.db_name,))
            cur.execute(
                'SELECT pg_terminate_backend(pg_stat_activity.{})'
                'FROM pg_stat_activity '
                'WHERE pg_stat_activity.datname = %s;'.format(pid_column),
                (self.db_name,))
Don't we want a fixture sqldir similar to the datadir we introduced in swh-core for the sql files?