diff --git a/PKG-INFO b/PKG-INFO index 7b0f805..cead7a6 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.core -Version: 0.0.5 +Version: 0.0.6 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/debian/control b/debian/control index 77ee085..2fa13a3 100644 --- a/debian/control +++ b/debian/control @@ -1,20 +1,21 @@ Source: swh-core Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python, python3-all, python3-celery, python3-dateutil, python3-msgpack, python3-nose, + python3-psycopg2, python3-setuptools, python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DCORE/ Package: python3-swh.core Architecture: all Depends: ${misc:Depends}, ${python3:Depends} Description: Software Heritage core utilities diff --git a/requirements.txt b/requirements.txt index ae46c28..5da06d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ celery msgpack-python +psycopg2 python-dateutil vcversioner diff --git a/sql/log-schema.sql b/sql/log-schema.sql new file mode 100644 index 0000000..d8dd5ec --- /dev/null +++ b/sql/log-schema.sql @@ -0,0 +1,33 @@ +--- +--- logging data model +--- + +create table dbversion +( + version int primary key, + release timestamptz, + description text +); + +insert into dbversion(version, release, description) + values(1, now(), 'Work In Progress'); + + +create type log_level as enum ('debug', 'info', 'warning', 'error', 'critical'); + +create table log +( + id bigserial primary key, + ts timestamptz not null default now(), + level log_level not null default 'info', -- importance + message text not null, -- human readable message + data jsonb, -- extra data; when NOT NULL, must contain a key "type" + -- denoting 
the kind of message within src_module + src_module text, -- fully-qualified source module, e.g., "swh.loader.git" + src_host text, -- FQDN source hostname, e.g., "worker03.softwareheritage.org" + src_pid int -- originating PID, relative to src_host +); + +create index on log (ts); +create index on log (src_module); +create index on log (src_host); diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index 7b0f805..cead7a6 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.core -Version: 0.0.5 +Version: 0.0.6 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.core.egg-info/SOURCES.txt b/swh.core.egg-info/SOURCES.txt index 12e5195..08b0a2e 100644 --- a/swh.core.egg-info/SOURCES.txt +++ b/swh.core.egg-info/SOURCES.txt @@ -1,27 +1,30 @@ .gitignore MANIFEST.in Makefile requirements.txt setup.py version.txt bin/swh-hashdir bin/swh-hashfile debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format +sql/log-schema.sql swh.core.egg-info/PKG-INFO swh.core.egg-info/SOURCES.txt swh.core.egg-info/dependency_links.txt swh.core.egg-info/requires.txt swh.core.egg-info/top_level.txt swh/core/config.py swh/core/hashutil.py +swh/core/logger.py swh/core/scheduling.py swh/core/serializers.py +swh/core/tests/db_testing.py swh/core/tests/test_config.py swh/core/tests/test_hashutil.py swh/core/tests/test_scheduling.py swh/core/tests/test_serializers.py \ No newline at end of file diff --git a/swh.core.egg-info/requires.txt b/swh.core.egg-info/requires.txt index ae46c28..5da06d5 100644 --- a/swh.core.egg-info/requires.txt +++ b/swh.core.egg-info/requires.txt @@ -1,4 +1,5 @@ celery msgpack-python +psycopg2 python-dateutil vcversioner diff --git a/swh/core/config.py 
b/swh/core/config.py index 7634184..0e285ee 100644 --- a/swh/core/config.py +++ b/swh/core/config.py @@ -1,184 +1,185 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import configparser import os SWH_CONFIG_DIRECTORIES = [ '~/.config/swh', '~/.swh', '/etc/softwareheritage', ] SWH_GLOBAL_CONFIG = 'global.ini' SWH_DEFAULT_GLOBAL_CONFIG = { 'content_size_limit': ('int', 100 * 1024 * 1024), + 'log_db': ('str', 'dbname=softwareheritage-log'), } # conversion per type _map_convert_fn = { 'int': int, 'bool': lambda x: x.lower() == 'true', 'list[str]': lambda x: [value.strip() for value in x.split(',')], 'list[int]': lambda x: [int(value.strip()) for value in x.split(',')], } def read(conf_file=None, default_conf=None): """Read the user's configuration file. Fill in the gap using `default_conf`. `default_conf` is similar to this: DEFAULT_CONF = { 'a': ('string', '/tmp/swh-loader-git/log'), 'b': ('string', 'dbname=swhloadergit') 'c': ('bool', true) 'e': ('bool', None) 'd': ('int', 10) } If conf_file is None, return the default config. 
""" conf = {} if conf_file: config_path = os.path.expanduser(conf_file) if os.path.exists(config_path): config = configparser.ConfigParser(defaults=default_conf) config.read(os.path.expanduser(conf_file)) if 'main' in config._sections: conf = config._sections['main'] if not default_conf: default_conf = {} # remaining missing default configuration key are set # also type conversion is enforced for underneath layer for key in default_conf: nature_type, default_value = default_conf[key] val = conf.get(key, None) if not val: # fallback to default value conf[key] = default_value else: # value present but in string format, force type conversion conf[key] = _map_convert_fn.get(nature_type, lambda x: x)(val) return conf def priority_read(conf_filenames, default_conf=None): """Try reading the configuration files from conf_filenames, in order, and return the configuration from the first one that exists. default_conf has the same specification as it does in read. """ # Try all the files in order for filename in conf_filenames: full_filename = os.path.expanduser(filename) if os.path.exists(full_filename): return read(full_filename, default_conf) # Else, return the default configuration return read(None, default_conf) def merge_default_configs(base_config, *other_configs): """Merge several default config dictionaries, from left to right""" full_config = base_config.copy() for config in other_configs: full_config.update(config) return full_config def swh_config_paths(base_filename): """Return the Software Heritage specific configuration paths for the given filename.""" return [os.path.join(dirname, base_filename) for dirname in SWH_CONFIG_DIRECTORIES] def prepare_folders(conf, *keys): """Prepare the folder mentioned in config under keys. 
""" def makedir(folder): if not os.path.exists(folder): os.makedirs(folder) for key in keys: makedir(conf[key]) def load_global_config(): """Load the global Software Heritage config""" return priority_read( swh_config_paths(SWH_GLOBAL_CONFIG), SWH_DEFAULT_GLOBAL_CONFIG, ) class SWHConfig: """Mixin to add configuration parsing abilities to classes The class should override the class attributes: - DEFAULT_CONFIG (default configuration to be parsed) - CONFIG_FILENAME (the filename of the configuration to be used) This class defines one classmethod, parse_config_file, which parses a configuration file using the default config as set in the class attribute. """ DEFAULT_CONFIG = {} CONFIG_BASE_FILENAME = '' @classmethod def parse_config_file(cls, base_filename=None, config_filename=None, additional_configs=None, global_config=True): """Parse the configuration file associated to the current class. By default, parse_config_file will load the configuration cls.CONFIG_BASE_FILENAME from one of the Software Heritage configuration directories, in order, unless it is overridden by base_filename or config_filename (which shortcuts the file lookup completely). Args: - base_filename (str) overrides the default cls.CONFIG_BASE_FILENAME - config_filename (str) sets the file to parse instead of the defaults set from cls.CONFIG_BASE_FILENAME - additional_configs (list of default configuration dicts) allows to override or extend the configuration set in cls.DEFAULT_CONFIG. 
- global_config (bool): Load the global configuration (default: True) """ if config_filename: config_filenames = [config_filename] else: if not base_filename: base_filename = cls.CONFIG_BASE_FILENAME config_filenames = swh_config_paths(base_filename) if not additional_configs: additional_configs = [] full_default_config = merge_default_configs(cls.DEFAULT_CONFIG, *additional_configs) config = {} if global_config: config = load_global_config() config.update(priority_read(config_filenames, full_default_config)) return config diff --git a/swh/core/logger.py b/swh/core/logger.py new file mode 100644 index 0000000..0c2298a --- /dev/null +++ b/swh/core/logger.py @@ -0,0 +1,83 @@ +# Copyright (C) 2015 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import logging +import os +import psycopg2 +import socket + +from psycopg2.extras import Json + + +EXTRA_LOGDATA_PREFIX = 'swh_' + + +def db_level_of_py_level(lvl): + """convert a log level of the logging module to a log level suitable for the + logging Postgres DB + + """ + return logging.getLevelName(lvl).lower() + + +class PostgresHandler(logging.Handler): + """log handler that store messages in a Postgres DB + + See swh-core/sql/log-schema.sql for the DB schema. + + All logging methods can be used as usual. Additionally, arbitrary metadata + can be passed to logging methods, requesting that they will be stored in + the DB as a single JSONB value. To do so, pass a dictionary to the 'extra' + kwarg of any logging method; all keys in that dictionary that start with + EXTRA_LOGDATA_PREFIX (currently: 'swh_') will be extracted to form the + JSONB dictionary. The prefix will be stripped and not included in the DB. 
+ + Sample usage: + + logging.basicConfig(level=logging.INFO) + h = PostgresHandler({'log_db': 'dbname=softwareheritage-log'}) + logging.getLogger().addHandler(h) + + logger.info('not so important notice', + extra={'swh_type': 'swh_logging_test', + 'swh_meditation': 'guru'}) + logger.warning('something weird just happened, did you see that?') + + """ + + def __init__(self, config): + """ + Create a Postgres log handler. + + Args: + config: configuration dictionary, with a key "log_db" containing a + libpq connection string to the log DB + """ + super().__init__() + self.config = config + + self.conn = psycopg2.connect(self.config['log_db']) + + self.fqdn = socket.getfqdn() # cache FQDN value + + def emit(self, record): + log_data = record.__dict__ + + extra_data = {k[len(EXTRA_LOGDATA_PREFIX):]: v + for k, v in log_data.items() + if k.startswith(EXTRA_LOGDATA_PREFIX)} + log_entry = (db_level_of_py_level(log_data['levelno']), + record.getMessage(),  # message with %-args merged in + Json(extra_data), + log_data['module'],  # NOTE: bare module name, not dotted + self.fqdn, + os.getpid()) + + with self.conn.cursor() as cur: + cur.execute('INSERT INTO log ' + '(level, message, data, src_module, src_host, src_pid)' + 'VALUES (%s, %s, %s, %s, %s, %s)', + log_entry) + self.conn.commit() diff --git a/swh/core/tests/db_testing.py b/swh/core/tests/db_testing.py new file mode 100644 index 0000000..8cba731 --- /dev/null +++ b/swh/core/tests/db_testing.py @@ -0,0 +1,140 @@ +# Copyright (C) 2015 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +import psycopg2 +import subprocess + + +TEST_DB_NAME = 'softwareheritage-test' +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA_DIR = os.path.join(TEST_DIR, '../../../../swh-storage-testdata') +TEST_DB_DUMP = os.path.join(TEST_DATA_DIR, 'dumps/swh.dump') + + +def pg_restore(dbname, dumpfile): + 
subprocess.check_call(['pg_restore', '--no-owner', '--no-privileges', + '--dbname', dbname, dumpfile]) + + +def pg_dump(dbname, dumpfile): + subprocess.check_call(['pg_dump', '--no-owner', '--no-privileges', '-Fc', + '-f', dumpfile, dbname]) + + +def pg_dropdb(dbname): + subprocess.check_call(['dropdb', dbname]) + + +def pg_createdb(dbname): + subprocess.check_call(['createdb', dbname]) + + +def db_create(test_subj, dbname=TEST_DB_NAME, dbdump=TEST_DB_DUMP): + """create the test DB and load the test data dump into it + + context: setUpClass + + """ + try: + pg_createdb(dbname) + except subprocess.CalledProcessError: # try recovering once, in case + pg_dropdb(dbname) # the db already existed + pg_createdb(dbname) + pg_restore(dbname, dbdump) + test_subj.dbname = dbname + + +def db_destroy(test_subj): + """destroy the test DB + + context: tearDownClass + + """ + pg_dropdb(test_subj.dbname) + + +def db_connect(test_subj): + """connect to the test DB and open a cursor + + context: setUp + + """ + test_subj.conn = psycopg2.connect('dbname=' + test_subj.dbname) + test_subj.cursor = test_subj.conn.cursor() + + +def db_close(test_subj): + """rollback current transaction and disconnect from the test DB + + context: tearDown + + """ + if not test_subj.conn.closed: + test_subj.conn.rollback() + test_subj.conn.close() + + +class DbTestFixture(): + """Mix this in a test subject class to get DB testing support. + + The test case class will then have the following attributes, accessible via + self: + + dbname: name of the test database + conn: psycopg2 connection object + cursor: open psycopg2 cursor to the DB + + To ensure test isolation, each test method of the test case class will + execute in its own connection, cursor, and transaction. + + To ensure setup/teardown methods are called, in case of multiple + inheritance DbTestFixture should be the first class in the inheritance + hierarchy. 
+ + Note that if you want to define setup/teardown methods, you need to + explicitly call super() to ensure that the fixture setup/teardown methods + are invoked. Here is an example where all setup/teardown methods are + defined in a test case: + + class TestDb(DbTestFixture, unittest.TestCase): + + @classmethod + def setUpClass(cls): + super().setUpClass() + # your class setup code here + + def setUp(self): + super().setUp() + # your instance setup code here + + def tearDown(self): + # your instance teardown code here + super().tearDown() + + @classmethod + def tearDownClass(cls): + # your class teardown code here + super().tearDownClass() + + """ + + @classmethod + def setUpClass(cls): + db_create(cls) + super().setUpClass() + + def setUp(self): + db_connect(self) + super().setUp() + + def tearDown(self): + super().tearDown() + db_close(self) + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + db_destroy(cls) diff --git a/version.txt b/version.txt index f65fcd3..a78aec6 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.5-0-g4beac75 \ No newline at end of file +v0.0.6-0-gbe6d313 \ No newline at end of file