diff --git a/PKG-INFO b/PKG-INFO index c633be4..862c2ca 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,39 +1,39 @@ Metadata-Version: 2.1 Name: swh.core -Version: 2.9.0 +Version: 2.10 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: http Provides-Extra: github Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Core foundations ==================================== Low-level utilities and helpers used by almost all other modules in the stack. core library for swh's modules: - config parser - serialization - logging mechanism - database connection - http-based RPC client/server diff --git a/debian/changelog b/debian/changelog index 77872d9..aa4d75c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,1376 +1,1384 @@ -swh-core (2.9.0-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-core (2.10-1~swh1) unstable-swh; urgency=medium - * Rebuild for buster-swh - - -- Software Heritage autobuilder (on jenkins-debian1) Mon, 30 May 2022 15:43:34 +0000 + * New upstream release 2.10 - (tagged by Antoine R. Dumont + (@ardumont) on 2022-06-02 16:03:41 + +0200) + * Upstream changes: - v2.10 - github/utils: Deal with exotic + urls to canonicalize - deprecate the db/pytest_plugin.py module + - move initialize_database_for_module in db_utils - mark + postgresql_fact fixture factory function as deprecated - tests: + use stock pytest_postgresql factory function - docs/db: Update + datastore requirement + + -- Software Heritage autobuilder (on jenkins-debian1) Thu, 02 Jun 2022 14:39:11 +0000 swh-core (2.9.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.9.0 - (tagged by Antoine R. Dumont (@ardumont) on 2022-05-30 17:36:23 +0200) * Upstream changes: - v2.9.0 - Allow module to specify another config key than their module name - cli.db: Reword ignore sentence -- Software Heritage autobuilder (on jenkins-debian1) Mon, 30 May 2022 15:41:42 +0000 swh-core (2.8.1-1~swh1) unstable-swh; urgency=medium * New upstream release 2.8.1 - (tagged by Antoine R. Dumont (@ardumont) on 2022-05-30 17:06:11 +0200) * Upstream changes: - v2.8.1 - cli.db: Use attribute current_version instead of undeclared getter - cli.db: Fix help message typo -- Software Heritage autobuilder (on jenkins-debian1) Mon, 30 May 2022 15:10:17 +0000 swh-core (2.8.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.8.0 - (tagged by Antoine R. Dumont (@ardumont) on 2022-05-20 18:17:20 +0200) * Upstream changes: - v2.8.0 - Deal with git protocol url to canonicalize to https -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 May 2022 16:21:13 +0000 swh-core (2.7.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.7.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2022-05-20 15:26:30 +0200) * Upstream changes: - v2.7.0 - Use GitHubSession to make canonical computation deal with rate limit -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 May 2022 13:30:15 +0000 swh-core (2.6.0-1~swh2) unstable-swh; urgency=medium * Bump new release -- Antoine R. Dumont (@ardumont) Fri, 20 May 2022 10:26:03 +0200 swh-core (2.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.6.0 - (tagged by Antoine R. Dumont (@ardumont) on 2022-05-20 08:54:46 +0200) * Upstream changes: - v2.6.0 - Extract reusable github tests fixtures into its own pytest_plugin - test_db/test_db_copy_to: Fix hypothesis FailedHealthCheck error - Refactor swh.lister.github.utils to swh.core.github.utils - Add utility function to retrieve canonical github urls - Make 'python -m swh' work as cli entry point - db_utils: Make connect_to_conninfo use through contextmanager - Upgrade mypy to 0.942 to fix support of types-psycopg2 >= 2.9.12 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 May 2022 06:58:23 +0000 swh-core (2.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.5.0 - (tagged by Valentin Lorentz on 2022-04-25 13:51:26 +0200) * Upstream changes: - v2.5.0 - * Make db_transaction's client_options configurable at run time - * sentry: always override init settings with the environment variables - * Add support for disabling logging-based events in sentry - * RPC server: explicitly handle sentry exception capture (instead of gunicorn) - * statsd: add an error_type tag to @timed error counters - * cli: Ensure tests don't mess with the global logging setup - * mypy/pre-commit/pytest maintenance -- Software Heritage autobuilder (on jenkins-debian1) Mon, 25 Apr 2022 11:56:02 +0000 swh-core (2.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.4.0 - (tagged by Valentin Lorentz on 2022-03-29 14:39:15 +0200) * Upstream changes: - v2.4.0 - * Fix support of Werkzeug 2.1.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 29 Mar 2022 12:42:28 +0000 swh-core (2.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.3.0 - (tagged by David Douard on 2022-03-14 17:19:37 +0100) * Upstream changes: - v2.3.0 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 14 Mar 2022 16:23:52 +0000 swh-core (2.2.2-1~swh1) unstable-swh; urgency=medium * New upstream release 2.2.2 - (tagged by David Douard on 2022-03-09 17:44:25 +0100) * Upstream changes: - v2.2.2 - small fixes in the `swh db upgrade` command -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Mar 2022 16:48:21 +0000 swh-core (2.2.1-1~swh1) unstable-swh; urgency=medium * New upstream release 2.2.1 - (tagged by Nicolas Dandrimont on 2022-03-03 20:20:11 +0100) * Upstream changes: - Release swh.core v2.2.1 - quiesce deprecation warnings for autogenerated RPC clients - fix typing for a few methods in db_utils -- Software Heritage autobuilder (on jenkins-debian1) Thu, 03 Mar 2022 19:24:59 +0000 swh-core (2.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.2.0 - (tagged by Valentin Lorentz on 2022-03-02 11:45:18 +0100) * Upstream changes: - v2.2.0 - * utils: Add a new 'iter_chunks' function -- Software Heritage autobuilder (on jenkins-debian1) Wed, 02 Mar 2022 10:47:47 +0000 swh-core (2.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 2.1.1 - (tagged by Valentin Lorentz on 2022-03-01 17:00:36 +0100) * Upstream changes: - v2.1.1 - * Fix wrong version numbers in deprecation message. 
-- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Mar 2022 16:03:05 +0000 swh-core (2.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.1.0 - (tagged by Valentin Lorentz on 2022-03-01 16:58:02 +0100) * Upstream changes: - v2.1.0 - * RPCClient: Make methods {get,post}{,_stream} protected - * RPCServerApp: Add hooks to add behaviors to generated methods -- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Mar 2022 16:00:51 +0000 swh-core (2.0.0-2~swh1) unstable-swh; urgency=medium * Bump dependency constranints for pytest and pytest-postgresql. -- David Douard Wed, 23 Feb 2022 10:56:37 +0100 swh-core (2.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 2.0.0 - (tagged by David Douard on 2022-02-17 15:26:59 +0100) * Upstream changes: - v2.0.0 - add support for generic db version handling for postgresql backends, - add support for generic db upgrade (for postgresql backends), - upgrade pytest- postgresql based tests scaffolding to use the - template-based db creation (instead of the truncate-based db reset). -- Software Heritage autobuilder (on jenkins-debian1) Wed, 23 Feb 2022 09:34:41 +0000 swh-core (1.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.1 - (tagged by Valentin Lorentz on 2022-02-04 12:52:06 +0100) * Upstream changes: - v1.1.1 - * Require pytest to be <7.0.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 04 Feb 2022 11:54:45 +0000 swh-core (1.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.0 - (tagged by David Douard on 2022-01-20 15:39:02 +0100) * Upstream changes: - v1.1.0 - add a Statsd.status_gauge() context manager - add support for env var substitution in STATS_TAGS - pin mypy version and clean reauirements a bit -- Software Heritage autobuilder (on jenkins-debian1) Thu, 20 Jan 2022 14:44:14 +0000 swh-core (1.0.0-2~swh1) unstable-swh; urgency=medium * Add missing B-D on python3-blinker for sentry-sdk -- Nicolas Dandrimont Thu, 02 Dec 2021 12:34:50 +0100 swh-core (1.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.0 - (tagged by David Douard on 2021-12-02 10:51:10 +0100) * Upstream changes: - v1.0.0 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 02 Dec 2021 10:00:19 +0000 swh-core (0.15.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.1 - (tagged by Valentin Lorentz on 2021-11-08 14:10:08 +0100) * Upstream changes: - v0.15.1 - * Require pytest-postgresql < 4.0 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 08 Nov 2021 13:12:36 +0000 swh-core (0.15.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2021-09-20 11:45:26 +0200) * Upstream changes: - v0.15.0 - tarball: Fallback to guess archive format from mimetype when no format detected -- Software Heritage autobuilder (on jenkins-debian1) Mon, 20 Sep 2021 09:48:28 +0000 swh-core (0.14.6-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.6 - (tagged by Antoine Lambert on 2021-09-16 10:48:35 +0200) * Upstream changes: - version 0.14.6 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 16 Sep 2021 08:51:58 +0000 swh-core (0.14.5-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.5 - (tagged by Valentin Lorentz on 2021-08-30 10:31:47 +0200) * Upstream changes: - v0.14.5 - * tarball: Add support for .tbz, .tbz2, and .jar -- Software Heritage autobuilder (on jenkins-debian1) Mon, 30 Aug 2021 08:34:03 +0000 swh-core (0.14.4-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.4 - (tagged by Valentin Lorentz on 2021-07-30 16:44:26 +0200) * Upstream changes: - v0.14.4 - * add stream_results_optional -- Software Heritage autobuilder (on jenkins-debian1) Fri, 30 Jul 2021 14:47:25 +0000 swh-core (0.14.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.3 - (tagged by Antoine Lambert on 2021-06-11 15:41:38 +0200) * Upstream changes: - version 0.14.3 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Jun 2021 13:47:13 +0000 swh-core (0.14.2-2~swh1) unstable-swh; urgency=medium * Rebuild v0.14.2 after missing unzip dependency -- Antoine Lambert Thu, 10 Jun 2021 16:24:09 +0200 swh-core (0.14.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.2 - (tagged by Antoine Lambert on 2021-06-10 16:09:06 +0200) * Upstream changes: - version 0.14.2 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 10 Jun 2021 14:13:25 +0000 swh-core (0.14.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.1 - (tagged by Valentin Lorentz on 2021-05-06 15:33:56 +0200) * Upstream changes: - v0.14.1 - * Fix reserved name being used in pytest plugin -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 13:38:24 +0000 swh-core (0.14.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.0 - (tagged by Valentin Lorentz on 2021-05-06 14:14:10 +0200) * Upstream changes: - v0.14.0 - Add support for pytest- postgresql 3.0.0 - For consistency, I renamed db_name to dbname everywhere, so this will - affect other SWH packages. 
-- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 12:17:31 +0000 swh-core (0.13.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.3 - (tagged by Antoine Lambert on 2021-05-06 13:54:51 +0200) * Upstream changes: - version 0.13.3 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 11:59:49 +0000 swh-core (0.13.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.2 - (tagged by Valentin Lorentz on 2021-05-06 10:40:51 +0200) * Upstream changes: - v0.13.2 - * tarball: properly normalize perms for all extracted files - * requirements-db-pytestplugin: Don't install pytest-postgresql 3.0+ -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 08:44:45 +0000 swh-core (0.13.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.1 - (tagged by Antoine Lambert on 2021-04-29 14:21:29 +0200) * Upstream changes: - version 0.13.1 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 29 Apr 2021 12:25:15 +0000 swh-core (0.13.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.0 - (tagged by Vincent SELLIER on 2021-04-06 19:01:37 +0200) * Upstream changes: - v0.13.0 - Support several backends on RPCServerApp -- Software Heritage autobuilder (on jenkins-debian1) Tue, 06 Apr 2021 17:07:10 +0000 swh-core (0.12.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.12.1 - (tagged by Valentin Lorentz on 2021-04-06 12:34:34 +0200) * Upstream changes: - v0.12.1 - * tests: Drop hypothesis < 6 requirement - * README.rst: Remove getting-started instructions, they are duplicates - * Improve/fix documentation of requests_mock_datadir - * Remove dependency on 'decorator' (fixes a regression in decorator 5.0.5) -- Software Heritage autobuilder (on jenkins-debian1) Tue, 06 Apr 2021 10:37:17 +0000 swh-core (0.12.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.12.0 - (tagged by David Douard on 2021-02-16 11:48:58 +0100) * Upstream changes: - v0.12.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 16 Feb 2021 10:52:14 +0000 swh-core (0.11.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.0 - (tagged by David Douard on 2020-12-08 15:35:05 +0100) * Upstream changes: - v0.11.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Dec 2020 14:38:23 +0000 swh-core (0.10.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.0 - (tagged by Nicolas Dandrimont on 2020-12-02 11:46:38 +0100) * Upstream changes: - Release swh.core 0.10.0 - db.tests.db_testing: Drop unused database test utilities. - api.serializers: Add support for serializing large negative integers with - msgpack. - Do not mutate api.serializers.ENCODERS or DECODERS. - swh cli: Add support for setting up the log level of multiple loggers. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 02 Dec 2020 10:50:35 +0000 swh-core (0.9.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.1 - (tagged by Antoine Lambert on 2020-11-23 11:24:50 +0100) * Upstream changes: - version 0.9.1 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 10:29:15 +0000 swh-core (0.9.0-1~swh2) unstable-swh; urgency=medium * Split packages python3-swh.core and python3-swh.core.db.pytestplugin -- Antoine R. Dumont (@ardumont) Fri, 20 Nov 2020 16:37:00 +0000 swh-core (0.9.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.0 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-11-20 15:35:53 +0100) * Upstream changes: - v0.9.0 - Clarify names around the swh.core.db.pytest_plugin tests - setup: Separate pytest- postgresql dependency and declare it when needed - RPCClient: Fix reraise_exceptions regression - api/serializers: Add Exception type encoder and decoder - Makefile.local: Ensure all tests are executed when invoking make test - core.db.cli: Add coverage and ensure `swh db *` works as expected - core tests: disambiguate arg 'request' through typing -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 Nov 2020 14:37:00 +0000 swh-core (0.8.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-30 09:12:17 +0100) * Upstream changes: - v0.8.0 - cli.db: Open init-admin subcmd to initialize superuser-level scripts -- Software Heritage autobuilder (on jenkins-debian1) Fri, 30 Oct 2020 08:13:02 +0000 swh-core (0.7.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-22 18:51:16 +0200) * Upstream changes: - v0.7.1 - Move SWHDatabaseJanitor to db.pytest_plugin module -- Software Heritage autobuilder (on jenkins-debian1) Thu, 22 Oct 2020 16:51:59 +0000 swh-core (0.7.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-22 18:31:18 +0200) * Upstream changes: - v0.7.0 - remote_api_endpoint: Allow to declare what http method to use - api.RPCServerApp: Adapt sync rpc server with async rpc server -- Software Heritage autobuilder (on jenkins-debian1) Thu, 22 Oct 2020 16:32:04 +0000 swh-core (0.6.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-22 13:53:41 +0200) * Upstream changes: - v0.6.1 - Move pytest_plugin declaration to top-level conftest -- Software Heritage autobuilder (on jenkins-debian1) Thu, 22 Oct 2020 11:54:49 +0000 swh-core (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-22 13:24:55 +0200) * Upstream changes: - v0.6.0 - asynchronous.RPCServerApp: Align implementation with api.RPCServerApp -- Software Heritage autobuilder (on jenkins-debian1) Thu, 22 Oct 2020 11:25:29 +0000 swh-core (0.5.0-1~swh2) unstable-swh; urgency=medium * Bump new release with dependency updated -- Antoine R. Dumont thu, 22 Oct 2020 11:21:04 +0200 swh-core (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-22 11:11:29 +0200) * Upstream changes: - v0.5.0 - Install postgresql_fact fixture for faster postgres tests - api.tests.test_async: Simplify fixture setup - core.config: Drop no longer used SWHConfig -- Software Heritage autobuilder (on jenkins-debian1) Thu, 22 Oct 2020 09:12:14 +0000 swh-core (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-02 11:44:10 +0200) * Upstream changes: - v0.4.0 - config: Deprecate SWHConfig in favor of load_from_envvar function -- Software Heritage autobuilder (on jenkins-debian1) Fri, 02 Oct 2020 09:44:53 +0000 swh-core (0.3.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.1 - (tagged by Valentin Lorentz on 2020-10-01 12:38:08 +0200) * Upstream changes: - v0.3.1 - * Add specific celery task arguments to metadata sent to systemd-journald - * SortedList: Don't inherit from UserList. 
-- Software Heritage autobuilder (on jenkins-debian1) Thu, 01 Oct 2020 10:40:27 +0000 swh-core (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by David Douard on 2020-09-23 16:24:40 +0200) * Upstream changes: - v0.3.0 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 23 Sep 2020 14:27:08 +0000 swh-core (0.2.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.3 - (tagged by Valentin Lorentz on 2020-08-17 13:55:41 +0200) * Upstream changes: - v0.2.3 - * tarball: add test for permissions. - * Move SortedList from swh-storage. -- Software Heritage autobuilder (on jenkins-debian1) Mon, 17 Aug 2020 11:57:43 +0000 swh-core (0.2.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-31 13:41:19 +0200) * Upstream changes: - v0.2.2 - api.classes: Open swh.core.api.classes.stream_results -- Software Heritage autobuilder (on jenkins-debian1) Fri, 31 Jul 2020 11:44:33 +0000 swh-core (0.2.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.1 - (tagged by Valentin Lorentz on 2020-07-30 19:16:57 +0200) * Upstream changes: - v0.2.1 - Make @remote_api_endpoint preserve typing information for mypy. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 30 Jul 2020 17:20:05 +0000 swh-core (0.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-29 11:18:37 +0200) * Upstream changes: - v0.2.0 - core.api: Expose serializable PagedResult object for pagination api - test_serializers: Refactor using pytest - Migrate from vcversioner to setuptools- scm -- Software Heritage autobuilder (on jenkins-debian1) Wed, 29 Jul 2020 09:20:56 +0000 swh-core (0.1.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-08 13:41:51 +0200) * Upstream changes: - v0.1.2 - test_serializers: Move to pytest for that specific erratic assertion -- Software Heritage autobuilder (on jenkins-debian1) Wed, 08 Jul 2020 11:43:25 +0000 swh-core (0.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-08 13:03:52 +0200) * Upstream changes: - v0.1.1 - api.tests: Fix unsupported matches keyword to match - requirements-db: Move typing- extension from test to runtime deps -- Software Heritage autobuilder (on jenkins-debian1) Wed, 08 Jul 2020 11:07:55 +0000 swh-core (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by David Douard on 2020-07-06 14:33:28 +0200) * Upstream changes: - v0.1.0 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 Jul 2020 12:36:15 +0000 swh-core (0.0.95-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.95 - (tagged by Nicolas Dandrimont on 2020-04-17 17:20:35 +0200) * Upstream changes: - Release swh.core v0.0.95 - support serializing large integers in msgpack - add documentation for CLI - move formatting to black -- Software Heritage autobuilder (on jenkins-debian1) Fri, 17 Apr 2020 16:05:01 +0000 swh-core (0.0.94-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.94 - (tagged by Valentin Lorentz on 2020-02-28 14:28:11 +0100) * Upstream changes: - v0.0.94 - Allow subclasses of RPCClient to override methods. 
-- Software Heritage autobuilder (on jenkins-debian1) Fri, 28 Feb 2020 13:32:41 +0000 swh-core (0.0.93-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.93 - (tagged by Valentin Lorentz on 2020-02-26 15:26:29 +0100) * Upstream changes: - v0.0.93 - Reintroduce support for decoding legacy msgpack encoding -- Software Heritage autobuilder (on jenkins-debian1) Wed, 26 Feb 2020 14:29:46 +0000 swh-core (0.0.92-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.92 - (tagged by Valentin Lorentz on 2020-02-19 15:40:57 +0100) * Upstream changes: - v0.0.92 - Add support for msgpack 1.0.0 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 19 Feb 2020 14:45:27 +0000 swh-core (0.0.91-1~swh2) unstable-swh; urgency=medium * Add missing python3-iso8601 build dependency -- Antoine Lambert Wed, 19 Feb 2020 11:18:34 +0100 swh-core (0.0.91-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.91 - (tagged by Antoine Lambert on 2020-02-18 16:43:59 +0100) * Upstream changes: - version 0.0.91 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Feb 2020 15:51:39 +0000 swh-core (0.0.90-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.90 - (tagged by Valentin Lorentz on 2020-02-18 13:54:30 +0100) * Upstream changes: - v0.0.90 - Remove exception pickling from the async server, as in the sync server. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Feb 2020 12:59:38 +0000 swh-core (0.0.89-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.89 - (tagged by Valentin Lorentz on 2020-02-18 11:35:01 +0100) * Upstream changes: - v0.0.89 - * Change msgpack serialization to be closer to the JSON one. - * Add support for extra {de,en}coders. - * Add extra_type_encoders and extra_type_decoders attributes to RPC clients and servers. - * Use iso8601.parse_date instead of dateutil.parser.parse. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 18 Feb 2020 10:43:34 +0000 swh-core (0.0.88-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.88 - (tagged by Valentin Lorentz on 2020-02-14 12:22:23 +0100) * Upstream changes: - v0.0.88 - In case of errors, return a simple dictionary instead of pickled exception. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 14 Feb 2020 11:27:02 +0000 swh-core (0.0.87-1~swh2) unstable-swh; urgency=medium * Fix package build -- Antoine R. Dumont (@ardumont) Thu, 30 Jan 2020 14:06:54 +0100 swh-core (0.0.87-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.87 - (tagged by Valentin Lorentz on 2020-01-29 12:21:48 +0100) * Upstream changes: - v0.0.87 - Make db_transaction* remove db/cur from the signature. -- Software Heritage autobuilder (on jenkins-debian1) Wed, 29 Jan 2020 11:28:23 +0000 swh-core (0.0.86-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.86 - (tagged by Antoine R. 
Dumont (@ardumont) on 2020-01-23 09:08:56 +0100) * Upstream changes: - v0.0.86 - sentry: Add environment variable $SWH_SENTRY_ENVIRONMENT - pytest_plugin: Fix sphinx warnings -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Jan 2020 08:12:40 +0000 swh-core (0.0.85-1~swh2) unstable-swh; urgency=medium * Fix package build on buster -- Antoine Lambert Thu, 16 Jan 2020 11:00:00 +0000 swh-core (0.0.85-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.85 - (tagged by Valentin Lorentz on 2020-01-15 13:07:36 +0100) * Upstream changes: - v0.0.85 - Add env var SWH_MAIN_PACKAGE -- Software Heritage autobuilder (on jenkins-debian1) Wed, 15 Jan 2020 12:11:49 +0000 swh-core (0.0.84-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.84 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-13 09:08:00 +0100) * Upstream changes: - v0.0.84 - Improve tarball support for tar.lz, tar.x, tar.Z files -- Software Heritage autobuilder (on jenkins-debian1) Fri, 13 Dec 2019 08:14:23 +0000 swh-core (0.0.83-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.83 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-12 16:03:25 +0100) * Upstream changes: - v0.0.83 - core.config: Rename configuration key -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 Dec 2019 15:06:28 +0000 swh-core (0.0.82-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.82 - (tagged by Nicolas Dandrimont on 2019-12-11 15:04:09 +0100) * Upstream changes: - Release swh.core 0.0.82 - Add missing conftest.py to MANIFEST.in -- Software Heritage autobuilder (on jenkins-debian1) Wed, 11 Dec 2019 14:09:00 +0000 swh-core (0.0.81-1~swh2) unstable-swh; urgency=medium * Add dependency to python3-sentry-sdk -- Nicolas Dandrimont Wed, 11 Dec 2019 14:58:54 +0100 swh-core (0.0.81-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.81 - (tagged by Valentin Lorentz on 2019-12-10 13:59:17 +0100) * Upstream changes: - v0.0.81 - * Include all requirements in MANIFEST.in - * Split test requirements to try and properly minimize dependencies - * Make the CLI initialize sentry-sdk based on CLI options/envvars. - * Add gunicorn config script to initialize sentry-sdk based on envvars. 
-- Software Heritage autobuilder (on jenkins-debian1) Tue, 10 Dec 2019 13:03:13 +0000 swh-core (0.0.80-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.80 - (tagged by Nicolas Dandrimont on 2019-11-19 16:36:35 +0100) * Upstream changes: - Release swh.core v0.0.80 - Let TypeErrors pass through the RPC layer - Register SIGINT/SIGTERM handlers for the CLI -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 Nov 2019 15:41:09 +0000 swh-core (0.0.79-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.79 - (tagged by Stefano Zacchiroli on 2019-11-18 13:35:19 +0100) * Upstream changes: - v0.0.79 ======= - * RPCClient: add response attribute to RemoteException - * RPCClent: rename and refactor check_status (now raise_for_status) - * RPCClient: check HTTP status code for errors also when streaming - * cli: Add support for loading a logging configuration file - * cli: Allow adding a Notes section between Options and Commands - * Add trailing dot to help texts for consistency - * logger: only flatten dicts if all keys are strings - * Move to @pytest.fixture from yield_fixture - * test_rpc_client_server.py: fix typo in docstring -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Nov 2019 12:42:12 +0000 swh-core (0.0.78-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.78 - (tagged by Nicolas Dandrimont on 2019-11-06 18:01:56 +0100) * Upstream changes: - Release swh.core 0.0.78 - allow the swh command to work even when a plugin fails - hardcode bytea and bytea[] type oids in BaseDb -- Software Heritage autobuilder (on jenkins-debian1) Wed, 06 Nov 2019 17:05:38 +0000 swh-core (0.0.77-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.77 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-06 14:10:58 +0100) * Upstream changes: - v0.0.77 - pytest_plugin: Decode url to resolve filename - api/serializers: Force json module use to decode requests text response -- Software Heritage autobuilder (on jenkins-debian1) Wed, 06 Nov 2019 13:15:03 +0000 swh-core (0.0.76-1~swh2) unstable-swh; urgency=medium * Force using the swh.core pytest plugin -- Nicolas Dandrimont Wed, 23 Oct 2019 14:50:04 +0200 swh-core (0.0.76-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.76 - (tagged by Nicolas Dandrimont on 2019-10-18 10:16:20 +0200) * Upstream changes: - Release swh.core v0.0.76 - Make the systemd dependency optional -- Software Heritage autobuilder (on jenkins-debian1) Fri, 18 Oct 2019 08:25:15 +0000 swh-core (0.0.75-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.75 - (tagged by Antoine R. Dumont (@ardumont) on 2019-10-14 17:51:58 +0200) * Upstream changes: - v0.0.75 - pytest_plugin: Add support for http requests -- Software Heritage autobuilder (on jenkins-debian1) Mon, 14 Oct 2019 15:57:05 +0000 swh-core (0.0.74-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.74 - (tagged by David Douard on 2019-10-11 15:30:51 +0200) * Upstream changes: - v0.0.74 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 11 Oct 2019 13:35:17 +0000 swh-core (0.0.73-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.73 - (tagged by Antoine R. Dumont (@ardumont) on 2019-10-09 14:16:04 +0200) * Upstream changes: - v0.0.73 - Improve pytest-plugin fixture to ease testing with pagination -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Oct 2019 12:20:36 +0000 swh-core (0.0.72-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.72 - (tagged by Antoine R. 
Dumont (@ardumont) on 2019-10-09 10:59:28 +0200) * Upstream changes: - v0.0.72 - Fix tox.ini's py3 environment -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Oct 2019 09:02:51 +0000 swh-core (0.0.70-1~swh2) unstable-swh; urgency=medium * Add new dependency on python3-tz -- Nicolas Dandrimont Tue, 01 Oct 2019 15:07:09 +0200 swh-core (0.0.70-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.70 - (tagged by Stefano Zacchiroli on 2019-09-27 10:16:41 +0200) * Upstream changes: - v0.0.70 - init.py: switch to documented way of extending path -- Software Heritage autobuilder (on jenkins-debian1) Fri, 27 Sep 2019 08:21:29 +0000 swh-core (0.0.69-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.69 - (tagged by Stefano Zacchiroli on 2019-09-20 15:50:52 +0200) * Upstream changes: - v0.0.69 - MANIFEST.in: ship py.typed -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 Sep 2019 13:54:15 +0000 swh-core (0.0.68-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.68 - (tagged by Stefano Zacchiroli on 2019-09-20 15:05:29 +0200) * Upstream changes: - v0.0.68 - * mypy: ignore django-stubs, needed only by hypothesis - * mypy: use conffile to ignore requests_mock - * typing: minimal changes to make a no-op mypy run pass - * db_testing.py: do not explode when TEST_DB_DUMP = None - * swh.core.config.parse_config_file: fix sphinx markup in docstring - * statsd: protect access to the statsd's socket - * tests: add tests for swh.logger and swh.tarball modules - * Remove fallback when aiohttp_utils is not installed. -- Software Heritage autobuilder (on jenkins-debian1) Fri, 20 Sep 2019 13:09:50 +0000 swh-core (0.0.67-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.67 - (tagged by Valentin Lorentz on 2019-08-22 13:56:36 +0200) * Upstream changes: - v0.0.67 - Improve error handling in Db.copy_to -- Software Heritage autobuilder (on jenkins-debian1) Thu, 22 Aug 2019 12:02:14 +0000 swh-core (0.0.66-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.66 - (tagged by David Douard on 2019-07-30 13:55:16 +0200) * Upstream changes: - v0.0.66 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 30 Jul 2019 11:58:47 +0000 swh-core (0.0.65-1~swh2) unstable-swh; urgency=medium * debian/control: add missing dependencies. -- David Douard Tue, 16 Jul 2019 14:46:43 +0200 swh-core (0.0.65-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.65 - (tagged by David Douard on 2019-07-15 16:49:47 +0200) * Upstream changes: - v0.0.65 - needed to fix my mess with 0.0.64 tag, since the wrong 0.0.64 version has - already been pushed to pypi. 
-- Software Heritage autobuilder (on jenkins-debian1) Mon, 15 Jul 2019 14:53:29 +0000 swh-core (0.0.64-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.64 - (tagged by David Douard on 2019-07-15 16:33:32 +0200) * Upstream changes: - v0.0.64 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 15 Jul 2019 14:37:04 +0000 swh-core (0.0.63-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.63 - (tagged by Antoine Lambert on 2019-05-21 13:12:11 +0200) * Upstream changes: - version 0.0.63 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 21 May 2019 11:15:45 +0000 swh-core (0.0.62-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.62 - (tagged by Antoine Lambert on 2019-05-20 14:56:05 +0200) * Upstream changes: - version 0.0.62 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 20 May 2019 13:01:38 +0000 swh-core (0.0.61-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.61 - (tagged by David Douard on 2019-05-17 10:32:07 +0200) * Upstream changes: - v0.0.61 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 17 May 2019 08:38:08 +0000 swh-core (0.0.60-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.60 - (tagged by David Douard on 2019-05-06 15:27:44 +0200) * Upstream changes: - v0.0.60 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 May 2019 13:32:48 +0000 swh-core (0.0.59-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.59 - (tagged by Valentin Lorentz on 2019-04-09 16:55:41 +0200) * Upstream changes: - Explicitly give Db connections back to the pool. - So they gracefully release the connection on error instead - of relying on reference-counting to call the Db's `__del__` - (which does not happen in Hypothesis tests) because a ref - to it is kept via the traceback object. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 09 Apr 2019 16:12:32 +0000 swh-core (0.0.58-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.58 - (tagged by Antoine Lambert on 2019-04-02 17:19:05 +0200) * Upstream changes: - version 0.0.58 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 02 Apr 2019 15:24:34 +0000 swh-core (0.0.57-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.57 - (tagged by Nicolas Dandrimont on 2019-03-28 15:51:27 +0100) * Upstream changes: - Release swh.core v0.0.57 - Move to native async primitives - Fix statsd.timed exceptional behavior bug/misfeature - Fix SWHRemoteAPI post_stream method -- Software Heritage autobuilder (on jenkins-debian1) Thu, 28 Mar 2019 14:55:58 +0000 swh-core (0.0.56-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.56 - (tagged by David Douard on 2019-03-19 10:17:06 +0100) * Upstream changes: - v0.0.56 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 Mar 2019 09:27:18 +0000 swh-core (0.0.55-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.55 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-19 12:28:26 +0100) * Upstream changes: - v0.0.55 - Fix runtime dependencies -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 Feb 2019 11:32:28 +0000 swh-core (0.0.54-1~swh2) unstable-swh; urgency=medium * New upstream release 0.0.54 * Upstream changes: - Add missing build dependencies -- Antoine R. Dumont (@ardumont) Tue, 12 Feb 2019 16:25:34 +0000 swh-core (0.0.54-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.54 - (tagged by Valentin Lorentz on 2019-02-11 16:47:18 +0100) * Upstream changes: - Add test for BaseDb.connect. 
-- Software Heritage autobuilder (on jenkins-debian1) Tue, 12 Feb 2019 12:37:43 +0000 swh-core (0.0.53-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.53 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-08 09:09:30 +0100) * Upstream changes: - v0.0.53 - Fix debian build -- Software Heritage autobuilder (on jenkins-debian1) Fri, 08 Feb 2019 08:12:31 +0000 swh-core (0.0.52-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.52 - (tagged by David Douard on 2019-02-06 15:24:04 +0100) * Upstream changes: - v0.0.52 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 06 Feb 2019 14:27:14 +0000 swh-core (0.0.51-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.51 - (tagged by David Douard on 2019-02-01 14:28:27 +0100) * Upstream changes: - v0.0.51 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 01 Feb 2019 13:31:45 +0000 swh-core (0.0.50-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.50 - (tagged by Nicolas Dandrimont on 2019-01-09 15:50:58 +0100) * Upstream changes: - Release swh.core v0.0.50 - Add statsd client module - Log used config files -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Jan 2019 14:54:37 +0000 swh-core (0.0.49-1~swh1) unstable-swh; urgency=medium * Make DbTestFixture.setUp() accept and pass *args and **kwargs. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 08 Jan 2019 16:38:02 +0000 swh-core (0.0.48-1~swh1) unstable-swh; urgency=medium * v0.0.48 * swh.core.cli: Update swh-db-init to make it idemtpotent -- Antoine R. Dumont (@ardumont) Tue, 08 Jan 2019 15:33:15 +0000 swh-core (0.0.47-1~swh1) unstable-swh; urgency=medium * v0.0.47 * swh.core.cli: Fix flag -- Antoine R. Dumont (@ardumont) Tue, 08 Jan 2019 15:16:09 +0000 swh-core (0.0.46-1~swh1) unstable-swh; urgency=medium * v0.0.46 * utils.grouper: Improve implementation * Remove now-obsolete information about swh.core.worker -- Antoine R. Dumont (@ardumont) Tue, 08 Jan 2019 14:37:34 +0000 swh-core (0.0.45-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.45 * Compatibility with recent msgpack * Debian packaging-related cleanups -- Nicolas Dandrimont Thu, 22 Nov 2018 21:09:53 +0100 swh-core (0.0.44-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.44 * Refactor the database testing fixtures * Stop unsafe serialization/deserialization constructs * Update tests to use nose -- Nicolas Dandrimont Thu, 18 Oct 2018 18:20:12 +0200 swh-core (0.0.43-1~swh1) unstable-swh; urgency=medium * v0.0.43 * Fix missing dependency declaration -- Antoine R. Dumont (@ardumont) Thu, 11 Oct 2018 15:47:06 +0200 swh-core (0.0.42-1~swh1) unstable-swh; urgency=medium * v0.0.42 * Fix missing dependency declaration -- Antoine R. Dumont (@ardumont) Thu, 11 Oct 2018 15:45:25 +0200 swh-core (0.0.41-1~swh1) unstable-swh; urgency=medium * Add functions to generate HTTP API clients and servers from databases. * Summary: This moves the interesting parts of D505 into the core, so other components can use them as well. * Test Plan: `make test` * Reviewers: ardumont, seirl, #reviewers * Reviewed By: ardumont, #reviewers * Subscribers: douardda * Differential Revision: https://forge.softwareheritage.org/D507 -- Valentin Lorentz Thu, 11 Oct 2018 10:57:27 +0200 swh-core (0.0.40-1~swh1) unstable-swh; urgency=medium * v0.0.40 * swh.core.api.SWHRemoteAPI: Permit to set a query timeout option -- Antoine R. 
Dumont (@ardumont) Thu, 24 May 2018 12:10:03 +0200 swh-core (0.0.39-1~swh1) unstable-swh; urgency=medium * v0.0.39 * package: Add missing runtime dependency -- Antoine R. Dumont (@ardumont) Thu, 26 Apr 2018 15:24:22 +0200 swh-core (0.0.38-1~swh1) unstable-swh; urgency=medium * v0.0.38 * tests: Use more reasonable psql options for db restores * swh.core.serializers: Add custom types serialization -- Antoine R. Dumont (@ardumont) Thu, 26 Apr 2018 15:15:27 +0200 swh-core (0.0.37-1~swh1) unstable-swh; urgency=medium * v0.0.37 * Move test fixture in swh.core.tests.server_testing module -- Antoine R. Dumont (@ardumont) Wed, 25 Apr 2018 15:00:02 +0200 swh-core (0.0.36-1~swh1) unstable-swh; urgency=medium * v0.0.36 * Migrate swh.loader.tar.tarball module in swh.core -- Antoine R. Dumont (@ardumont) Wed, 06 Dec 2017 12:03:29 +0100 swh-core (0.0.35-1~swh1) unstable-swh; urgency=medium * Release swh.core version 0.0.35 * Update packaging runes -- Nicolas Dandrimont Thu, 12 Oct 2017 18:07:50 +0200 swh-core (0.0.34-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.34 * New modular database test fixture -- Nicolas Dandrimont Mon, 07 Aug 2017 18:29:48 +0200 swh-core (0.0.33-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.33 * Be more conservative with remote API responses -- Nicolas Dandrimont Mon, 19 Jun 2017 19:01:38 +0200 swh-core (0.0.32-1~swh1) unstable-swh; urgency=medium * Release swh-core v0.0.32 * Add asynchronous streaming methods for internal APIs * Remove task arguments from systemd-journal loggers -- Nicolas Dandrimont Tue, 09 May 2017 14:04:22 +0200 swh-core (0.0.31-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.31 * Add explicit dependency on python3-systemd -- Nicolas Dandrimont Fri, 07 Apr 2017 15:11:26 +0200 swh-core (0.0.30-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.30 * drop swh.core.hashutil (moved to swh.model.hashutil) * add a systemd logger -- Nicolas Dandrimont Fri, 07 Apr 2017 11:49:15 +0200 swh-core (0.0.29-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.29 * Catch proper exception in the base API client -- Nicolas Dandrimont Thu, 02 Feb 2017 00:19:25 +0100 swh-core (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * Refactoring some common code into swh.core -- Antoine R. Dumont (@ardumont) Thu, 26 Jan 2017 14:54:22 +0100 swh-core (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * Fix issue with default boolean value -- Antoine R. Dumont (@ardumont) Thu, 20 Oct 2016 16:15:20 +0200 swh-core (0.0.26-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.26 * Raise an exception when a configuration file exists and is unreadable -- Nicolas Dandrimont Wed, 12 Oct 2016 10:16:09 +0200 swh-core (0.0.25-1~swh1) unstable-swh; urgency=medium * v0.0.25 * Add new function utils.cwd -- Antoine R. Dumont (@ardumont) Thu, 29 Sep 2016 21:29:37 +0200 swh-core (0.0.24-1~swh1) unstable-swh; urgency=medium * v0.0.24 * Deal with edge case in logger regarding json -- Antoine R. Dumont (@ardumont) Thu, 22 Sep 2016 12:21:09 +0200 swh-core (0.0.23-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.23 * Properly fix the PyYAML dependency -- Nicolas Dandrimont Tue, 23 Aug 2016 16:20:29 +0200 swh-core (0.0.22-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.22 * Proper loading of yaml and ini files in all paths -- Nicolas Dandrimont Fri, 19 Aug 2016 15:45:55 +0200 swh-core (0.0.21-1~swh1) unstable-swh; urgency=medium * v0.0.21 * Update test tools -- Antoine R. 
Dumont (@ardumont) Tue, 19 Jul 2016 14:47:01 +0200 swh-core (0.0.20-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.20 * Add some generic bytes <-> escaped unicode methods -- Nicolas Dandrimont Tue, 14 Jun 2016 16:54:41 +0200 swh-core (0.0.19-1~swh1) unstable-swh; urgency=medium * v0.0.19 * Resurrect swh.core.utils -- Antoine R. Dumont (@ardumont) Fri, 15 Apr 2016 12:40:43 +0200 swh-core (0.0.18-1~swh1) unstable-swh; urgency=medium * v0.0.18 * Add swh.core.utils * serializers: support UUIDs all around -- Antoine R. Dumont (@ardumont) Sat, 26 Mar 2016 11:16:33 +0100 swh-core (0.0.17-1~swh1) unstable-swh; urgency=medium * Release swh.core v0.0.17 * Allow serialization of UUIDs -- Nicolas Dandrimont Fri, 04 Mar 2016 11:40:56 +0100 swh-core (0.0.16-1~swh1) unstable-swh; urgency=medium * Release swh.core version 0.0.16 * add bytehex_to_hash and hash_to_bytehex in hashutil * move scheduling utilities to swh.scheduler -- Nicolas Dandrimont Fri, 19 Feb 2016 18:12:10 +0100 swh-core (0.0.15-1~swh1) unstable-swh; urgency=medium * Release v0.0.15 * Add hashutil.hash_git_object -- Nicolas Dandrimont Wed, 16 Dec 2015 16:31:26 +0100 swh-core (0.0.14-1~swh1) unstable-swh; urgency=medium * v0.0.14 * Add simple README * Update license * swh.core.hashutil.hashfile can now deal with filepath as bytes -- Antoine R. Dumont (@ardumont) Fri, 23 Oct 2015 11:13:14 +0200 swh-core (0.0.13-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.core v0.0.13 -- Nicolas Dandrimont Fri, 09 Oct 2015 17:32:49 +0200 swh-core (0.0.12-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.core v0.0.12 -- Nicolas Dandrimont Tue, 06 Oct 2015 17:34:34 +0200 swh-core (0.0.11-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.core v0.0.11 -- Nicolas Dandrimont Sat, 03 Oct 2015 15:57:03 +0200 swh-core (0.0.10-1~swh1) unstable-swh; urgency=medium * Prepare deploying swh.core v0.0.10 -- Nicolas Dandrimont Sat, 03 Oct 2015 12:28:52 +0200 swh-core (0.0.9-1~swh1) unstable-swh; urgency=medium * Prepare deploying swh.core v0.0.9 -- Nicolas Dandrimont Sat, 03 Oct 2015 11:36:55 +0200 swh-core (0.0.8-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.core v0.0.8 -- Nicolas Dandrimont Thu, 01 Oct 2015 12:31:44 +0200 swh-core (0.0.7-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.core v0.0.7 -- Nicolas Dandrimont Thu, 01 Oct 2015 11:29:04 +0200 swh-core (0.0.6-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.core v0.0.6 -- Nicolas Dandrimont Tue, 29 Sep 2015 16:48:44 +0200 swh-core (0.0.5-1~swh1) unstable-swh; urgency=medium * Prepare v0.0.5 deployment -- Nicolas Dandrimont Tue, 29 Sep 2015 16:08:32 +0200 swh-core (0.0.4-1~swh1) unstable-swh; urgency=medium * Tagging swh.core 0.0.4 -- Nicolas Dandrimont Fri, 25 Sep 2015 15:41:26 +0200 swh-core (0.0.3-1~swh1) unstable-swh; urgency=medium * Tag swh.core v0.0.3 -- Nicolas Dandrimont Fri, 25 Sep 2015 11:07:10 +0200 swh-core (0.0.2-1~swh1) unstable-swh; urgency=medium * Deploy v0.0.2 -- Nicolas Dandrimont Wed, 23 Sep 2015 12:08:50 +0200 swh-core (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * Tag v0.0.1 for deployment -- Nicolas Dandrimont Tue, 22 Sep 2015 14:52:26 +0200 diff --git a/docs/db.rst b/docs/db.rst index b17a9cb..930f121 100644 --- a/docs/db.rst +++ b/docs/db.rst @@ -1,176 +1,176 @@ .. 
_swh-core-db:

Common database utilities
=========================

The ``swh.core.db`` module offers a set of common (postgresql) database handling
utilities and features for other swh packages implementing a `datastore`, i.e. a
service responsible for providing a data store through a common interface, which
can use a postgresql database as backend. Examples are :mod:`swh.storage` or
:mod:`swh.scheduler`.

Most of the time, such a database-backed data store depends on a data schema
(which may or may not be based on :mod:`swh.model`) and provides a unified
interface based on a Python class to abstract access to this datastore. Some
packages may implement only a postgresql backend, while others may provide more
backends.

The :mod:`swh.core.db` module only deals with the postgresql part and provides
common features and tooling to manage the database lifecycle in a consistent and
unified way among all the :mod:`swh` packages. It comes with a few command line
tools to manage the database of a specific :mod:`swh` package. As such, most of
the database management cli commands require a configuration file holding the
database connection information.

For example, for the :mod:`swh.storage` package, one will be able to create,
initialize and upgrade the postgresql database using simple commands.

To create the database and perform superuser initialization steps (see below):

.. code-block:: bash

   $ swh db create storage --dbname=postgresql://superuser:passwd@localhost:5433/test-storage

If the database already exists but lacks superuser level initialization steps,
you may use:

.. code-block:: bash

   $ swh db init-admin storage --dbname=postgresql://superuser:passwd@localhost:5433/test-storage

Then, assuming a ``config.yml`` file with the following content exists:

.. code-block:: yaml

   storage:
     cls: postgresql
     db: host=localhost, port=5433, dbname=test-storage, username=normal-user, password=pwd
     objstorage:
       cls: memory

you can run:

.. code-block:: bash

   $ swh db --config-file=config.yml init storage

   DONE database for storage initialized (flavor default) at version 182

Note: you can define the ``SWH_CONFIG_FILENAME`` environment variable instead of
using the ``--config-file`` command line option.

You can also check the actual data model version of this database:

.. code-block:: bash

   $ swh db --config-file=config.yml version storage
   module: storage
   flavor: default
   version: 182

as well as the migration history for the database:

.. code-block:: bash

   $ swh db --config-file=config.yml version --all storage
   module: storage
   flavor: default
   182 [2022-02-11 15:08:31.806070+01:00] Work In Progress
   181 [2022-02-11 14:06:27.435010+01:00] Work In Progress

The database migration itself is done using the ``swh db upgrade`` command.

Implementation of a swh.core.db datastore
-----------------------------------------

To use this database management tooling, a :mod:`swh` package is expected to
meet the following conditions:

- the package should provide an ``sql`` directory in its root namespace
  containing the initialization sql scripts. Scripts should be named like
  ``nn-xxx.sql`` and are executed in order according to the ``nn`` integer
  value. Scripts having ``-superuser-`` in their name will be executed by the
  ``init-admin`` tool and are expected to require superuser access level,
  whereas scripts without ``-superuser-`` in their name will be executed by the
  ``swh db init`` command and are expected to require write access level
  (with no need for superuser access level).

- the package should provide a ``sql/upgrade`` directory with SQL migration
  scripts in its root namespace.
  Script names are expected to be of the form ``nnn.sql``, where ``nnn`` is the
  version to which this script migrates a database that is at version
  ``nnn - 1``.

- the initialization and migration scripts should not create nor fill the
  metadata related tables (``dbversion`` and ``dbmodule``).

- the package should provide a ``get_datastore`` function in its root namespace
  returning an instance of the datastore object. Normally, this datastore
  object uses ``swh.core.db.BaseDb`` to interact with the actual database.

-- The datastore object should provide a ``get_current_version()`` method
-  returning the database version expected by the code.
+- The datastore object should provide a ``current_version`` attribute returning the
+  database version expected by the code.

See existing ``swh`` packages like ``swh.storage`` or ``swh.scheduler`` for
usage examples.

Writing tests
-------------

The ``swh.core.db.pytest_plugin`` module provides a few helper tools to write
unit tests for postgresql based datastores.

By default, when using these fixtures, a postgresql server will be started (by
the pytest_postgresql fixture) and a template database will be created using
the ``postgresql_proc`` fixture factory provided by ``pytest_postgresql``. Then
a dedicated fixture must be declared to use the ``postgresql_proc`` fixture
generated by the fixture factory function. This template database will then be
used to create a new database for each test using this dedicated fixture.

In order to help the database initialization process and make it consistent
with the database initialization tools of the ``swh db`` cli, an
``initialize_database_for_module()`` function is provided to be used with the
fixture factory described above.

Typically, writing tests for a ``swh`` package ``swh.example`` would look like:

.. code-block:: python

   from functools import partial

   from pytest_postgresql import factories

   from swh.core.db.pytest_plugin import postgresql_fact
   from swh.core.db.pytest_plugin import initialize_database_for_module

   example_postgresql_proc = factories.postgresql_proc(
       dbname="example",
       load=[partial(initialize_database_for_module, modname="example", version=1)]
   )

   postgresql_example = postgresql_fact("example_postgresql_proc")

   def test_example(postgresql_example):
       with postgresql_example.cursor() as c:
           c.execute("select version from dbversion limit 1")
           assert c.fetchone()[0] == 1

Note: most of the time, you will want to put the scaffolding part of the code
above in a ``conftest.py`` file.

The ``load`` argument of ``factories.postgresql_proc`` is used to initialize
the template database from which a new database is created for each test, while
the ``load`` argument of the ``postgresql_fact`` fixture is executed before
each test (in the database created from the template database and dedicated to
the test being executed).
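To make the requirements above more concrete, here is a minimal sketch of what
the datastore side of a hypothetical ``swh.example`` package could look like,
exposing a ``get_datastore`` factory and a ``current_version`` attribute. The
class name, the ``example_table`` table and the exact ``get_datastore``
signature are illustrative assumptions, not an API mandated by ``swh.core``:

.. code-block:: python

   # swh/example/__init__.py -- hypothetical module, for illustration only

   from swh.core.db import BaseDb


   class ExampleStorage:
       """Minimal postgresql-backed datastore for the imaginary swh.example package."""

       # Data model version expected by this code; the database management
       # tooling compares it with the version stored in the dbversion table.
       current_version = 1

       def __init__(self, db: str):
           # BaseDb wraps a psycopg2 connection built from a connection string
           self.db = BaseDb.connect(db)

       def example_count(self) -> int:
           # illustrative query against a table created by the package's sql scripts
           with self.db.cursor() as cur:
               cur.execute("select count(*) from example_table")
               return cur.fetchone()[0]


   def get_datastore(cls="postgresql", **kwargs):
       # factory returning an instance of the datastore object; the signature
       # used here is an assumption, each package defines its own
       if cls != "postgresql":
           raise ValueError(f"unsupported backend class: {cls!r}")
       return ExampleStorage(**kwargs)

Together with the ``sql`` and ``sql/upgrade`` directories described above, such
a module is what the ``swh db init`` and ``swh db upgrade`` commands and
``initialize_database_for_module()`` are intended to build upon: they locate the
package's sql scripts and compare the stored database version with the version
expected by the code.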
diff --git a/swh.core.egg-info/PKG-INFO b/swh.core.egg-info/PKG-INFO index c633be4..862c2ca 100644 --- a/swh.core.egg-info/PKG-INFO +++ b/swh.core.egg-info/PKG-INFO @@ -1,39 +1,39 @@ Metadata-Version: 2.1 Name: swh.core -Version: 2.9.0 +Version: 2.10 Summary: Software Heritage core utilities Home-page: https://forge.softwareheritage.org/diffusion/DCORE/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-core Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-core/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing-core Provides-Extra: logging Provides-Extra: db Provides-Extra: http Provides-Extra: github Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - Core foundations ==================================== Low-level utilities and helpers used by almost all other modules in the stack. core library for swh's modules: - config parser - serialization - logging mechanism - database connection - http-based RPC client/server diff --git a/swh/core/db/db_utils.py b/swh/core/db/db_utils.py index d78f9ec..d28a0b8 100644 --- a/swh/core/db/db_utils.py +++ b/swh/core/db/db_utils.py @@ -1,664 +1,691 @@ # Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from contextlib import contextmanager from datetime import datetime, timezone import functools from importlib import import_module import logging from os import path import pathlib import re import subprocess from typing import Collection, Dict, Iterator, List, Optional, Tuple, Union, cast import psycopg2 import psycopg2.errors import psycopg2.extensions from psycopg2.extensions import connection as pgconnection from psycopg2.extensions import encodings as pgencodings from psycopg2.extensions import make_dsn from psycopg2.extensions import parse_dsn as _parse_dsn from swh.core.utils import numfile_sortkey as sortkey logger = logging.getLogger(__name__) def now(): return datetime.now(tz=timezone.utc) def stored_procedure(stored_proc): """decorator to execute remote stored procedure, specified as argument Generally, the body of the decorated function should be empty. If it is not, the stored procedure will be executed first; the function body then. """ def wrap(meth): @functools.wraps(meth) def _meth(self, *args, **kwargs): cur = kwargs.get("cur", None) self._cursor(cur).execute("SELECT %s()" % stored_proc) meth(self, *args, **kwargs) return _meth return wrap def jsonize(value): """Convert a value to a psycopg2 JSON object if necessary""" if isinstance(value, dict): return psycopg2.extras.Json(value) return value @contextmanager def connect_to_conninfo( db_or_conninfo: Union[str, pgconnection] ) -> Iterator[pgconnection]: """Connect to the database passed as argument. 
Args: db_or_conninfo: A database connection, or a database connection info string Returns: a connected database handle or None if the database is not initialized """ if isinstance(db_or_conninfo, pgconnection): yield db_or_conninfo else: if "=" not in db_or_conninfo and "//" not in db_or_conninfo: # Database name db_or_conninfo = f"dbname={db_or_conninfo}" try: db = psycopg2.connect(db_or_conninfo) except psycopg2.Error: logger.exception("Failed to connect to `%s`", db_or_conninfo) else: yield db def swh_db_version(db_or_conninfo: Union[str, pgconnection]) -> Optional[int]: """Retrieve the swh version of the database. If the database is not initialized, this logs a warning and returns None. Args: db_or_conninfo: A database connection, or a database connection info string Returns: Either the version of the database, or None if it couldn't be detected """ try: with connect_to_conninfo(db_or_conninfo) as db: if not db: return None with db.cursor() as c: query = "select version from dbversion order by dbversion desc limit 1" try: c.execute(query) result = c.fetchone() if result: return result[0] except psycopg2.errors.UndefinedTable: return None except Exception: logger.exception("Could not get version from `%s`", db_or_conninfo) return None def swh_db_versions( db_or_conninfo: Union[str, pgconnection] ) -> Optional[List[Tuple[int, datetime, str]]]: """Retrieve the swh version history of the database. If the database is not initialized, this logs a warning and returns None. Args: db_or_conninfo: A database connection, or a database connection info string Returns: Either the version of the database, or None if it couldn't be detected """ try: with connect_to_conninfo(db_or_conninfo) as db: if not db: return None with db.cursor() as c: query = ( "select version, release, description " "from dbversion order by dbversion desc" ) try: c.execute(query) return cast(List[Tuple[int, datetime, str]], c.fetchall()) except psycopg2.errors.UndefinedTable: return None except Exception: logger.exception("Could not get versions from `%s`", db_or_conninfo) return None def swh_db_upgrade( conninfo: str, modname: str, to_version: Optional[int] = None ) -> int: """Upgrade the database at `conninfo` for module `modname` This will run migration scripts found in the `sql/upgrades` subdirectory of the module `modname`. By default, this will upgrade to the latest declared version. Args: conninfo: A database connection, or a database connection info string modname: datastore module the database stores content for to_version: if given, update the database to this version rather than the latest """ if to_version is None: to_version = 99999999 db_module, db_version, db_flavor = get_database_info(conninfo) if db_version is None: raise ValueError("Unable to retrieve the current version of the database") if db_module is None: raise ValueError("Unable to retrieve the module of the database") if db_module != modname: raise ValueError( "The stored module of the database is different than the given one" ) sqlfiles = [ fname for fname in get_sql_for_package(modname, upgrade=True) if db_version < int(fname.stem) <= to_version ] if not sqlfiles: return db_version for sqlfile in sqlfiles: new_version = int(path.splitext(path.basename(sqlfile))[0]) logger.info("Executing migration script '%s'", sqlfile) if db_version is not None and (new_version - db_version) > 1: logger.error( f"There are missing migration steps between {db_version} and " f"{new_version}. It might be expected but it most unlikely is not. " "Will stop here." 
) return db_version execute_sqlfiles([sqlfile], conninfo, db_flavor) # check if the db version has been updated by the upgrade script db_version = swh_db_version(conninfo) assert db_version is not None if db_version == new_version: # nothing to do, upgrade script did the job pass elif db_version == new_version - 1: # it has not (new style), so do it swh_set_db_version( conninfo, new_version, desc=f"Upgraded to version {new_version} using {sqlfile}", ) db_version = swh_db_version(conninfo) else: # upgrade script did it wrong logger.error( f"The upgrade script {sqlfile} did not update the dbversion table " f"consistently ({db_version} vs. expected {new_version}). " "Will stop migration here. Please check your migration scripts." ) return db_version return new_version def swh_db_module(db_or_conninfo: Union[str, pgconnection]) -> Optional[str]: """Retrieve the swh module used to create the database. If the database is not initialized, this logs a warning and returns None. Args: db_or_conninfo: A database connection, or a database connection info string Returns: Either the module of the database, or None if it couldn't be detected """ try: with connect_to_conninfo(db_or_conninfo) as db: if not db: return None with db.cursor() as c: query = "select dbmodule from dbmodule limit 1" try: c.execute(query) resp = c.fetchone() if resp: return resp[0] except psycopg2.errors.UndefinedTable: return None except Exception: logger.exception("Could not get module from `%s`", db_or_conninfo) return None def swh_set_db_module( db_or_conninfo: Union[str, pgconnection], module: str, force=False ) -> None: """Set the swh module used to create the database. Fails if the dbmodule is already set or the table does not exist. Args: db_or_conninfo: A database connection, or a database connection info string module: the swh module to register (without the leading 'swh.') """ update = False if module.startswith("swh."): module = module[4:] current_module = swh_db_module(db_or_conninfo) if current_module is not None: if current_module == module: logger.warning("The database module is already set to %s", module) return if not force: raise ValueError( "The database module is already set to a value %s " "different than given %s", current_module, module, ) # force is True update = True with connect_to_conninfo(db_or_conninfo) as db: if not db: return None sqlfiles = [ fname for fname in get_sql_for_package("swh.core.db") if "dbmodule" in fname.stem ] execute_sqlfiles(sqlfiles, db_or_conninfo) with db.cursor() as c: if update: query = "update dbmodule set dbmodule = %s" else: query = "insert into dbmodule(dbmodule) values (%s)" c.execute(query, (module,)) db.commit() def swh_set_db_version( db_or_conninfo: Union[str, pgconnection], version: int, ts: Optional[datetime] = None, desc: str = "Work in progress", ) -> None: """Set the version of the database. Fails if the dbversion table does not exists. Args: db_or_conninfo: A database connection, or a database connection info string version: the version to add """ if ts is None: ts = now() with connect_to_conninfo(db_or_conninfo) as db: if not db: return None with db.cursor() as c: query = ( "insert into dbversion(version, release, description) " "values (%s, %s, %s)" ) c.execute(query, (version, ts, desc)) db.commit() def swh_db_flavor(db_or_conninfo: Union[str, pgconnection]) -> Optional[str]: """Retrieve the swh flavor of the database. If the database is not initialized, or the database doesn't support flavors, this returns None. 
Args: db_or_conninfo: A database connection, or a database connection info string Returns: The flavor of the database, or None if it could not be detected. """ try: with connect_to_conninfo(db_or_conninfo) as db: if not db: return None with db.cursor() as c: query = "select swh_get_dbflavor()" try: c.execute(query) result = c.fetchone() assert result is not None # to keep mypy happy return result[0] except psycopg2.errors.UndefinedFunction: # function not found: no flavor return None except Exception: logger.exception("Could not get flavor from `%s`", db_or_conninfo) return None # The following code has been imported from psycopg2, version 2.7.4, # https://github.com/psycopg/psycopg2/tree/5afb2ce803debea9533e293eef73c92ffce95bcd # and modified by Software Heritage. # # Original file: lib/extras.py # # psycopg2 is free software: you can redistribute it and/or modify it under the # terms of the GNU Lesser General Public License as published by the Free # Software Foundation, either version 3 of the License, or (at your option) any # later version. def _paginate(seq, page_size): """Consume an iterable and return it in chunks. Every chunk is at most `page_size`. Never return an empty chunk. """ page = [] it = iter(seq) while 1: try: for i in range(page_size): page.append(next(it)) yield page page = [] except StopIteration: if page: yield page return def _split_sql(sql): """Split *sql* on a single ``%s`` placeholder. Split on the %s, perform %% replacement and return pre, post lists of snippets. """ curr = pre = [] post = [] tokens = re.split(rb"(%.)", sql) for token in tokens: if len(token) != 2 or token[:1] != b"%": curr.append(token) continue if token[1:] == b"s": if curr is pre: curr = post else: raise ValueError("the query contains more than one '%s' placeholder") elif token[1:] == b"%": curr.append(b"%") else: raise ValueError( "unsupported format character: '%s'" % token[1:].decode("ascii", "replace") ) if curr is pre: raise ValueError("the query doesn't contain any '%s' placeholder") return pre, post def execute_values_generator(cur, sql, argslist, template=None, page_size=100): """Execute a statement using SQL ``VALUES`` with a sequence of parameters. Rows returned by the query are returned through a generator. You need to consume the generator for the queries to be executed! :param cur: the cursor to use to execute the query. :param sql: the query to execute. It must contain a single ``%s`` placeholder, which will be replaced by a `VALUES list`__. Example: ``"INSERT INTO mytable (id, f1, f2) VALUES %s"``. :param argslist: sequence of sequences or dictionaries with the arguments to send to the query. The type and content must be consistent with *template*. :param template: the snippet to merge to every item in *argslist* to compose the query. - If the *argslist* items are sequences it should contain positional placeholders (e.g. ``"(%s, %s, %s)"``, or ``"(%s, %s, 42)``" if there are constants value...). - If the *argslist* items are mappings it should contain named placeholders (e.g. ``"(%(id)s, %(f1)s, 42)"``). If not specified, assume the arguments are sequence and use a simple positional template (i.e. ``(%s, %s, ...)``), with the number of placeholders sniffed by the first element in *argslist*. :param page_size: maximum number of *argslist* items to include in every statement. If there are more items the function will execute more than one statement. :param yield_from_cur: Whether to yield results from the cursor in this function directly. .. 
__: https://www.postgresql.org/docs/current/static/queries-values.html After the execution of the function the `cursor.rowcount` property will **not** contain a total result. """ # we can't just use sql % vals because vals is bytes: if sql is bytes # there will be some decoding error because of stupid codec used, and Py3 # doesn't implement % on bytes. if not isinstance(sql, bytes): sql = sql.encode(pgencodings[cur.connection.encoding]) pre, post = _split_sql(sql) for page in _paginate(argslist, page_size=page_size): if template is None: template = b"(" + b",".join([b"%s"] * len(page[0])) + b")" parts = pre[:] for args in page: parts.append(cur.mogrify(template, args)) parts.append(b",") parts[-1:] = post cur.execute(b"".join(parts)) yield from cur def import_swhmodule(modname): if not modname.startswith("swh."): modname = f"swh.{modname}" try: m = import_module(modname) except ImportError as exc: logger.error(f"Could not load the {modname} module: {exc}") return None return m def get_sql_for_package(modname: str, upgrade: bool = False) -> List[pathlib.Path]: """Return the (sorted) list of sql script files for the given swh module If upgrade is True, return the list of available migration scripts, otherwise, return the list of initialization scripts. """ m = import_swhmodule(modname) if m is None: raise ValueError(f"Module {modname} cannot be loaded") sqldir = pathlib.Path(m.__file__).parent / "sql" if upgrade: sqldir /= "upgrades" if not sqldir.is_dir(): raise ValueError( "Module {} does not provide a db schema (no sql/ dir)".format(modname) ) return sorted(sqldir.glob("*.sql"), key=lambda x: sortkey(x.name)) def populate_database_for_package( modname: str, conninfo: str, flavor: Optional[str] = None ) -> Tuple[bool, Optional[int], Optional[str]]: """Populate the database, pointed at with ``conninfo``, using the SQL files found in the package ``modname``. Also fill the 'dbmodule' table with the given ``modname``. Args: modname: Name of the module of which we're loading the files conninfo: connection info string for the SQL database flavor: the module-specific flavor which we want to initialize the database under Returns: Tuple with three elements: whether the database has been initialized; the current version of the database; if it exists, the flavor of the database. """ current_version = swh_db_version(conninfo) if current_version is not None: dbflavor = swh_db_flavor(conninfo) return False, current_version, dbflavor def globalsortkey(key): "like sortkey but only on basenames" return sortkey(path.basename(key)) sqlfiles = get_sql_for_package(modname) + get_sql_for_package("swh.core.db") sqlfiles = sorted(sqlfiles, key=lambda x: sortkey(x.stem)) sqlfiles = [fpath for fpath in sqlfiles if "-superuser-" not in fpath.stem] execute_sqlfiles(sqlfiles, conninfo, flavor) # populate the dbmodule table swh_set_db_module(conninfo, modname) current_db_version = swh_db_version(conninfo) dbflavor = swh_db_flavor(conninfo) return True, current_db_version, dbflavor +def initialize_database_for_module(modname: str, version: int, **kwargs): + """Helper function to initialize and populate a database for the given module + + This aims at helping the usage of pytest_postgresql for swh.core.db based datastores. 
+ Typical usage will be (here for swh.storage):: + + from pytest_postgresql import factories + + storage_postgresql_proc = factories.postgresql_proc( + load=[partial(initialize_database_for_module, modname="storage", version=42)] + ) + storage_postgresql = factories.postgresql("storage_postgresql_proc") + + """ + conninfo = psycopg2.connect(**kwargs).dsn + init_admin_extensions(modname, conninfo) + populate_database_for_package(modname, conninfo) + try: + swh_set_db_version(conninfo, version) + except psycopg2.errors.UniqueViolation: + logger.warn( + "Version already set by db init scripts. " + f"This generally means the swh.{modname} package needs to be " + "updated for swh.core>=1.2" + ) + + def get_database_info( conninfo: str, ) -> Tuple[Optional[str], Optional[int], Optional[str]]: """Get version, flavor and module of the db""" dbmodule = swh_db_module(conninfo) dbversion = swh_db_version(conninfo) dbflavor = None if dbversion is not None: dbflavor = swh_db_flavor(conninfo) return (dbmodule, dbversion, dbflavor) def parse_dsn_or_dbname(dsn_or_dbname: str) -> Dict[str, str]: """Parse a psycopg2 dsn, falling back to supporting plain database names as well""" try: return _parse_dsn(dsn_or_dbname) except psycopg2.ProgrammingError: # psycopg2 failed to parse the DSN; it's probably a database name, # handle it as such return _parse_dsn(f"dbname={dsn_or_dbname}") def init_admin_extensions(modname: str, conninfo: str) -> None: """The remaining initialization process -- running -superuser- SQL files -- is done using the given conninfo, thus connecting to the newly created database """ sqlfiles = get_sql_for_package(modname) sqlfiles = [fname for fname in sqlfiles if "-superuser-" in fname.stem] execute_sqlfiles(sqlfiles, conninfo) def create_database_for_package( modname: str, conninfo: str, template: str = "template1" ): """Create the database pointed at with ``conninfo``, and initialize it using -superuser- SQL files found in the package ``modname``. Args: modname: Name of the module of which we're loading the files conninfo: connection info string or plain database name for the SQL database template: the name of the database to connect to and use as template to create the new database """ # Use the given conninfo string, but with dbname replaced by the template dbname # for the database creation step creation_dsn = parse_dsn_or_dbname(conninfo) dbname = creation_dsn["dbname"] creation_dsn["dbname"] = template logger.debug("db_create dbname=%s (from %s)", dbname, template) subprocess.check_call( [ "psql", "--quiet", "--no-psqlrc", "-v", "ON_ERROR_STOP=1", "-d", make_dsn(**creation_dsn), "-c", f'CREATE DATABASE "{dbname}"', ] ) init_admin_extensions(modname, conninfo) def execute_sqlfiles( sqlfiles: Collection[pathlib.Path], db_or_conninfo: Union[str, pgconnection], flavor: Optional[str] = None, ): """Execute a list of SQL files on the database pointed at with ``db_or_conninfo``. 
Args: sqlfiles: List of SQL files to execute db_or_conninfo: A database connection, or a database connection info string flavor: the database flavor to initialize """ if isinstance(db_or_conninfo, str): conninfo = db_or_conninfo else: conninfo = db_or_conninfo.dsn psql_command = [ "psql", "--quiet", "--no-psqlrc", "-v", "ON_ERROR_STOP=1", "-d", conninfo, ] flavor_set = False for sqlfile in sqlfiles: logger.debug(f"execute SQL file {sqlfile} dbname={conninfo}") subprocess.check_call(psql_command + ["-f", str(sqlfile)]) if ( flavor is not None and not flavor_set and sqlfile.name.endswith("-flavor.sql") ): logger.debug("Setting database flavor %s", flavor) query = f"insert into dbflavor (flavor) values ('{flavor}')" subprocess.check_call(psql_command + ["-c", query]) flavor_set = True if flavor is not None and not flavor_set: logger.warn( "Asked for flavor %s, but module does not support database flavors", flavor, ) diff --git a/swh/core/db/pytest_plugin.py b/swh/core/db/pytest_plugin.py index e12a0f3..23b0609 100644 --- a/swh/core/db/pytest_plugin.py +++ b/swh/core/db/pytest_plugin.py @@ -1,281 +1,276 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import glob from importlib import import_module import logging import subprocess from typing import Callable, Iterable, Iterator, List, Optional, Sequence, Set, Union +import warnings from _pytest.fixtures import FixtureRequest +from deprecated import deprecated import psycopg2 import pytest from pytest_postgresql.compat import check_for_psycopg2, connection from pytest_postgresql.executor import PostgreSQLExecutor from pytest_postgresql.executor_noop import NoopExecutor from pytest_postgresql.janitor import DatabaseJanitor -from swh.core.db.db_utils import ( - init_admin_extensions, - populate_database_for_package, - swh_set_db_version, -) +from swh.core.db.db_utils import initialize_database_for_module from swh.core.utils import basename_sortkey # to keep mypy happy regardless pytest-postgresql version try: _pytest_pgsql_get_config_module = import_module("pytest_postgresql.config") except ImportError: # pytest_postgresql < 3.0.0 _pytest_pgsql_get_config_module = import_module("pytest_postgresql.factories") _pytest_postgresql_get_config = getattr(_pytest_pgsql_get_config_module, "get_config") logger = logging.getLogger(__name__) +initialize_database_for_module = deprecated( + version="2.10", + reason="Use swh.core.db.db_utils.initialize_database_for_module instead.", +)(initialize_database_for_module) + +warnings.warn( + "This pytest plugin is deprecated, it should not be used any more.", + category=DeprecationWarning, +) + class SWHDatabaseJanitor(DatabaseJanitor): """SWH database janitor implementation with a a different setup/teardown policy than than the stock one. Instead of dropping, creating and initializing the database for each test, it creates and initializes the db once, then truncates the tables (and sequences) in between tests. This is needed to have acceptable test performances. 
""" def __init__( self, user: str, host: str, port: int, dbname: str, version: Union[str, float], password: Optional[str] = None, isolation_level: Optional[int] = None, connection_timeout: int = 60, dump_files: Optional[Union[str, Sequence[str]]] = None, no_truncate_tables: Set[str] = set(), no_db_drop: bool = False, ) -> None: super().__init__(user, host, port, dbname, version) # do no truncate the following tables self.no_truncate_tables = set(no_truncate_tables) self.no_db_drop = no_db_drop self.dump_files = dump_files def psql_exec(self, fname: str) -> None: conninfo = ( f"host={self.host} user={self.user} port={self.port} dbname={self.dbname}" ) subprocess.check_call( [ "psql", "--quiet", "--no-psqlrc", "-v", "ON_ERROR_STOP=1", "-d", conninfo, "-f", fname, ] ) def db_reset(self) -> None: """Truncate tables (all but self.no_truncate_tables set) and sequences""" with psycopg2.connect( dbname=self.dbname, user=self.user, host=self.host, port=self.port, ) as cnx: with cnx.cursor() as cur: cur.execute( "SELECT table_name FROM information_schema.tables " "WHERE table_schema = %s", ("public",), ) all_tables = set(table for (table,) in cur.fetchall()) tables_to_truncate = all_tables - self.no_truncate_tables for table in tables_to_truncate: cur.execute("TRUNCATE TABLE %s CASCADE" % table) cur.execute( "SELECT sequence_name FROM information_schema.sequences " "WHERE sequence_schema = %s", ("public",), ) seqs = set(seq for (seq,) in cur.fetchall()) for seq in seqs: cur.execute("ALTER SEQUENCE %s RESTART;" % seq) cnx.commit() def _db_exists(self, cur, dbname): cur.execute( "SELECT EXISTS " "(SELECT datname FROM pg_catalog.pg_database WHERE datname= %s);", (dbname,), ) row = cur.fetchone() return (row is not None) and row[0] def init(self) -> None: """Create database in postgresql out of a template it if it exists, bare creation otherwise.""" template_name = f"{self.dbname}_tmpl" logger.debug("Initialize DB %s", self.dbname) with self.cursor() as cur: tmpl_exists = self._db_exists(cur, template_name) db_exists = self._db_exists(cur, self.dbname) if not db_exists: if tmpl_exists: logger.debug( "Create %s from template %s", self.dbname, template_name ) cur.execute( f'CREATE DATABASE "{self.dbname}" TEMPLATE "{template_name}";' ) else: logger.debug("Create %s from scratch", self.dbname) cur.execute(f'CREATE DATABASE "{self.dbname}";') if self.dump_files: logger.warning( "Using dump_files on the postgresql_fact fixture " "is deprecated. See swh.core documentation for more " "details." ) for dump_file in gen_dump_files(self.dump_files): logger.info(f"Loading {dump_file}") self.psql_exec(dump_file) else: logger.debug("Reset %s", self.dbname) self.db_reset() def drop(self) -> None: """Drop database in postgresql.""" if self.no_db_drop: with self.cursor() as cur: self._terminate_connection(cur, self.dbname) else: super().drop() # the postgres_fact factory fixture below is mostly a copy of the code # from pytest-postgresql. We need a custom version here to be able to # specify our version of the DBJanitor we use. 
+@deprecated(version="2.10", reason="Use stock pytest_postgresql factory instead") def postgresql_fact( process_fixture_name: str, dbname: Optional[str] = None, load: Optional[Sequence[Union[Callable, str]]] = None, isolation_level: Optional[int] = None, modname: Optional[str] = None, dump_files: Optional[Union[str, List[str]]] = None, no_truncate_tables: Set[str] = {"dbversion"}, no_db_drop: bool = False, ) -> Callable[[FixtureRequest], Iterator[connection]]: """ Return connection fixture factory for PostgreSQL. :param process_fixture_name: name of the process fixture :param dbname: database name :param load: SQL, function or function import paths to automatically load into our test database :param isolation_level: optional postgresql isolation level defaults to server's default :param modname: (swh) module name for which the database is created :dump_files: (deprecated, use load instead) list of sql script files to execute after the database has been created :no_truncate_tables: list of table not to truncate between tests (only used when no_db_drop is True) :no_db_drop: if True, keep the database between tests; in which case, the database is reset (see SWHDatabaseJanitor.db_reset()) by truncating most of the tables. Note that this makes de facto tests (potentially) interdependent, use with extra caution. :returns: function which makes a connection to postgresql """ @pytest.fixture def postgresql_factory(request: FixtureRequest) -> Iterator[connection]: """ Fixture factory for PostgreSQL. :param request: fixture request object :returns: postgresql client """ check_for_psycopg2() proc_fixture: Union[PostgreSQLExecutor, NoopExecutor] = request.getfixturevalue( process_fixture_name ) pg_host = proc_fixture.host pg_port = proc_fixture.port pg_user = proc_fixture.user pg_password = proc_fixture.password pg_options = proc_fixture.options pg_db = dbname or proc_fixture.dbname pg_load = load or [] assert pg_db is not None with SWHDatabaseJanitor( pg_user, pg_host, pg_port, pg_db, proc_fixture.version, pg_password, isolation_level=isolation_level, dump_files=dump_files, no_truncate_tables=no_truncate_tables, no_db_drop=no_db_drop, ) as janitor: db_connection: connection = psycopg2.connect( dbname=pg_db, user=pg_user, password=pg_password, host=pg_host, port=pg_port, options=pg_options, ) for load_element in pg_load: janitor.load(load_element) try: yield db_connection finally: db_connection.close() return postgresql_factory -def initialize_database_for_module(modname, version, **kwargs): - conninfo = psycopg2.connect(**kwargs).dsn - init_admin_extensions(modname, conninfo) - populate_database_for_package(modname, conninfo) - try: - swh_set_db_version(conninfo, version) - except psycopg2.errors.UniqueViolation: - logger.warn( - "Version already set by db init scripts. 
" - "This generally means the swh.{modname} package needs to be " - "updated for swh.core>=1.2" - ) - - def gen_dump_files(dump_files: Union[str, Iterable[str]]) -> Iterator[str]: """Generate files potentially resolving glob patterns if any""" if isinstance(dump_files, str): dump_files = [dump_files] for dump_file in dump_files: if glob.has_magic(dump_file): # if the dump_file is a glob pattern one, resolve it yield from ( fname for fname in sorted(glob.glob(dump_file), key=basename_sortkey) ) else: # otherwise, just return the filename yield dump_file diff --git a/swh/core/db/tests/test_db.py b/swh/core/db/tests/test_db.py index 726f1a1..2d12707 100644 --- a/swh/core/db/tests/test_db.py +++ b/swh/core/db/tests/test_db.py @@ -1,466 +1,466 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from dataclasses import dataclass import datetime from enum import IntEnum import inspect from string import printable from typing import Any from unittest.mock import MagicMock, Mock import uuid from hypothesis import given, settings, strategies from hypothesis.extra.pytz import timezones import psycopg2 import pytest +from pytest_postgresql import factories from typing_extensions import Protocol from swh.core.db import BaseDb from swh.core.db.common import db_transaction, db_transaction_generator -from swh.core.db.pytest_plugin import postgresql_fact from swh.core.db.tests.conftest import function_scoped_fixture_check # workaround mypy bug https://github.com/python/mypy/issues/5485 class Converter(Protocol): def __call__(self, x: Any) -> Any: ... @dataclass class Field: name: str """Column name""" pg_type: str """Type of the PostgreSQL column""" example: Any """Example value for the static tests""" strategy: strategies.SearchStrategy """Hypothesis strategy to generate these values""" in_wrapper: Converter = lambda x: x """Wrapper to convert this data type for the static tests""" out_converter: Converter = lambda x: x """Converter from the raw PostgreSQL column value to this data type""" # Limit PostgreSQL integer values pg_int = strategies.integers(-2147483648, +2147483647) pg_text = strategies.text( alphabet=strategies.characters( blacklist_categories=["Cs"], # surrogates blacklist_characters=[ "\x00", # pgsql does not support the null codepoint "\r", # pgsql normalizes those ], ), ) pg_bytea = strategies.binary() def pg_bytea_a(min_size: int, max_size: int) -> strategies.SearchStrategy: """Generate a PostgreSQL bytea[]""" return strategies.lists(pg_bytea, min_size=min_size, max_size=max_size) def pg_bytea_a_a(min_size: int, max_size: int) -> strategies.SearchStrategy: """Generate a PostgreSQL bytea[][]. The inner lists must all have the same size.""" return strategies.integers(min_value=max(1, min_size), max_value=max_size).flatmap( lambda n: strategies.lists( pg_bytea_a(min_size=n, max_size=n), min_size=min_size, max_size=max_size ) ) def pg_tstz() -> strategies.SearchStrategy: """Generate values that fit in a PostgreSQL timestamptz. Notes: We're forbidding old datetimes, because until 1956, many timezones had seconds in their "UTC offsets" (see ), which is not representable by PostgreSQL. 
""" min_value = datetime.datetime(1960, 1, 1, 0, 0, 0) return strategies.datetimes(min_value=min_value, timezones=timezones()) def pg_jsonb(min_size: int, max_size: int) -> strategies.SearchStrategy: """Generate values representable as a PostgreSQL jsonb object (dict).""" return strategies.dictionaries( strategies.text(printable), strategies.recursive( # should use floats() instead of integers(), but PostgreSQL # coerces large integers into floats, making the tests fail. We # only store ints in our generated data anyway. strategies.none() | strategies.booleans() | strategies.integers(-2147483648, +2147483647) | strategies.text(printable), lambda children: strategies.lists(children, max_size=max_size) | strategies.dictionaries( strategies.text(printable), children, max_size=max_size ), ), min_size=min_size, max_size=max_size, ) def tuple_2d_to_list_2d(v): """Convert a 2D tuple to a 2D list""" return [list(inner) for inner in v] def list_2d_to_tuple_2d(v): """Convert a 2D list to a 2D tuple""" return tuple(tuple(inner) for inner in v) class TestIntEnum(IntEnum): foo = 1 bar = 2 def now(): return datetime.datetime.now(tz=datetime.timezone.utc) FIELDS = ( Field("i", "int", 1, pg_int), Field("txt", "text", "foo", pg_text), Field("bytes", "bytea", b"bar", strategies.binary()), Field( "bytes_array", "bytea[]", [b"baz1", b"baz2"], pg_bytea_a(min_size=0, max_size=5), ), Field( "bytes_tuple", "bytea[]", (b"baz1", b"baz2"), pg_bytea_a(min_size=0, max_size=5).map(tuple), in_wrapper=list, out_converter=tuple, ), Field( "bytes_2d", "bytea[][]", [[b"quux1"], [b"quux2"]], pg_bytea_a_a(min_size=0, max_size=5), ), Field( "bytes_2d_tuple", "bytea[][]", ((b"quux1",), (b"quux2",)), pg_bytea_a_a(min_size=0, max_size=5).map(list_2d_to_tuple_2d), in_wrapper=tuple_2d_to_list_2d, out_converter=list_2d_to_tuple_2d, ), Field( "ts", "timestamptz", now(), pg_tstz(), ), Field( "dict", "jsonb", {"str": "bar", "int": 1, "list": ["a", "b"], "nested": {"a": "b"}}, pg_jsonb(min_size=0, max_size=5), in_wrapper=psycopg2.extras.Json, ), Field( "intenum", "int", TestIntEnum.foo, strategies.sampled_from(TestIntEnum), in_wrapper=int, out_converter=lambda x: TestIntEnum(x), # lambda needed by mypy ), Field("uuid", "uuid", uuid.uuid4(), strategies.uuids()), Field( "text_list", "text[]", # All the funky corner cases ["null", "NULL", None, "\\", "\t", "\n", "\r", " ", "'", ",", '"', "{", "}"], strategies.lists(pg_text, min_size=0, max_size=5), ), Field( "tstz_list", "timestamptz[]", [now(), now() + datetime.timedelta(days=1)], strategies.lists(pg_tstz(), min_size=0, max_size=5), ), Field( "tstz_range", "tstzrange", psycopg2.extras.DateTimeTZRange( lower=now(), upper=now() + datetime.timedelta(days=1), bounds="[)", ), strategies.tuples( # generate two sorted timestamptzs for use as bounds strategies.tuples(pg_tstz(), pg_tstz()).map(sorted), # and a set of bounds strategies.sampled_from(["[]", "()", "[)", "(]"]), ).map( # and build the actual DateTimeTZRange object from these args lambda args: psycopg2.extras.DateTimeTZRange( lower=args[0][0], upper=args[0][1], bounds=args[1], ) ), ), ) INIT_SQL = "create table test_table (%s)" % ", ".join( f"{field.name} {field.pg_type}" for field in FIELDS ) COLUMNS = tuple(field.name for field in FIELDS) INSERT_SQL = "insert into test_table (%s) values (%s)" % ( ", ".join(COLUMNS), ", ".join("%s" for i in range(len(COLUMNS))), ) STATIC_ROW_IN = tuple(field.in_wrapper(field.example) for field in FIELDS) EXPECTED_ROW_OUT = tuple(field.example for field in FIELDS) db_rows = 
strategies.lists(strategies.tuples(*(field.strategy for field in FIELDS))) def convert_lines(cur): return [ tuple(field.out_converter(x) for x, field in zip(line, FIELDS)) for line in cur ] -test_db = postgresql_fact("postgresql_proc", dbname="test-db2") +test_db = factories.postgresql("postgresql_proc", dbname="test-db2") @pytest.fixture def db_with_data(test_db, request): """Fixture to initialize a db with some data out of the "INIT_SQL above""" db = BaseDb.connect(test_db.dsn) with db.cursor() as cur: psycopg2.extras.register_default_jsonb(cur) cur.execute(INIT_SQL) yield db db.conn.rollback() db.conn.close() @pytest.mark.db def test_db_connect(db_with_data): with db_with_data.cursor() as cur: psycopg2.extras.register_default_jsonb(cur) cur.execute(INSERT_SQL, STATIC_ROW_IN) cur.execute("select * from test_table;") output = convert_lines(cur) assert len(output) == 1 assert EXPECTED_ROW_OUT == output[0] def test_db_initialized(db_with_data): with db_with_data.cursor() as cur: psycopg2.extras.register_default_jsonb(cur) cur.execute(INSERT_SQL, STATIC_ROW_IN) cur.execute("select * from test_table;") output = convert_lines(cur) assert len(output) == 1 assert EXPECTED_ROW_OUT == output[0] def test_db_copy_to_static(db_with_data): items = [{field.name: field.example for field in FIELDS}] db_with_data.copy_to(items, "test_table", COLUMNS) with db_with_data.cursor() as cur: cur.execute("select * from test_table;") output = convert_lines(cur) assert len(output) == 1 assert EXPECTED_ROW_OUT == output[0] @settings(suppress_health_check=function_scoped_fixture_check, max_examples=5) @given(db_rows) def test_db_copy_to(db_with_data, data): items = [dict(zip(COLUMNS, item)) for item in data] with db_with_data.cursor() as cur: cur.execute("TRUNCATE TABLE test_table CASCADE") db_with_data.copy_to(items, "test_table", COLUMNS) with db_with_data.cursor() as cur: cur.execute("select * from test_table;") converted_lines = convert_lines(cur) assert converted_lines == data def test_db_copy_to_thread_exception(db_with_data): data = [(2**65, "foo", b"bar")] items = [dict(zip(COLUMNS, item)) for item in data] with pytest.raises(psycopg2.errors.NumericValueOutOfRange): db_with_data.copy_to(items, "test_table", COLUMNS) def test_db_transaction(mocker): expected_cur = object() called = False class Storage: @db_transaction() def endpoint(self, cur=None, db=None): nonlocal called called = True assert cur is expected_cur storage = Storage() # 'with storage.get_db().transaction() as cur:' should cause # 'cur' to be 'expected_cur' db_mock = Mock() db_mock.transaction.return_value = MagicMock() db_mock.transaction.return_value.__enter__.return_value = expected_cur mocker.patch.object(storage, "get_db", return_value=db_mock, create=True) put_db_mock = mocker.patch.object(storage, "put_db", create=True) storage.endpoint() assert called put_db_mock.assert_called_once_with(db_mock) def test_db_transaction__with_generator(): with pytest.raises(ValueError, match="generator"): class Storage: @db_transaction() def endpoint(self, cur=None, db=None): yield None def test_db_transaction_signature(): """Checks db_transaction removes the 'cur' and 'db' arguments.""" def f(self, foo, *, bar=None): pass expected_sig = inspect.signature(f) @db_transaction() def g(self, foo, *, bar=None, db=None, cur=None): pass actual_sig = inspect.signature(g) assert actual_sig == expected_sig def test_db_transaction_generator(mocker): expected_cur = object() called = False class Storage: @db_transaction_generator() def endpoint(self, cur=None, 
db=None): nonlocal called called = True assert cur is expected_cur yield None storage = Storage() # 'with storage.get_db().transaction() as cur:' should cause # 'cur' to be 'expected_cur' db_mock = Mock() db_mock.transaction.return_value = MagicMock() db_mock.transaction.return_value.__enter__.return_value = expected_cur mocker.patch.object(storage, "get_db", return_value=db_mock, create=True) put_db_mock = mocker.patch.object(storage, "put_db", create=True) list(storage.endpoint()) assert called put_db_mock.assert_called_once_with(db_mock) def test_db_transaction_generator__with_nongenerator(): with pytest.raises(ValueError, match="generator"): class Storage: @db_transaction_generator() def endpoint(self, cur=None, db=None): pass def test_db_transaction_generator_signature(): """Checks db_transaction removes the 'cur' and 'db' arguments.""" def f(self, foo, *, bar=None): pass expected_sig = inspect.signature(f) @db_transaction_generator() def g(self, foo, *, bar=None, db=None, cur=None): yield None actual_sig = inspect.signature(g) assert actual_sig == expected_sig @pytest.mark.parametrize( "query_options", (None, {"something": 42, "statement_timeout": 200}) ) @pytest.mark.parametrize("use_generator", (True, False)) def test_db_transaction_query_options(mocker, use_generator, query_options): class Storage: @db_transaction(statement_timeout=100) def endpoint(self, cur=None, db=None): return [None] @db_transaction_generator(statement_timeout=100) def gen_endpoint(self, cur=None, db=None): yield None storage = Storage() # mockers mocked_apply = mocker.patch("swh.core.db.common.apply_options") # 'with storage.get_db().transaction() as cur:' should cause # 'cur' to be 'expected_cur' expected_cur = object() db_mock = MagicMock() db_mock.transaction.return_value.__enter__.return_value = expected_cur mocker.patch.object(storage, "get_db", return_value=db_mock, create=True) mocker.patch.object(storage, "put_db", create=True) if query_options: storage.query_options = { "endpoint": query_options, "gen_endpoint": query_options, } if use_generator: list(storage.gen_endpoint()) else: list(storage.endpoint()) mocked_apply.assert_called_once_with( expected_cur, query_options if query_options is not None else {"statement_timeout": 100}, ) diff --git a/swh/core/github/tests/test_github_utils.py b/swh/core/github/tests/test_github_utils.py index d9d940c..c7b7087 100644 --- a/swh/core/github/tests/test_github_utils.py +++ b/swh/core/github/tests/test_github_utils.py @@ -1,199 +1,205 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import pytest from swh.core.github.pytest_plugin import HTTP_GITHUB_API_URL from swh.core.github.utils import ( GitHubSession, _sanitize_github_url, _url_github_api, get_canonical_github_origin_url, ) KNOWN_GH_REPO = "https://github.com/user/repo" -def _url_github_html(user_repo: str, protocol: str = "https") -> str: - """Given the user repo, returns the expected github html url.""" - return f"{protocol}://github.com/{user_repo}" - - @pytest.mark.parametrize( "user_repo, expected_url", [ ("user/repo.git", KNOWN_GH_REPO), ("user/repo.git/", KNOWN_GH_REPO), ("user/repo/", KNOWN_GH_REPO), ("user/repo", KNOWN_GH_REPO), ("user/repo/.git", KNOWN_GH_REPO), ("unknown/page", None), # unknown gh origin returns None ("user/with/deps", None), # url kind is not dealt with ], ) def 
test_get_canonical_github_origin_url( user_repo, expected_url, requests_mock, github_credentials ): """It should return a canonical github origin when it exists, None otherwise""" - for protocol in ["https", "git", "http"]: - html_input_url = _url_github_html(user_repo, protocol=protocol) - html_url = _url_github_html(user_repo) - api_url = _url_github_api(_sanitize_github_url(user_repo)) - - if expected_url is not None: - status_code = 200 - response = {"html_url": _sanitize_github_url(html_url)} - else: - status_code = 404 - response = {} - - requests_mock.get(api_url, [{"status_code": status_code, "json": response}]) - - # anonymous - assert get_canonical_github_origin_url(html_input_url) == expected_url - - # with credentials - assert ( - get_canonical_github_origin_url( - html_input_url, credentials=github_credentials + for separator in ["/", ":"]: + for prefix in [ + "http://", + "https://", + "git://", + "ssh://", + "//", + "git@", + "ssh://git@", + "https://${env.GITHUB_TOKEN_USR}:${env.GITHUB_TOKEN_PSW}@", + "[fetch=]git@", + ]: + html_input_url = f"{prefix}github.com{separator}{user_repo}" + html_url = f"https://github.com/{user_repo}" + api_url = _url_github_api(_sanitize_github_url(user_repo)) + + if expected_url is not None: + status_code = 200 + response = {"html_url": _sanitize_github_url(html_url)} + else: + status_code = 404 + response = {} + + requests_mock.get(api_url, [{"status_code": status_code, "json": response}]) + + # anonymous + assert get_canonical_github_origin_url(html_input_url) == expected_url + + # with credentials + assert ( + get_canonical_github_origin_url( + html_input_url, credentials=github_credentials + ) + == expected_url + ) + + # anonymous + assert ( + GitHubSession( + user_agent="GitHub Session Test", + ).get_canonical_url(html_input_url) + == expected_url + ) + + # with credentials + assert ( + GitHubSession( + user_agent="GitHub Session Test", credentials=github_credentials + ).get_canonical_url(html_input_url) + == expected_url ) - == expected_url - ) - - # anonymous - assert ( - GitHubSession( - user_agent="GitHub Session Test", - ).get_canonical_url(html_input_url) - == expected_url - ) - - # with credentials - assert ( - GitHubSession( - user_agent="GitHub Session Test", credentials=github_credentials - ).get_canonical_url(html_input_url) - == expected_url - ) def test_get_canonical_github_origin_url_not_gh_origin(): """It should return the input url when that origin is not a github one""" url = "https://example.org" assert get_canonical_github_origin_url(url) == url assert ( GitHubSession( user_agent="GitHub Session Test", ).get_canonical_url(url) == url ) def test_github_session_anonymous_session(): user_agent = ("GitHub Session Test",) github_session = GitHubSession( user_agent=user_agent, ) assert github_session.anonymous is True actual_headers = github_session.session.headers assert actual_headers["Accept"] == "application/vnd.github.v3+json" assert actual_headers["User-Agent"] == user_agent @pytest.mark.parametrize( "num_ratelimit", [1] # return a single rate-limit response, then continue ) def test_github_session_ratelimit_once_recovery( caplog, requests_ratelimited, num_ratelimit, monkeypatch_sleep_calls, github_credentials, ): """GitHubSession should recover from hitting the rate-limit once""" caplog.set_level(logging.DEBUG, "swh.core.github.utils") github_session = GitHubSession( user_agent="GitHub Session Test", credentials=github_credentials ) res = github_session.request(f"{HTTP_GITHUB_API_URL}?per_page=1000&since=10") assert 
res.status_code == 200 token_users = [] for record in caplog.records: if "Using authentication token" in record.message: token_users.append(record.args[0]) # check that we used one more token than we saw rate limited requests assert len(token_users) == 1 + num_ratelimit # check that we slept for one second between our token uses assert monkeypatch_sleep_calls == [1] def test_github_session_authenticated_credentials( caplog, github_credentials, all_tokens ): """GitHubSession should have Authorization headers set in authenticated mode""" caplog.set_level(logging.DEBUG, "swh.core.github.utils") github_session = GitHubSession( "GitHub Session Test", credentials=github_credentials ) assert github_session.anonymous is False assert github_session.token_index == 0 assert ( sorted(github_session.credentials, key=lambda t: t["username"]) == github_credentials ) assert github_session.session.headers["Authorization"] in [ f"token {t}" for t in all_tokens ] @pytest.mark.parametrize( # Do 5 successful requests, return 6 ratelimits (to exhaust the credentials) with a # set value for X-Ratelimit-Reset, then resume listing successfully. "num_before_ratelimit, num_ratelimit, ratelimit_reset", [(5, 6, 123456)], ) def test_github_session_ratelimit_reset_sleep( caplog, requests_ratelimited, monkeypatch_sleep_calls, num_before_ratelimit, num_ratelimit, ratelimit_reset, github_credentials, ): """GitHubSession should handle rate-limit with authentication tokens.""" caplog.set_level(logging.DEBUG, "swh.core.github.utils") github_session = GitHubSession( user_agent="GitHub Session Test", credentials=github_credentials ) for _ in range(num_ratelimit): github_session.request(f"{HTTP_GITHUB_API_URL}?per_page=1000&since=10") # We sleep 1 second every time we change credentials, then we sleep until # ratelimit_reset + 1 expected_sleep_calls = len(github_credentials) * [1] + [ratelimit_reset + 1] assert monkeypatch_sleep_calls == expected_sleep_calls found_exhaustion_message = False for record in caplog.records: if record.levelname == "INFO": if "Rate limits exhausted for all tokens" in record.message: found_exhaustion_message = True break assert found_exhaustion_message is True diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py index 867b2e4..80ffa2b 100644 --- a/swh/core/github/utils.py +++ b/swh/core/github/utils.py @@ -1,225 +1,227 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import random import re import time from typing import Dict, List, Optional import requests from tenacity import ( retry, retry_any, retry_if_exception_type, retry_if_result, wait_exponential, ) -GITHUB_PATTERN = re.compile(r"(git|https?)://github.com/(?P.*)") +GITHUB_PATTERN = re.compile( + r"(//|git://|git@|git//|https?://|ssh://|.*@)github.com[/:](?P.*)" +) logger = logging.getLogger(__name__) def _url_github_api(user_repo: str) -> str: """Given the user_repo, returns the expected github api url.""" return f"https://api.github.com/repos/{user_repo}" def _sanitize_github_url(url: str) -> str: """Sanitize github url.""" return url.lower().rstrip("/").rstrip(".git").rstrip("/") def get_canonical_github_origin_url( url: str, credentials: Optional[List[Dict[str, str]]] = None ) -> Optional[str]: """Retrieve canonical github url out of an url if any or None otherwise. 
This triggers an http request to the github api url to determine the canonical repository url (if no credentials is provided, the http request is anonymous. Either way that request can be rate-limited by github.) """ return GitHubSession( user_agent="SWH core library", credentials=credentials ).get_canonical_url(url) class RateLimited(Exception): def __init__(self, response): self.reset_time: Optional[int] # Figure out how long we need to sleep because of that rate limit ratelimit_reset = response.headers.get("X-Ratelimit-Reset") retry_after = response.headers.get("Retry-After") if ratelimit_reset is not None: self.reset_time = int(ratelimit_reset) elif retry_after is not None: self.reset_time = int(time.time()) + int(retry_after) + 1 else: logger.warning( "Received a rate-limit-like status code %s, but no rate-limit " "headers set. Response content: %s", response.status_code, response.content, ) self.reset_time = None self.response = response class MissingRateLimitReset(Exception): pass class GitHubSession: """Manages a :class:`requests.Session` with (optionally) multiple credentials, and cycles through them when reaching rate-limits.""" credentials: Optional[List[Dict[str, str]]] = None def __init__( self, user_agent: str, credentials: Optional[List[Dict[str, str]]] = None ) -> None: """Initialize a requests session with the proper headers for requests to GitHub.""" if credentials: creds = credentials.copy() random.shuffle(creds) self.credentials = creds self.session = requests.Session() self.session.headers.update( {"Accept": "application/vnd.github.v3+json", "User-Agent": user_agent} ) self.anonymous = not self.credentials if self.anonymous: logger.warning("No tokens set in configuration, using anonymous mode") self.token_index = -1 self.current_user: Optional[str] = None if not self.anonymous: # Initialize the first token value in the session headers self.set_next_session_token() def set_next_session_token(self) -> None: """Update the current authentication token with the next one in line.""" assert self.credentials self.token_index = (self.token_index + 1) % len(self.credentials) auth = self.credentials[self.token_index] self.current_user = auth["username"] logger.debug("Using authentication token for user %s", self.current_user) if "password" in auth: token = auth["password"] else: token = auth["token"] self.session.headers.update({"Authorization": f"token {token}"}) @retry( wait=wait_exponential(multiplier=1, min=4, max=10), retry=retry_any( # ChunkedEncodingErrors happen when the TLS connection gets reset, e.g. # when running the lister on a connection with high latency retry_if_exception_type(requests.exceptions.ChunkedEncodingError), # 502 status codes happen for a Server Error, sometimes retry_if_result(lambda r: r.status_code == 502), ), ) def _request(self, url: str) -> requests.Response: response = self.session.get(url) if ( # GitHub returns inconsistent status codes between unauthenticated # rate limit and authenticated rate limits. Handle both. response.status_code == 429 or (self.anonymous and response.status_code == 403) ): raise RateLimited(response) return response def request(self, url) -> requests.Response: """Repeatedly requests the given URL, cycling through credentials and sleeping if necessary; until either a successful response or :exc:`MissingRateLimitReset` """ # The following for/else loop handles rate limiting; if successful, # it provides the rest of the function with a `response` object. 
# # If all tokens are rate-limited, we sleep until the reset time, # then `continue` into another iteration of the outer while loop, # attempting to get data from the same URL again. while True: max_attempts = len(self.credentials) if self.credentials else 1 reset_times: Dict[int, int] = {} # token index -> time for attempt in range(max_attempts): try: return self._request(url) except RateLimited as e: reset_info = "(unknown reset)" if e.reset_time is not None: reset_times[self.token_index] = e.reset_time reset_info = "(resetting in %ss)" % (e.reset_time - time.time()) if not self.anonymous: logger.info( "Rate limit exhausted for current user %s %s", self.current_user, reset_info, ) # Use next token in line self.set_next_session_token() # Wait one second to avoid triggering GitHub's abuse rate limits time.sleep(1) # All tokens have been rate-limited. What do we do? if not reset_times: logger.warning( "No X-Ratelimit-Reset value found in responses for any token; " "Giving up." ) raise MissingRateLimitReset() sleep_time = max(reset_times.values()) - time.time() + 1 logger.info( "Rate limits exhausted for all tokens. Sleeping for %f seconds.", sleep_time, ) time.sleep(sleep_time) def get_canonical_url(self, url: str) -> Optional[str]: """Retrieve canonical github url out of an url if any or None otherwise. This triggers an http request to the github api url to determine the canonical repository url. Returns The canonical url if any, None otherwise. """ url_ = url.lower() match = GITHUB_PATTERN.match(url_) if not match: return url user_repo = _sanitize_github_url(match.groupdict()["user_repo"]) response = self.request(_url_github_api(user_repo)) if response.status_code != 200: return None data = response.json() return data["html_url"]
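As a reading aid, not part of the diff above: with the broadened ``GITHUB_PATTERN``,
the canonicalization helper should now accept the "exotic" URL forms exercised in
``test_github_utils.py``. A hedged usage sketch, assuming network access to the GitHub
API and using made-up repository URLs:

.. code-block:: python

   # Sketch only: canonicalizing a few of the URL shapes now matched by
   # GITHUB_PATTERN. Each call hits the GitHub API, so it may be rate-limited;
   # credentials are optional.
   from swh.core.github.utils import get_canonical_github_origin_url

   for url in (
       "git@github.com:user/repo.git",
       "ssh://git@github.com/user/repo",
       "git://github.com/user/repo/",
   ):
       # Returns "https://github.com/user/repo" when the repository exists,
       # None when the GitHub API answers with an error status, and the input
       # unchanged for URLs that do not match the GitHub pattern.
       print(get_canonical_github_origin_url(url))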