diff --git a/PKG-INFO b/PKG-INFO index 2bbc0ed..9a92675 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,33 +1,33 @@ Metadata-Version: 2.1 Name: swh.scheduler -Version: 1.2.0 +Version: 1.2.1 Summary: Software Heritage Scheduler Home-page: https://forge.softwareheritage.org/diffusion/DSCH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-scheduler Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-scheduler/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: journal Provides-Extra: simulator License-File: LICENSE License-File: LICENSE.Celery License-File: AUTHORS swh-scheduler ============= Job scheduler for the Software Heritage project. Task manager for asynchronous/delayed tasks, used for both recurrent (e.g., listing a forge, loading new stuff from a Git repository) and one-off activities (e.g., loading a specific version of a source package). diff --git a/debian/changelog b/debian/changelog index 98275db..b232271 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,1160 +1,1162 @@ -swh-scheduler (1.2.0-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-scheduler (1.2.1-1~swh1) unstable-swh; urgency=medium - * Rebuild for buster-swh + * New upstream release 1.2.1 - (tagged by David Douard + on 2022-07-08 14:57:07 +0200) + * Upstream changes: - v1.2.1 - -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Jun 2022 13:52:19 +0000 + -- Software Heritage autobuilder (on jenkins-debian1) Fri, 08 Jul 2022 14:53:11 +0000 swh-scheduler (1.2.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.2.0 - (tagged by Antoine R. Dumont (@ardumont) on 2022-06-03 15:37:19 +0200) * Upstream changes: - v1.2.0 - Remove unused get_current_version method - tests: use stock pytest_postgresql factory function -- Software Heritage autobuilder (on jenkins-debian1) Fri, 03 Jun 2022 13:47:47 +0000 swh-scheduler (1.1.2-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.2 - (tagged by Antoine Lambert on 2022-05-12 13:49:42 +0200) * Upstream changes: - version 1.1.2 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 12 May 2022 11:55:14 +0000 swh-scheduler (1.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.1 - (tagged by Valentin Lorentz on 2022-04-28 11:31:00 +0200) * Upstream changes: - v1.1.1 - * cli/utils: Fix parsing of empty strings - * Bump mypy version -- Software Heritage autobuilder (on jenkins-debian1) Thu, 28 Apr 2022 09:36:24 +0000 swh-scheduler (1.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.1.0 - (tagged by Valentin Lorentz on 2022-04-26 12:29:19 +0200) * Upstream changes: - v1.1.0 - * Add 'lister_name' and 'lister_instance_name' arguments to all tasks created from ListedOrigin - * Make scheduling policy used in schedule_recurrent configurable - * Update a bit the documentation for the new origin visit scheduler - * test/lint maitenance -- Software Heritage autobuilder (on jenkins-debian1) Tue, 26 Apr 2022 10:35:51 +0000 swh-scheduler (1.0.0-1~swh1) unstable-swh; urgency=medium * New upstream release 1.0.0 - (tagged by David Douard on 2022-02-24 16:56:30 +0100) * Upstream changes: - v1.0.0 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Feb 2022 16:03:55 +0000 swh-scheduler (0.23.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.23.0 - (tagged by Vincent SELLIER on 2022-01-06 09:35:24 +0100) * Upstream changes: - v0.23.0 - Changelog: - * Allow to specify the visit grab parameters per visit type and policy - * Pin mypy and drop type annotations which makes mypy unhappy - * Use a temporary table to update scheduler metrics - * Clean up disabled scheduler archival task related services -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 Jan 2022 08:39:47 +0000 swh-scheduler (0.22.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.22.0 - (tagged by Vincent SELLIER on 2021-12-08 09:28:57 +0100) * Upstream changes: - v0.22.0 - changelog: - Make next_visit_queue_position an integer -- Software Heritage autobuilder (on jenkins-debian1) Wed, 08 Dec 2021 09:06:01 +0000 swh-scheduler (0.21.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.21.0 - (tagged by Vincent SELLIER on 2021-12-07 08:41:11 +0100) * Upstream changes: - v0.21.0 - Changelog: - Ensure there is no duplicated origins in the insertion batches -- Software Heritage autobuilder (on jenkins-debian1) Tue, 07 Dec 2021 07:45:40 +0000 swh-scheduler (0.20.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.20.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-11-22 16:11:16 +0100) * Upstream changes: - v0.20.0 - recurrent visits scheduler: use policy weights instead of ratios - recurrent visits scheduler: Improve docs rendering - backend: Fix CardinalityViolation in grab_next_visits on duplicate origins -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Nov 2021 15:14:56 +0000 swh-scheduler (0.19.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.19.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-10-28 13:10:55 +0200) * Upstream changes: - v0.19.0 - Add a new cli endpoint to schedule recurrent visits in Celery - grab_next_visits: avoid time interval calculations in PostgreSQL - Restrict the click version to avoid conflict version with celery's - Add docstring to runner and listener modules - Drop deprecated listener module - scheduler: Deprecate unused main celery runner -- Software Heritage autobuilder (on jenkins-debian1) Thu, 28 Oct 2021 11:15:10 +0000 swh-scheduler (0.18.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.18.2 - (tagged by Antoine R. Dumont (@ardumont) on 2021-10-18 15:11:59 +0200) * Upstream changes: - v0.18.2 - Use swh_storage fixture for cli tests -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Oct 2021 13:18:56 +0000 swh-scheduler (0.18.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.18.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-10-15 15:49:35 +0200) * Upstream changes: - v0.18.1 - Return 0 slot if no more slots available in the queues -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Oct 2021 13:53:38 +0000 swh-scheduler (0.18.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.18.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-09-02 11:32:59 +0200) * Upstream changes: - v0.18.0 - Refine scheduling policy for origins with no known last update - Add a swh scheduler origin send-to-celery subcommand - runner: Improve help message on the task types flag. - send-to-celery: Add more options to allow scheduling of edge cases - Add table sampling option to grab_next_visits - journal_client: Only upsert if we have something to upsert -- Software Heritage autobuilder (on jenkins-debian1) Thu, 02 Sep 2021 09:35:32 +0000 swh-scheduler (0.17.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-08-26 10:30:12 +0200) * Upstream changes: - v0.17.1 - journal_client: Ensure queue position does not overflow -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Aug 2021 08:41:41 +0000 swh-scheduler (0.17.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-08-05 15:29:18 +0200) * Upstream changes: - v0.17.0 - Introduce new scheduling policy to grab origins without last update - journal_client: Disable origins when too many visited attempts failed - journal_client: Record last_visited and last_successful in origin_visit_stats - Add a specific cooldown for notfound origins - Add a (longer) specific cooldown for failed origin visits - Make the origin visit scheduling cooldown configurable - Various refactoring to simplify the grab next visits logic and updates -- Software Heritage autobuilder (on jenkins-debian1) Fri, 06 Aug 2021 09:11:54 +0000 swh-scheduler (0.16.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.16.0 - (tagged by Antoine Lambert on 2021-06-22 17:35:55 +0200) * Upstream changes: - version 0.16.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 22 Jun 2021 15:39:45 +0000 swh-scheduler (0.15.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-10 16:09:06 +0200) * Upstream changes: - v0.15.0 - separate-runner runner: Separate scheduling tasks with and without priority concern - Refactor and extract a get_available_slots utility - Add typing stubs dependencies for mypy>0.900 - pytest_plugin: Explicitly set hostname in broker_url for celery TestApp -- Software Heritage autobuilder (on jenkins-debian1) Thu, 10 Jun 2021 14:48:52 +0000 swh-scheduler (0.14.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.2 - (tagged by Valentin Lorentz on 2021-05-06 17:09:00 +0200) * Upstream changes: - v0.14.2 - * Fix flaky tests -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 15:13:11 +0000 swh-scheduler (0.14.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-05-06 16:00:07 +0200) * Upstream changes: - v0.14.1 - Use swh.core 0.14 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 14:17:39 +0000 swh-scheduler (0.13.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-04-20 11:46:51 +0200) * Upstream changes: - v0.13.0 - scheduler: Clean up priority/ratio task dead code - Parse task_ids before calling set_status_tasks. - tests: Complete checks on message with priority consumption -- Software Heritage autobuilder (on jenkins-debian1) Tue, 20 Apr 2021 09:51:00 +0000 swh-scheduler (0.12.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.12.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-04-15 13:31:30 +0200) * Upstream changes: - v0.12.0 - Route priority tasks to dedicated save code now queues - Fix various Sphinx warnings -- Software Heritage autobuilder (on jenkins-debian1) Thu, 15 Apr 2021 11:36:13 +0000 swh-scheduler (0.11.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-04-14 18:15:53 +0200) * Upstream changes: - v0.11.0 - separate-queues backend: Open endpoints to peek/grab tasks with any priority - Make origin_visit_stats_get return results from all pages - journal client: Filter out status messages without type - Simplify max_date() - journal_client: Fix date computations for (un)eventful visits - journal_client: Deal with failed status message -- Software Heritage autobuilder (on jenkins-debian1) Wed, 14 Apr 2021 16:19:31 +0000 swh-scheduler (0.10.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.0 - (tagged by Nicolas Dandrimont on 2021-02-03 22:53:20 +0100) * Upstream changes: - Release swh.scheduler 0.10.0 - Eagerly acknowledge celery tasks - Loads of simulator improvements - grab_next_visits: - clean up query building - account for schedule time to avoid rescheduling visits too fast - allow overriding the scheduling timestamp for the simulator -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Feb 2021 22:10:13 +0000 swh-scheduler (0.9.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.2 - (tagged by Antoine Lambert on 2021-01-25 16:27:41 +0100) * Upstream changes: - version 0.9.2 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 25 Jan 2021 15:31:21 +0000 swh-scheduler (0.9.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.1 - (tagged by Vincent SELLIER on 2021-01-21 19:20:33 +0100) * Upstream changes: - v0.9.1 - * Solve uneventful/eventful with unordered messages with snapshots - * Do not consider duplicated messages as uneventful event - * Reorganize grab_next_visits tests to better check sorting behavior -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Jan 2021 18:28:00 +0000 swh-scheduler (0.9.0-1~swh2) unstable-swh; urgency=medium * Bump new release to unstuck packaging -- Antoine R. Dumont (@ardumont) Thu, 21 Jan 2021 13:20:14 +0000 swh-scheduler (0.9.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-01-21 11:54:47 +0100) * Upstream changes: - v0.9.0 - Populate origin_visit_stats table out of the origin_visit_status topic - Introduce a scheduler policy simulator (old task-based scheduler, ...) - Implement basic aggregated metrics on listed origins - scheduler.cli.journal: Add `swh scheduler journal-client` cli - Filter origins by visit type when scheduling the next visits - Introduce a `swh scheduler origin schedule-next` cli - Introduce a `swh scheduler origin grab-next` cli - Add an new origin visit stats model object and related backend api - Implement a basic endpoint for getting the next origins to visit - doc: Add a cli section to the doc -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Jan 2021 11:00:29 +0000 swh-scheduler (0.8.2-1~swh2) unstable-swh; urgency=medium * Bump dependency -- Antoine R. Dumont (@ardumont) Tue, 08 Dec 2020 09:29:26 +0000 swh-scheduler (0.8.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-12-07 09:52:28 +0100) * Upstream changes: - v0.8.2 - requirement: Adapt celery requirements - Replace usage of arrow datetime objects in favor of pure datetime ones - Stop using the deprecated configuration scheme - cli.task_type: All task_type clis without a scheduler should raise -- Software Heritage autobuilder (on jenkins-debian1) Mon, 07 Dec 2020 08:55:39 +0000 swh-scheduler (0.8.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-24 14:13:36 +0100) * Upstream changes: - v0.8.1 - conftest: Reference swh.core.db.pytest_plugin -- Software Heritage autobuilder (on jenkins-debian1) Tue, 24 Nov 2020 13:16:08 +0000 swh-scheduler (0.8.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 13:42:05 +0100) * Upstream changes: - v0.8.0 - requirements-test.txt: Drop no longer needed pytest-postgresql requirement - scheduler.pytest_plugin: Make scheduler tests faster -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 12:44:40 +0000 swh-scheduler (0.7.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-19 09:30:36 +0200) * Upstream changes: - v0.7.0 - scheduler: Type and unify get_scheduler factory with other factories - pytest_plugin: Explicitly name the scheduler test db differently - test_server: Simplify exception manipulations - tox.ini: pin black to the pre-commit version (19.10b0) to avoid flip-flops -- Software Heritage autobuilder (on jenkins-debian1) Mon, 19 Oct 2020 07:33:54 +0000 swh-scheduler (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by David Douard on 2020-09-25 12:03:33 +0200) * Upstream changes: - v0.6.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Sep 2020 10:06:32 +0000 swh-scheduler (0.5.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.3 - (tagged by Nicolas Dandrimont on 2020-09-24 17:49:27 +0200) * Upstream changes: - Release swh.scheduler v0.5.3 - Improve swh cli startup time - Add isort and update flake8 - Improve pytest execution time - Support recent kombu versions -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Sep 2020 15:53:25 +0000 swh-scheduler (0.5.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-10 13:01:48 +0200) * Upstream changes: - v0.5.2 - Do no expose pytest-plugin through setuptools, let modules require it when needed -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Jul 2020 11:08:30 +0000 swh-scheduler (0.5.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.1 - (tagged by Nicolas Dandrimont on 2020-07-09 10:18:03 +0200) * Upstream changes: - Release swh.scheduler 0.5.1 - Drop dependency on future (not needed anymore) -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Jul 2020 09:51:38 +0000 swh-scheduler (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Nicolas Dandrimont on 2020-07-09 10:16:57 +0200) * Upstream changes: - Release swh.scheduler v0.5.0 - Move celery fixtures to the pytest plugin -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Jul 2020 08:20:42 +0000 swh-scheduler (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Nicolas Dandrimont on 2020-07-06 16:47:28 +0200) * Upstream changes: - Release swh.scheduler 0.4.0 - Extract pytest fixtures to a pytest plugin -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 Jul 2020 14:52:42 +0000 swh-scheduler (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Nicolas Dandrimont on 2020-07-06 12:18:28 +0200) * Upstream changes: - Release swh.scheduler 0.3.0 - Add get_listed_origins endpoint -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 Jul 2020 10:23:31 +0000 swh-scheduler (0.2.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.2 - (tagged by Nicolas Dandrimont on 2020-06-22 14:03:34 +0200) * Upstream changes: - Release swh.scheduler 0.2.2 - Re- introduce root endpoint for the RPC server -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 12:07:05 +0000 swh-scheduler (0.2.1-1~swh1) unstable-swh; urgency=medium [ Nicolas Dandrimont ] * Force celery >= 4.3 [ Software Heritage autobuilder (on jenkins-debian1) ] * New upstream release 0.2.1 - (tagged by Nicolas Dandrimont on 2020-06-22 12:09:32 +0200) * Upstream changes: - Release swh.scheduler 0.2.1 - Bump celery requirement to 4.3+ -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 10:12:50 +0000 swh-scheduler (0.2.0-1~swh1) unstable-swh; urgency=medium [ Nicolas Dandrimont ] * Switch from vcversioner to setuptools-scm * wrap-and-sort [ Software Heritage autobuilder (on jenkins-debian1) ] * New upstream release 0.2.0 - (tagged by Nicolas Dandrimont on 2020-06-22 10:33:11 +0200) * Upstream changes: - Release swh.scheduler 0.2.0 - Implement storage of lister and listed origin information - Add swh scheduler celery-monitor command - Overhaul RPC to use automatic generation -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 08:36:49 +0000 swh-scheduler (0.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.1 - (tagged by Nicolas Dandrimont on 2020-06-03 11:34:19 +0200) * Upstream changes: - Release swh.scheduler v0.1.1 - Add missing dependency on future for celery 4.4.4 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Jun 2020 09:39:25 +0000 swh-scheduler (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by Nicolas Dandrimont on 2020-05-19 11:48:34 +0200) * Upstream changes: - Release swh.scheduler v0.1.0 - Blacken source code - Disable azure http logspam - Only schedule tasks when the buffer is somewhat empty -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 May 2020 09:52:31 +0000 swh-scheduler (0.0.72-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.72 - (tagged by Nicolas Dandrimont on 2020-03-23 13:07:38 +0100) * Upstream changes: - Release swh.scheduler v0.0.72 - Update instantiation of storage in tests - ensure that create_task_type is idempotent - introduce new listener based on pika -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Mar 2020 12:12:00 +0000 swh-scheduler (0.0.71-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.71 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-23 14:24:56 +0100) * Upstream changes: - v0.0.71 - sentry: Fix initialization init_sentry call -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Jan 2020 13:29:33 +0000 swh-scheduler (0.0.70-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.70 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-23 13:43:35 +0100) * Upstream changes: - v0.0.70 - Use swh.core.sentry instead of calling sentry_sdk.init directly - backend_es: Fix configuration mapping -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Jan 2020 12:47:43 +0000 swh-scheduler (0.0.69-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.69 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-17 16:00:24 +0100) * Upstream changes: - v0.0.69 - Fix scheduler's archive task cli - Make the filter task endpoint a paginated endpoint - Add coverage on the archive task cli -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 15:04:48 +0000 swh-scheduler (0.0.68-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.68 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-17 15:28:13 +0100) * Upstream changes: - v0.0.68 - Fix scheduler's archive task cli - Make the filter task endpoint a paginated endpoint - Add coverage on the archive task cli -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 14:33:33 +0000 swh-scheduler (0.0.67-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.67 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-17 14:33:36 +0100) * Upstream changes: - v0.0.67 - Fix scheduler's archive task cli - Make the filter task endpoint a paginated endpoint - Add coverage on the archive task cli -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 13:38:03 +0000 swh-scheduler (0.0.66-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.66 - (tagged by Nicolas Dandrimont on 2019-12-17 12:04:20 +0100) * Upstream changes: - Release swh.scheduler v0.0.66 - initialize sentry on celery worker startup - improve task archival endpoints in backend api -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 11:08:25 +0000 swh-scheduler (0.0.65-1~swh2) unstable-swh; urgency=medium * Add pytest-mock build-dependency. -- Nicolas Dandrimont Fri, 13 Dec 2019 11:57:41 +0100 swh-scheduler (0.0.65-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.65 - (tagged by Nicolas Dandrimont on 2019-12-13 11:45:55 +0100) * Upstream changes: - Release swh.scheduler v0.0.65 - Drop the scheduler updater - Add a statsd probe for task execution timestamps - Add listener and runner statsd probes - CLI updates - Python packaging housekeeping -- Software Heritage autobuilder (on jenkins-debian1) Fri, 13 Dec 2019 10:54:31 +0000 swh-scheduler (0.0.64-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.64 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-20 14:26:00 +0100) * Upstream changes: - v0.0.64 - req-swh*: Remove old package loader backend names -- Software Heritage autobuilder (on jenkins-debian1) Wed, 20 Nov 2019 13:29:37 +0000 swh-scheduler (0.0.63-1~swh2) unstable-swh; urgency=medium * Update build dependency -- Antoine R. Dumont (@ardumont) Tue, 19 Nov 2019 17:07:40 +0100 swh-scheduler (0.0.63-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.63 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-19 14:09:12 +0100) * Upstream changes: - v0.0.63 - swh.scheduler.cli: Add `swh scheduler task-type register` cli - Use the shared_task decorator instead of binding to a specific celery app - celery/tests: mostly revert e770eb30 to fix celery app initialization in tests -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 Nov 2019 13:14:59 +0000 swh-scheduler (0.0.62-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.62 - (tagged by Antoine R. Dumont (@ardumont) on 2019-10-18 13:39:27 +0200) * Upstream changes: - v0.0.62 - celery_backend.config: Make JournalHandler import optional - tests: rewrite tests using pytest fixtures -- Software Heritage autobuilder (on jenkins-debian1) Fri, 18 Oct 2019 11:46:26 +0000 swh-scheduler (0.0.61-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.61 - (tagged by Nicolas Dandrimont on 2019-10-07 16:33:17 +0200) * Upstream changes: - Release swh.scheduler v0.0.61 - Remove bogus dict.get(default=) statement -- Software Heritage autobuilder (on jenkins-debian1) Mon, 07 Oct 2019 14:37:37 +0000 swh-scheduler (0.0.60-1~swh2) unstable-swh; urgency=medium * Force postgresql executable to a pg_ctl that exists when running tests. -- Nicolas Dandrimont Tue, 01 Oct 2019 18:14:39 +0200 swh-scheduler (0.0.60-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.60 - (tagged by Stefano Zacchiroli on 2019-10-01 13:13:13 +0200) * Upstream changes: - v0.0.60 - * tox: anticipate mypy run to just after flake8 - * init.py: switch to documented way of extending path - * tox.ini: add mypy section - * typing: minimal changes to make a no-op mypy run pass - * fix typo in docstring and sample file name - * admin CLI: drop obsolete backward compatibility aliases - * click "required" param wants bool, not int -- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Oct 2019 11:22:43 +0000 swh-scheduler (0.0.59-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.59 - (tagged by David Douard on 2019-09-04 16:08:27 +0200) * Upstream changes: - v0.0.59 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Sep 2019 14:11:48 +0000 swh-scheduler (0.0.58-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.58 - (tagged by Antoine R. Dumont (@ardumont) on 2019-09-03 10:19:34 +0200) * Upstream changes: - v0.0.58 - celery: auto add tasks declared in the swh.workers entry point in task_modules - api/client: use RPCClient instead of deprecated SWHRemoteAPI - Make schedule_origins use origin urls instead of ids in task arguments. - docs: add code of conduct document - docs: very beginning of a practical documentation on the scheduler - config: Add a pre-commit config file - data: Insert new cgit instance lister task - data: Insert load-tar task-type -- Software Heritage autobuilder (on jenkins-debian1) Tue, 03 Sep 2019 08:28:19 +0000 swh-scheduler (0.0.57-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.57 - (tagged by David Douard on 2019-06-26 14:56:32 +0200) * Upstream changes: - v0.0.57 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 26 Jun 2019 13:05:20 +0000 swh-scheduler (0.0.56-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.56 - (tagged by Nicolas Dandrimont on 2019-05-07 18:16:20 +0200) * Upstream changes: - listener: Release the db object after using it - This is the contract that get_db/put_db is supposed to conform to. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 14 May 2019 12:40:09 +0000 swh-scheduler (0.0.55-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.55 - (tagged by Antoine Lambert on 2019-05-06 11:47:43 +0200) * Upstream changes: - version 0.0.55 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 May 2019 09:54:51 +0000 swh-scheduler (0.0.54-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.54 - (tagged by Antoine R. Dumont (@ardumont) on 2019-04-11 11:33:40 +0200) * Upstream changes: - v0.0.54 - cli_utils: Use yaml.safe_load instead of yaml.load - Fix support of latest versions of swh- core and psycopg2 - sql/data: Add npm related task types -- Software Heritage autobuilder (on jenkins-debian1) Thu, 11 Apr 2019 09:40:14 +0000 swh-scheduler (0.0.53-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.53 - (tagged by Antoine Lambert on 2019-04-04 16:45:56 +0200) * Upstream changes: - version 0.0.53 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Apr 2019 14:55:20 +0000 swh-scheduler (0.0.52-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.52 - (tagged by Nicolas Dandrimont on 2019-04-03 10:54:06 +0200) * Upstream changes: - Release swh.scheduler v0.0.52 - Move to result_serializer = json to work around celery 4.3 bug - Fix db initialization -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Apr 2019 08:59:00 +0000 swh-scheduler (0.0.51-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.51 - (tagged by Antoine R. Dumont (@ardumont) on 2019-03-22 12:09:22 +0100) * Upstream changes: - v0.0.51 - requirements.txt: Remove kombu dependency -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Mar 2019 11:16:06 +0000 swh-scheduler (0.0.50-1~swh2) unstable-swh; urgency=medium * Update build- and runtime dependencies -- Nicolas Dandrimont Fri, 15 Mar 2019 18:24:11 +0100 swh-scheduler (0.0.50-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.50 - (tagged by Nicolas Dandrimont on 2019-03-15 18:07:24 +0100) * Upstream changes: - Release swh.scheduler v0.0.50 - Add an explicit log target for stdout and/or journald - Avoid useless log lines - Improve test coverage - Add support for non- string options in the CLI -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Mar 2019 17:16:03 +0000 swh-scheduler (0.0.49-1~swh2) unstable-swh; urgency=medium * Export LC_ALL=C.UTF-8 -- Nicolas Dandrimont Thu, 14 Mar 2019 13:42:24 +0100 swh-scheduler (0.0.49-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.49 - (tagged by Nicolas Dandrimont on 2019-03-03 08:48:04 +0100) * Upstream changes: - Release swh.scheduler v0.0.49 - various fixes around celery behavior - move wsgi endpoint to a separate module - add tests for the CLI -- Software Heritage autobuilder (on jenkins-debian1) Sun, 03 Mar 2019 07:55:41 +0000 swh-scheduler (0.0.48-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.48 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-22 16:11:51 +0100) * Upstream changes: - v0.0.48 - Fix comment on main scheduler schema -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Feb 2019 15:17:20 +0000 swh-scheduler (0.0.47-1~swh2) unstable-swh; urgency=low * Upstream release to fix build dependencies issue -- Antoine Romain Dumont (@ardumont) Thu, 21 Feb 2019 15:41:24 +0100 swh-scheduler (0.0.47-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.47 - (tagged by Valentin Lorentz on 2019-02-20 16:53:20 +0100) * Upstream changes: - Fix crash of SchedulerBackend.search_tasks when no argument is given. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Feb 2019 09:13:07 +0000 swh-scheduler (0.0.46-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.46 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-15 15:05:47 +0100) * Upstream changes: - v0.0.46 - scheduler.task: Remove no longer used Task class -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Feb 2019 14:15:26 +0000 swh-scheduler (0.0.45-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.45 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-15 10:43:07 +0100) * Upstream changes: - v0.0.45 - celery_backend/config: Fix loglevel for amqp module -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Feb 2019 09:48:25 +0000 swh-scheduler (0.0.44-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.44 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-13 16:29:05 +0100) * Upstream changes: - v0.0.44 - swh-scheduler-api: Fix configuration read too many times -- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Feb 2019 15:34:34 +0000 swh-scheduler (0.0.43-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.43 - (tagged by David Douard on 2019-02-13 15:27:27 +0100) * Upstream changes: - v0.0.43 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Feb 2019 14:46:59 +0000 swh-scheduler (0.0.42-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.42 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-11 14:28:10 +0100) * Upstream changes: - v0.0.42 - Fix dependency requirements for hypothesis -- Software Heritage autobuilder (on jenkins-debian1) Mon, 11 Feb 2019 13:33:48 +0000 swh-scheduler (0.0.41-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.41 - (tagged by David Douard on 2019-02-06 15:25:56 +0100) * Upstream changes: - v0.0.41 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 06 Feb 2019 15:33:04 +0000 swh-scheduler (0.0.40-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.40 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-28 16:24:04 +0100) * Upstream changes: - v0.0.40 - swh.scheduler.tests: Mark db tests as such - Force tox environment to C.UTF-8 locale - Add debug logging in the SWHTask class -- Software Heritage autobuilder (on jenkins-debian1) Mon, 28 Jan 2019 15:30:41 +0000 swh-scheduler (0.0.39-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.39 - (tagged by David Douard on 2019-01-16 13:37:58 +0100) * Upstream changes: - v0.0.39 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 16 Jan 2019 12:42:37 +0000 swh-scheduler (0.0.38-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.38 - (tagged by David Douard on 2018-12-20 14:39:59 +0100) * Upstream changes: - v0.0.38 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Jan 2019 18:32:14 +0000 swh-scheduler (0.0.35-1~swh1) unstable-swh; urgency=medium * v0.0.35 * tests: Add SchedulerTestFixture * swh.scheduler.utils: Allow to add more task information * sql/40-swh-data: Update new indexer task types for local db -- Antoine R. Dumont (@ardumont) Mon, 29 Oct 2018 10:07:08 +0100 swh-scheduler (0.0.34-1~swh1) unstable-swh; urgency=medium * v0.0.34 * Finalize pytest migration -- Antoine R. Dumont (@ardumont) Thu, 25 Oct 2018 17:52:03 +0200 swh-scheduler (0.0.33-1~swh1) unstable-swh; urgency=medium * v0.0.33 -- David Douard Thu, 25 Oct 2018 16:03:16 +0200 swh-scheduler (0.0.32-1~swh1) unstable-swh; urgency=medium * v0.0.32 * tests: Add celery fixture to ease tests * tests: make tests use sql/ files from the package * tests: Starting migration towards pytest * listener: Make the listener code compatible with new celery (debian buster) * Make swh_scheduler_create_tasks_from_temp use indexes * setup: prepare for pypi upload * docs: add a simple README file -- Antoine R. Dumont (@ardumont) Mon, 22 Oct 2018 15:37:51 +0200 swh-scheduler (0.0.31-1~swh1) unstable-swh; urgency=medium * v0.0.31 * sql/swh-scheduler: Make the create_tasks call idempotent * swh.scheduler.utils: Open create_task_dict function * sql/scheduler-data: Add lister gitlab task types * sql/scheduler-data: Reference the existing production lister data * swh.scheduler.backend_es: Open sniffing options -- Antoine R. Dumont (@ardumont) Tue, 31 Jul 2018 06:55:39 +0200 swh-scheduler (0.0.30-1~swh1) unstable-swh; urgency=medium * v0.0.30 * swh-scheduler-schema.sql: Archive disabled oneshot tasks as well * swh.scheduler.cli: Add policy to pretty printing task routine * swh.scheduler.cli: Fix broken cli list-pending since api change -- Antoine R. Dumont (@ardumont) Fri, 22 Jun 2018 18:07:02 +0200 swh-scheduler (0.0.29-1~swh1) unstable-swh; urgency=medium * v0.0.29 * swh.scheduler.cli: Change archival period to rolling month - 1 week * swh.scheduler.updater.writer: Force filter resolution to list * swh.scheduler.cli: Change default archival period to current month * swh.scheduler.cli: Improve logging message * swh.scheduler.updater.backend: Adapt configuration path accordingly -- Antoine R. Dumont (@ardumont) Thu, 31 May 2018 11:42:51 +0200 swh-scheduler (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * Fix wrong runtime dependencies -- Antoine R. Dumont (@ardumont) Tue, 29 May 2018 14:12:15 +0200 swh-scheduler (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * scheduler: Deal with priority in tasks * scheduler-update: new package python3-swh.scheduler.updater * Contains tools in charge of consuming events from arbitrary sources * and update the scheduler db -- Antoine R. Dumont (@ardumont) Tue, 29 May 2018 12:27:34 +0200 swh-scheduler (0.0.26-1~swh1) unstable-swh; urgency=medium * v0.0.26 * swh.scheduler: Fix package build * swh.scheduler.tests: Test remote scheduler api as well * swh.scheduler: Add tests around removing archivable tasks * swh.scheduler: Add tests around filtering archivable tasks * swh-scheduler-schema: Fix unneeded drop instructions * swh.scheduler.cli: Improve docstring * swh.scheduler.cli: Permit to specify the backend to use in cli * swh.scheduler.api: Bootstrap scheduler's remote api * swh.scheduler: Use `get_scheduler` api to instantiate a scheduler * swh.scheduler.backend: Fix docstring -- Antoine R. Dumont (@ardumont) Thu, 26 Apr 2018 17:34:07 +0200 swh-scheduler (0.0.25-1~swh1) unstable-swh; urgency=medium * v0.0.25 * swh.scheduler.cli.archive: Index arguments.kwargs as text -- Antoine R. Dumont (@ardumont) Wed, 18 Apr 2018 12:34:43 +0200 swh-scheduler (0.0.24-1~swh1) unstable-swh; urgency=medium * v0.0.24 * data/template: Do not index the arguments field (it's in _source) * data/README: Add a small readme to explain es install step * swh.scheduler.cli: Add a bulk index flag to separate read from index -- Antoine R. Dumont (@ardumont) Fri, 13 Apr 2018 14:55:32 +0200 swh-scheduler (0.0.23-1~swh1) unstable-swh; urgency=medium * swh.scheduler.cli.archive: Delete only completely indexed tasks * Prior to this commit, it could happen that we removed tasks even * though we did not yet index associated task_run. * Related T986 -- Antoine R. Dumont (@ardumont) Tue, 10 Apr 2018 17:43:07 +0200 swh-scheduler (0.0.22-1~swh1) unstable-swh; urgency=medium * v0.0.22 * Update to a more recent python3-elasticsearch client -- Antoine R. Dumont (@ardumont) Mon, 09 Apr 2018 16:09:16 +0200 swh-scheduler (0.0.21-1~swh1) unstable-swh; urgency=medium * v0.0.21 * Adapt default configuration * Fix typo in configuration variable name -- Antoine R. Dumont (@ardumont) Fri, 30 Mar 2018 15:02:55 +0200 swh-scheduler (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * swh.scheduler.cli.archive: Open completed oneshot or disabled * recurring tasks archival endpoint * swh.core.serializer: Move to msgpack serialization format * swh.scheduler.cli: Unify pretty print output * sql/data: Add new task type for loading mercurial dump * swh.scheduler.cli: Add sample use case for the scheduling cli * swh.scheduler.cli: Open policy column to the scheduling cli * swh.scheduler.cli: Open the delimiter option as cli argument * Fix issue when updating task-type without any retry delay defined * swh-scheduler/data: Add new oneshot scheduling load-mercurial task * backend: fix default scheduling_db value for consistency * backend: doc: fix return value of create_tasks -- Antoine R. Dumont (@ardumont) Fri, 30 Mar 2018 11:44:18 +0200 swh-scheduler (0.0.19-1~swh1) unstable-swh; urgency=medium * v0.0.19 * swh.scheduler.utils: Open utility function to create oneshot task -- Antoine R. Dumont (@ardumont) Wed, 29 Nov 2017 12:51:15 +0100 swh-scheduler (0.0.18-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.18 * Celery 4 compatibility -- Nicolas Dandrimont Wed, 08 Nov 2017 17:06:22 +0100 swh-scheduler (0.0.17-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler version 0.0.17 * Update packaging runes -- Nicolas Dandrimont Thu, 12 Oct 2017 18:49:02 +0200 swh-scheduler (0.0.16-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.16 * add some tests * implement one-shot tasks * implement retry on temporary failure -- Nicolas Dandrimont Mon, 07 Aug 2017 18:44:03 +0200 swh-scheduler (0.0.15-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.15 * Add some methods to get the length of task queues * worker: Show logs on stdout if loglevel = debug -- Nicolas Dandrimont Mon, 19 Jun 2017 19:44:56 +0200 swh-scheduler (0.0.14-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler 0.0.14 * Make the return value of tasks available in the listener -- Nicolas Dandrimont Mon, 12 Jun 2017 17:50:32 +0200 swh-scheduler (0.0.13-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.13 * Use systemd for logging rather than PostgreSQL -- Nicolas Dandrimont Fri, 07 Apr 2017 11:57:50 +0200 swh-scheduler (0.0.12-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.12 * Only log to database if the configuration is present -- Nicolas Dandrimont Thu, 09 Mar 2017 11:12:45 +0100 swh-scheduler (0.0.11-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.11 * add utils.get_task -- Nicolas Dandrimont Tue, 14 Feb 2017 19:49:34 +0100 swh-scheduler (0.0.10-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.10 * Allow disabling tasks -- Nicolas Dandrimont Thu, 20 Oct 2016 17:20:17 +0200 swh-scheduler (0.0.9-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.9 * Revert management of one shot tasks * Add possibility of launching several worker instances -- Nicolas Dandrimont Fri, 02 Sep 2016 17:09:18 +0200 swh-scheduler (0.0.7-1~swh1) unstable-swh; urgency=medium * v0.0.7 * Add oneshot task -- Antoine R. Dumont (@ardumont) Fri, 01 Jul 2016 16:42:45 +0200 swh-scheduler (0.0.6-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.6 * More reliability and efficiency when scheduling a lot of tasks -- Nicolas Dandrimont Wed, 24 Feb 2016 18:46:57 +0100 swh-scheduler (0.0.5-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.5 * Use copy for task mass-scheduling -- Nicolas Dandrimont Wed, 24 Feb 2016 12:13:38 +0100 swh-scheduler (0.0.4-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.4 * general cleanup of the backend * use arrow instead of dateutil * add new cli program -- Nicolas Dandrimont Tue, 23 Feb 2016 17:46:04 +0100 swh-scheduler (0.0.3-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler version 0.0.3 * Implement the timestamp arguments to the task_run functions * Make the celery event listener use a reliable queue -- Nicolas Dandrimont Mon, 22 Feb 2016 15:14:28 +0100 swh-scheduler (0.0.2-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.2 * Multiple schema changes * Initial releases for the celery job runner and the event listener -- Nicolas Dandrimont Fri, 19 Feb 2016 18:50:47 +0100 swh-scheduler (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * Release swh.scheduler v0.0.1 * Move swh.core.scheduling and swh.core.worker to swh.scheduler -- Nicolas Dandrimont Mon, 15 Feb 2016 11:07:30 +0100 diff --git a/swh.scheduler.egg-info/PKG-INFO b/swh.scheduler.egg-info/PKG-INFO index 2bbc0ed..9a92675 100644 --- a/swh.scheduler.egg-info/PKG-INFO +++ b/swh.scheduler.egg-info/PKG-INFO @@ -1,33 +1,33 @@ Metadata-Version: 2.1 Name: swh.scheduler -Version: 1.2.0 +Version: 1.2.1 Summary: Software Heritage Scheduler Home-page: https://forge.softwareheritage.org/diffusion/DSCH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-scheduler Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-scheduler/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: journal Provides-Extra: simulator License-File: LICENSE License-File: LICENSE.Celery License-File: AUTHORS swh-scheduler ============= Job scheduler for the Software Heritage project. Task manager for asynchronous/delayed tasks, used for both recurrent (e.g., listing a forge, loading new stuff from a Git repository) and one-off activities (e.g., loading a specific version of a source package). diff --git a/swh/scheduler/__init__.py b/swh/scheduler/__init__.py index b1f98ba..cfb15fa 100644 --- a/swh/scheduler/__init__.py +++ b/swh/scheduler/__init__.py @@ -1,76 +1,76 @@ # Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from importlib import import_module from typing import TYPE_CHECKING, Any, Dict import warnings DEFAULT_CONFIG = { "scheduler": ( "dict", { - "cls": "local", + "cls": "postgresql", "db": "dbname=softwareheritage-scheduler-dev", }, ) } # current configuration. To be set by the config loading mechanism CONFIG = {} # type: Dict[str, Any] if TYPE_CHECKING: from swh.scheduler.interface import SchedulerInterface BACKEND_TYPES: Dict[str, str] = { "postgresql": ".backend.SchedulerBackend", "remote": ".api.client.RemoteScheduler", # deprecated "local": ".backend.SchedulerBackend", } def get_scheduler(cls: str, **kwargs) -> SchedulerInterface: """ Get a scheduler object of class `cls` with arguments `**kwargs`. Args: cls: scheduler's class, either 'local' or 'remote' kwargs: arguments to pass to the class' constructor Returns: an instance of swh.scheduler, either local or remote: local: swh.scheduler.backend.SchedulerBackend remote: swh.scheduler.api.client.RemoteScheduler Raises: ValueError if passed an unknown storage class. """ if "args" in kwargs: warnings.warn( 'Explicit "args" key is deprecated, use keys directly instead.', DeprecationWarning, ) kwargs = kwargs["args"] class_path = BACKEND_TYPES.get(cls) if class_path is None: raise ValueError( f"Unknown Scheduler class `{cls}`. " f"Supported: {', '.join(BACKEND_TYPES)}" ) (module_path, class_name) = class_path.rsplit(".", 1) module = import_module(module_path, package=__package__) BackendClass = getattr(module, class_name) return BackendClass(**kwargs) get_datastore = get_scheduler diff --git a/swh/scheduler/api/server.py b/swh/scheduler/api/server.py index 5854095..abc3ab8 100644 --- a/swh/scheduler/api/server.py +++ b/swh/scheduler/api/server.py @@ -1,150 +1,150 @@ # Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import os from swh.core import config from swh.core.api import JSONFormatter, MsgpackFormatter, RPCServerApp from swh.core.api import encode_data_server as encode_data from swh.core.api import error_handler, negotiate from swh.scheduler import get_scheduler from swh.scheduler.exc import SchedulerException from swh.scheduler.interface import SchedulerInterface from .serializers import DECODERS, ENCODERS scheduler = None def get_global_scheduler(): global scheduler if not scheduler: scheduler = get_scheduler(**app.config["scheduler"]) return scheduler class SchedulerServerApp(RPCServerApp): extra_type_decoders = DECODERS extra_type_encoders = ENCODERS app = SchedulerServerApp( __name__, backend_class=SchedulerInterface, backend_factory=get_global_scheduler ) @app.errorhandler(SchedulerException) def argument_error_handler(exception): return error_handler(exception, encode_data, status_code=400) @app.errorhandler(Exception) def my_error_handler(exception): return error_handler(exception, encode_data) def has_no_empty_params(rule): return len(rule.defaults or ()) >= len(rule.arguments or ()) @app.route("/") def index(): return """ Software Heritage scheduler RPC server

You have reached the Software Heritage scheduler RPC server.
See its documentation and API for more information

""" @app.route("/site-map") @negotiate(MsgpackFormatter) @negotiate(JSONFormatter) def site_map(): links = [] for rule in app.url_map.iter_rules(): if has_no_empty_params(rule) and hasattr(SchedulerInterface, rule.endpoint): links.append( dict( rule=rule.rule, description=getattr(SchedulerInterface, rule.endpoint).__doc__, ) ) # links is now a list of url, endpoint tuples return links def load_and_check_config(config_path, type="local"): """Check the minimal configuration is set to run the api or raise an error explanation. Args: config_path (str): Path to the configuration file to load type (str): configuration type. For 'local' type, more checks are done. Raises: Error if the setup is not as expected Returns: configuration as a dict """ if not config_path: raise EnvironmentError("Configuration file must be defined") if not os.path.exists(config_path): raise FileNotFoundError(f"Configuration file {config_path} does not exist") cfg = config.read(config_path) vcfg = cfg.get("scheduler") if not vcfg: raise KeyError("Missing '%scheduler' configuration") if type == "local": cls = vcfg.get("cls") - if cls != "local": + if cls not in ("local", "postgresql"): raise ValueError( - "The scheduler backend can only be started with a 'local' " + "The scheduler backend can only be started with a 'postgresql' " "configuration" ) db = vcfg.get("db") if not db: raise KeyError("Invalid configuration; missing 'db' config entry") return cfg api_cfg = None def make_app_from_configfile(): """Run the WSGI app from the webserver, loading the configuration from a configuration file. SWH_CONFIG_FILENAME environment variable defines the configuration path to load. """ global api_cfg if not api_cfg: config_path = os.environ.get("SWH_CONFIG_FILENAME") api_cfg = load_and_check_config(config_path) app.config.update(api_cfg) handler = logging.StreamHandler() app.logger.addHandler(handler) return app if __name__ == "__main__": print('Please use the "swh-scheduler api-server" command') diff --git a/swh/scheduler/celery_backend/pika_listener.py b/swh/scheduler/celery_backend/pika_listener.py index a5c48f2..426b271 100644 --- a/swh/scheduler/celery_backend/pika_listener.py +++ b/swh/scheduler/celery_backend/pika_listener.py @@ -1,110 +1,112 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """This is the scheduler listener. It is in charge of listening to rabbitmq events (the task result) and flushes the "oneshot" tasks' status in the scheduler backend. It's the final step after a task is done. The scheduler runner :mod:`swh.scheduler.celery_backend.runner` is the module in charge of pushing tasks in the queue. """ import json import logging import sys import pika from swh.core.statsd import statsd from swh.scheduler import get_scheduler from swh.scheduler.utils import utcnow logger = logging.getLogger(__name__) def get_listener(broker_url, queue_name, scheduler_backend): connection = pika.BlockingConnection(pika.URLParameters(broker_url)) channel = connection.channel() channel.queue_declare(queue=queue_name, durable=True) exchange = "celeryev" routing_key = "#" channel.queue_bind(queue=queue_name, exchange=exchange, routing_key=routing_key) channel.basic_qos(prefetch_count=1000) channel.basic_consume( queue=queue_name, on_message_callback=get_on_message(scheduler_backend), ) return channel def get_on_message(scheduler_backend): def on_message(channel, method_frame, properties, body): try: events = json.loads(body) except Exception: logger.warning("Could not parse body %r", body) events = [] if not isinstance(events, list): events = [events] for event in events: logger.debug("Received event %r", event) process_event(event, scheduler_backend) channel.basic_ack(delivery_tag=method_frame.delivery_tag) return on_message def process_event(event, scheduler_backend): uuid = event.get("uuid") if not uuid: return event_type = event["type"] statsd.increment( "swh_scheduler_listener_handled_event_total", tags={"event_type": event_type} ) if event_type == "task-started": scheduler_backend.start_task_run( uuid, timestamp=utcnow(), metadata={"worker": event.get("hostname")}, ) elif event_type == "task-result": result = event["result"] status = None if isinstance(result, dict) and "status" in result: status = result["status"] if status == "success": status = "eventful" if result.get("eventful") else "uneventful" if status is None: status = "eventful" if result else "uneventful" scheduler_backend.end_task_run( uuid, timestamp=utcnow(), status=status, result=result ) elif event_type == "task-failed": scheduler_backend.end_task_run(uuid, timestamp=utcnow(), status="failed") if __name__ == "__main__": url = sys.argv[1] logging.basicConfig(level=logging.DEBUG) - scheduler_backend = get_scheduler("local", args={"db": "service=swh-scheduler"}) + scheduler_backend = get_scheduler( + "postgresql", args={"db": "service=swh-scheduler"} + ) channel = get_listener(url, "celeryev.test", scheduler_backend) logger.info("Start consuming") channel.start_consuming() diff --git a/swh/scheduler/celery_backend/runner.py b/swh/scheduler/celery_backend/runner.py index be59acf..525df02 100644 --- a/swh/scheduler/celery_backend/runner.py +++ b/swh/scheduler/celery_backend/runner.py @@ -1,184 +1,184 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """This is the first scheduler runner. It is in charge of scheduling "oneshot" tasks (e.g save code now, indexer, vault, deposit, ...). To do this, it reads tasks ouf of the scheduler backend and pushes those to their associated rabbitmq queues. The scheduler listener :mod:`swh.scheduler.celery_backend.pika_listener` is the module in charge of finalizing the task results. """ import logging from typing import Dict, List, Tuple from deprecated import deprecated from kombu.utils.uuid import uuid from swh.core.statsd import statsd from swh.scheduler import get_scheduler from swh.scheduler.celery_backend.config import get_available_slots from swh.scheduler.interface import SchedulerInterface from swh.scheduler.utils import utcnow logger = logging.getLogger(__name__) # Max batch size for tasks MAX_NUM_TASKS = 10000 def run_ready_tasks( backend: SchedulerInterface, app, task_types: List[Dict] = [], with_priority: bool = False, ) -> List[Dict]: """Schedule tasks ready to be scheduled. This lookups any tasks per task type and mass schedules those accordingly (send messages to rabbitmq and mark as scheduled equivalent tasks in the scheduler backend). If tasks (per task type) with priority exist, they will get redirected to dedicated high priority queue (standard queue name prefixed with `save_code_now:`). Args: backend: scheduler backend to interact with (read/update tasks) app (App): Celery application to send tasks to task_types: The list of task types dict to iterate over. By default, empty. When empty, the full list of task types referenced in the scheduler will be used. with_priority: If True, only tasks with priority set will be fetched and scheduled. By default, False. Returns: A list of dictionaries:: { 'task': the scheduler's task id, 'backend_id': Celery's task id, 'scheduler': utcnow() } The result can be used to block-wait for the tasks' results:: backend_tasks = run_ready_tasks(self.scheduler, app) for task in backend_tasks: AsyncResult(id=task['backend_id']).get() """ all_backend_tasks: List[Dict] = [] while True: if not task_types: task_types = backend.get_task_types() task_types_d = {} pending_tasks = [] for task_type in task_types: task_type_name = task_type["type"] task_types_d[task_type_name] = task_type max_queue_length = task_type["max_queue_length"] if max_queue_length is None: max_queue_length = 0 backend_name = task_type["backend_name"] if with_priority: # grab max_queue_length (or 10) potential tasks with any priority for # the same type (limit the result to avoid too long running queries) grabbed_priority_tasks = backend.grab_ready_priority_tasks( task_type_name, num_tasks=max_queue_length or 10 ) if grabbed_priority_tasks: pending_tasks.extend(grabbed_priority_tasks) logger.info( "Grabbed %s tasks %s (priority)", len(grabbed_priority_tasks), task_type_name, ) statsd.increment( "swh_scheduler_runner_scheduled_task_total", len(grabbed_priority_tasks), tags={"task_type": task_type_name}, ) else: num_tasks = get_available_slots(app, backend_name, max_queue_length) # only pull tasks if the buffer is at least 1/5th empty (= 80% # full), to help postgresql use properly indexed queries. if num_tasks > min(MAX_NUM_TASKS, max_queue_length) // 5: # Only grab num_tasks tasks with no priority grabbed_tasks = backend.grab_ready_tasks( task_type_name, num_tasks=num_tasks ) if grabbed_tasks: pending_tasks.extend(grabbed_tasks) logger.info( "Grabbed %s tasks %s", len(grabbed_tasks), task_type_name ) statsd.increment( "swh_scheduler_runner_scheduled_task_total", len(grabbed_tasks), tags={"task_type": task_type_name}, ) if not pending_tasks: return all_backend_tasks backend_tasks = [] celery_tasks: List[Tuple[bool, str, str, List, Dict]] = [] for task in pending_tasks: args = task["arguments"]["args"] kwargs = task["arguments"]["kwargs"] backend_name = task_types_d[task["type"]]["backend_name"] backend_id = uuid() celery_tasks.append( ( task.get("priority") is not None, backend_name, backend_id, args, kwargs, ) ) data = { "task": task["id"], "backend_id": backend_id, "scheduled": utcnow(), } backend_tasks.append(data) logger.debug("Sent %s celery tasks", len(backend_tasks)) backend.mass_schedule_task_runs(backend_tasks) for with_priority, backend_name, backend_id, args, kwargs in celery_tasks: kw = dict( task_id=backend_id, args=args, kwargs=kwargs, ) if with_priority: kw["queue"] = f"save_code_now:{backend_name}" app.send_task(backend_name, **kw) all_backend_tasks.extend(backend_tasks) @deprecated(version="0.18", reason="Use `swh scheduler start-runner` instead") def main(): from .config import app as main_app for module in main_app.conf.CELERY_IMPORTS: __import__(module) - main_backend = get_scheduler("local") + main_backend = get_scheduler("postgresql") try: run_ready_tasks(main_backend, main_app) except Exception: main_backend.rollback() raise if __name__ == "__main__": main() diff --git a/swh/scheduler/cli/__init__.py b/swh/scheduler/cli/__init__.py index cda7259..8eeadaa 100644 --- a/swh/scheduler/cli/__init__.py +++ b/swh/scheduler/cli/__init__.py @@ -1,102 +1,102 @@ # Copyright (C) 2016-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control import logging import click from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup from swh.core.cli import swh as swh_cli_group # If you're looking for subcommand imports, they are further down this file to # avoid a circular import! @swh_cli_group.group( name="scheduler", context_settings=CONTEXT_SETTINGS, cls=AliasedGroup ) @click.option( "--config-file", "-C", default=None, type=click.Path( exists=True, dir_okay=False, ), help="Configuration file.", ) @click.option( "--database", "-d", default=None, - help="Scheduling database DSN (imply cls is 'local')", + help="Scheduling database DSN (imply cls is 'postgresql')", ) @click.option( "--url", "-u", default=None, help="Scheduler's url access (imply cls is 'remote')" ) @click.option( "--no-stdout", is_flag=True, default=False, help="Do NOT output logs on the console" ) @click.pass_context def cli(ctx, config_file, database, url, no_stdout): """Software Heritage Scheduler tools. Use a local scheduler instance by default (plugged to the main scheduler db). """ try: from psycopg2 import OperationalError except ImportError: class OperationalError(Exception): pass from swh.core import config from swh.scheduler import DEFAULT_CONFIG, get_scheduler ctx.ensure_object(dict) logger = logging.getLogger(__name__) scheduler = None conf = config.read(config_file, DEFAULT_CONFIG) if "scheduler" not in conf: raise ValueError("missing 'scheduler' configuration") if database: - conf["scheduler"]["cls"] = "local" + conf["scheduler"]["cls"] = "postgresql" conf["scheduler"]["db"] = database elif url: conf["scheduler"]["cls"] = "remote" conf["scheduler"]["url"] = url sched_conf = conf["scheduler"] try: logger.debug("Instantiating scheduler with %s", sched_conf) scheduler = get_scheduler(**sched_conf) except (ValueError, OperationalError): # it's the subcommand to decide whether not having a proper # scheduler instance is a problem. pass ctx.obj["scheduler"] = scheduler ctx.obj["config"] = conf from . import admin, celery_monitor, journal, origin, simulator, task, task_type # noqa def main(): import click.core click.core.DEPRECATED_HELP_NOTICE = """ DEPRECATED! Please use the command 'swh scheduler'.""" cli.deprecated = True return cli(auto_envvar_prefix="SWH_SCHEDULER") if __name__ == "__main__": main() diff --git a/swh/scheduler/pytest_plugin.py b/swh/scheduler/pytest_plugin.py index dfa3165..f569bb4 100644 --- a/swh/scheduler/pytest_plugin.py +++ b/swh/scheduler/pytest_plugin.py @@ -1,111 +1,111 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import timedelta from functools import partial from celery.contrib.testing import worker from celery.contrib.testing.app import TestApp, setup_default_app import pkg_resources import pytest from pytest_postgresql import factories from swh.core.db.pytest_plugin import initialize_database_for_module from swh.scheduler import get_scheduler from swh.scheduler.backend import SchedulerBackend # celery tasks for testing purpose; tasks themselves should be # in swh/scheduler/tests/tasks.py TASK_NAMES = ["ping", "multiping", "add", "error", "echo"] scheduler_postgresql_proc = factories.postgresql_proc( load=[ partial( initialize_database_for_module, modname="scheduler", version=SchedulerBackend.current_version, ) ], ) postgresql_scheduler = factories.postgresql("scheduler_postgresql_proc") @pytest.fixture def swh_scheduler_config(request, postgresql_scheduler): return { "db": postgresql_scheduler.dsn, } @pytest.fixture def swh_scheduler(swh_scheduler_config): - scheduler = get_scheduler("local", **swh_scheduler_config) + scheduler = get_scheduler("postgresql", **swh_scheduler_config) for taskname in TASK_NAMES: scheduler.create_task_type( { "type": "swh-test-{}".format(taskname), "description": "The {} testing task".format(taskname), "backend_name": "swh.scheduler.tests.tasks.{}".format(taskname), "default_interval": timedelta(days=1), "min_interval": timedelta(hours=6), "max_interval": timedelta(days=12), } ) return scheduler # this alias is used to be able to easily instantiate a db-backed Scheduler # eg. for the RPC client/server test suite. swh_db_scheduler = swh_scheduler @pytest.fixture(scope="session") def swh_scheduler_celery_app(): """Set up a Celery app as swh.scheduler and swh worker tests would expect it""" test_app = TestApp( set_as_current=True, enable_logging=True, task_cls="swh.scheduler.task:SWHTask", config={ "accept_content": ["application/x-msgpack", "application/json"], "broker_url": "memory://guest@localhost//", "task_serializer": "msgpack", "result_serializer": "json", }, ) with setup_default_app(test_app, use_trap=False): from swh.scheduler.celery_backend import config config.app = test_app test_app.set_default() test_app.set_current() yield test_app @pytest.fixture(scope="session") def swh_scheduler_celery_includes(): """List of task modules that should be loaded by the swh_scheduler_celery_worker on startup.""" task_modules = ["swh.scheduler.tests.tasks"] for entrypoint in pkg_resources.iter_entry_points("swh.workers"): task_modules.extend(entrypoint.load()().get("task_modules", [])) return task_modules @pytest.fixture(scope="session") def swh_scheduler_celery_worker( swh_scheduler_celery_app, swh_scheduler_celery_includes, ): """Spawn a worker""" for module in swh_scheduler_celery_includes: swh_scheduler_celery_app.loader.import_task_module(module) with worker.start_worker(swh_scheduler_celery_app, pool="solo") as w: yield w diff --git a/swh/scheduler/tests/test_cli_journal.py b/swh/scheduler/tests/test_cli_journal.py index 3c7e723..023385f 100644 --- a/swh/scheduler/tests/test_cli_journal.py +++ b/swh/scheduler/tests/test_cli_journal.py @@ -1,132 +1,132 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Dict, List from click.testing import CliRunner, Result from confluent_kafka import Producer import pytest import yaml from swh.journal.serializers import value_to_kafka from swh.scheduler import get_scheduler from swh.scheduler.cli import cli from swh.scheduler.tests.test_journal_client import VISIT_STATUSES_1 @pytest.fixture def swh_scheduler_cfg(postgresql_scheduler, kafka_server): """Journal client configuration ready""" return { "scheduler": { - "cls": "local", + "cls": "postgresql", "db": postgresql_scheduler.dsn, }, "journal": { "brokers": [kafka_server], "group_id": "test-consume-visit-status", }, } def _write_configuration_path(config: Dict, tmp_path: str) -> str: config_path = os.path.join(str(tmp_path), "scheduler.yml") with open(config_path, "w") as f: f.write(yaml.dump(config)) return config_path @pytest.fixture def swh_scheduler_cfg_path(swh_scheduler_cfg, tmp_path): """Write scheduler configuration in temporary path and returns such path""" return _write_configuration_path(swh_scheduler_cfg, tmp_path) def invoke(args: List[str], config_path: str, catch_exceptions: bool = False) -> Result: """Invoke swh scheduler journal subcommands""" runner = CliRunner() result = runner.invoke(cli, ["-C" + config_path] + args) if not catch_exceptions and result.exception: print(result.output) raise result.exception return result def test_cli_journal_client_origin_visit_status_misconfiguration_no_scheduler( swh_scheduler_cfg, tmp_path ): config = swh_scheduler_cfg.copy() config["scheduler"] = {"cls": "foo"} config_path = _write_configuration_path(config, tmp_path) with pytest.raises(ValueError, match="must be instantiated"): invoke( [ "journal-client", "--stop-after-objects", "1", ], config_path, ) def test_cli_journal_client_origin_visit_status_misconfiguration_missing_journal_conf( swh_scheduler_cfg, tmp_path ): config = swh_scheduler_cfg.copy() config.pop("journal", None) config_path = _write_configuration_path(config, tmp_path) with pytest.raises(ValueError, match="Missing 'journal'"): invoke( [ "journal-client", "--stop-after-objects", "1", ], config_path, ) def test_cli_journal_client_origin_visit_status( swh_scheduler_cfg, swh_scheduler_cfg_path, ): kafka_server = swh_scheduler_cfg["journal"]["brokers"][0] swh_scheduler = get_scheduler(**swh_scheduler_cfg["scheduler"]) producer = Producer( { "bootstrap.servers": kafka_server, "client.id": "test visit-stats producer", "acks": "all", } ) visit_status = VISIT_STATUSES_1[0] value = value_to_kafka(visit_status) topic = "swh.journal.objects.origin_visit_status" producer.produce(topic=topic, key=b"bogus-origin", value=value) producer.flush() result = invoke( [ "journal-client", "--stop-after-objects", "1", ], swh_scheduler_cfg_path, ) # Check the output expected_output = "Processed 1 message(s).\nDone.\n" assert result.exit_code == 0, result.output assert result.output == expected_output actual_visit_stats = swh_scheduler.origin_visit_stats_get( [(visit_status["origin"], visit_status["type"])] ) assert actual_visit_stats assert len(actual_visit_stats) == 1 diff --git a/swh/scheduler/tests/test_init.py b/swh/scheduler/tests/test_init.py index 9a97548..7673d6d 100644 --- a/swh/scheduler/tests/test_init.py +++ b/swh/scheduler/tests/test_init.py @@ -1,77 +1,77 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import inspect import pytest from swh.scheduler import get_scheduler from swh.scheduler.api.client import RemoteScheduler from swh.scheduler.backend import SchedulerBackend from swh.scheduler.interface import SchedulerInterface SERVER_IMPLEMENTATIONS = [ ("remote", RemoteScheduler, {"url": "localhost"}), - ("local", SchedulerBackend, {"db": "something"}), + ("postgresql", SchedulerBackend, {"db": "something"}), ] @pytest.fixture def mock_psycopg2(mocker): mocker.patch("swh.scheduler.backend.psycopg2.pool") def test_init_get_scheduler_failure(): with pytest.raises(ValueError, match="Unknown Scheduler class"): get_scheduler("unknown-scheduler-storage") @pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS) def test_init_get_scheduler(class_name, expected_class, kwargs, mock_psycopg2): concrete_scheduler = get_scheduler(class_name, **kwargs) assert isinstance(concrete_scheduler, expected_class) assert isinstance(concrete_scheduler, SchedulerInterface) @pytest.mark.parametrize("class_name,expected_class,kwargs", SERVER_IMPLEMENTATIONS) def test_init_get_scheduler_deprecation_warning( class_name, expected_class, kwargs, mock_psycopg2 ): with pytest.warns(DeprecationWarning): concrete_scheduler = get_scheduler(class_name, args=kwargs) assert isinstance(concrete_scheduler, expected_class) def test_types(swh_scheduler) -> None: """Checks all methods of SchedulerInterface are implemented by this backend, and that they have the same signature.""" # Create an instance of the protocol (which cannot be instantiated # directly, so this creates a subclass, then instantiates it) interface = type("_", (SchedulerInterface,), {})() missing_methods = [] for meth_name in dir(interface): if meth_name.startswith("_"): continue interface_meth = getattr(interface, meth_name) try: concrete_meth = getattr(swh_scheduler, meth_name) except AttributeError: missing_methods.append(meth_name) continue expected_signature = inspect.signature(interface_meth) actual_signature = inspect.signature(concrete_meth) assert expected_signature == actual_signature, meth_name assert missing_methods == [] # If all the assertions above succeed, then this one should too. # But there's no harm in double-checking. # And we could replace the assertions above by this one, but unlike # the assertions above, it doesn't explain what is missing. assert isinstance(swh_scheduler, SchedulerInterface) diff --git a/swh/scheduler/tests/test_recurrent_visits.py b/swh/scheduler/tests/test_recurrent_visits.py index b5e9ebf..ad3cfd2 100644 --- a/swh/scheduler/tests/test_recurrent_visits.py +++ b/swh/scheduler/tests/test_recurrent_visits.py @@ -1,216 +1,216 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import timedelta import logging from queue import Queue from unittest.mock import MagicMock import pytest from swh.scheduler.celery_backend.recurrent_visits import ( DEFAULT_DVCS_POLICY, VisitSchedulerThreads, grab_next_visits_policy_weights, send_visits_for_visit_type, spawn_visit_scheduler_thread, terminate_visit_scheduler_threads, visit_scheduler_thread, ) from .test_cli import invoke TEST_MAX_QUEUE = 10000 MODULE_NAME = "swh.scheduler.celery_backend.recurrent_visits" def _compute_backend_name(visit_type: str) -> str: "Build a dummy reproducible backend name" return f"swh.loader.{visit_type}.tasks" @pytest.fixture def swh_scheduler(swh_scheduler): """Override default fixture of the scheduler to install some more task types.""" for visit_type in ["test-git", "test-hg", "test-svn"]: task_type = f"load-{visit_type}" swh_scheduler.create_task_type( { "type": task_type, "max_queue_length": TEST_MAX_QUEUE, "description": "The {} testing task".format(task_type), "backend_name": _compute_backend_name(visit_type), "default_interval": timedelta(days=1), "min_interval": timedelta(hours=6), "max_interval": timedelta(days=12), } ) return swh_scheduler def test_cli_schedule_recurrent_unknown_visit_type(swh_scheduler): """When passed an unknown visit type, the recurrent visit scheduler should refuse to start.""" with pytest.raises(ValueError, match="Unknown"): invoke( swh_scheduler, False, [ "schedule-recurrent", "--visit-type", "unknown", "--visit-type", "test-git", ], ) def test_cli_schedule_recurrent_noop(swh_scheduler, mocker): """When passing no visit types, the recurrent visit scheduler should start.""" spawn_visit_scheduler_thread = mocker.patch( f"{MODULE_NAME}.spawn_visit_scheduler_thread" ) spawn_visit_scheduler_thread.side_effect = SystemExit # The actual scheduling threads won't spawn, they'll immediately terminate. This # only exercises the logic to pull task types out of the database result = invoke(swh_scheduler, False, ["schedule-recurrent"]) assert result.exit_code == 0, result.output def test_recurrent_visit_scheduling( swh_scheduler, caplog, listed_origins_by_type, mocker, ): """Scheduling known tasks is ok.""" caplog.set_level(logging.DEBUG, MODULE_NAME) nb_origins = 1000 mock_celery_app = MagicMock() mock_available_slots = mocker.patch(f"{MODULE_NAME}.get_available_slots") mock_available_slots.return_value = nb_origins # Slots available in queue # Make sure the scheduler is properly configured in terms of visit/task types all_task_types = { task_type_d["type"]: task_type_d for task_type_d in swh_scheduler.get_task_types() } visit_types = list(listed_origins_by_type.keys()) assert len(visit_types) > 0 task_types = [] origins = [] for visit_type, _origins in listed_origins_by_type.items(): origins.extend(swh_scheduler.record_listed_origins(_origins)) task_type_name = f"load-{visit_type}" assert task_type_name in all_task_types.keys() task_type = all_task_types[task_type_name] task_type["visit_type"] = visit_type # we'll limit the orchestrator to the origins' type we know task_types.append(task_type) for visit_type in ["test-git", "test-svn"]: task_type = f"load-{visit_type}" send_visits_for_visit_type( swh_scheduler, mock_celery_app, visit_type, all_task_types[task_type], DEFAULT_DVCS_POLICY, ) assert mock_available_slots.called, "The available slots functions should be called" records = [record.message for record in caplog.records] # Mapping over the dict ratio/policies entries can change overall order so let's # check the set of records expected_records = set() for task_type in task_types: visit_type = task_type["visit_type"] queue_name = task_type["backend_name"] msg = ( f"{nb_origins} available slots for visit type {visit_type} " f"in queue {queue_name}" ) expected_records.add(msg) for expected_record in expected_records: assert expected_record in set(records) @pytest.mark.parametrize( "visit_type, extras", [("test-hg", {}), ("test-git", {"tablesample": 0.1})], ) def test_recurrent_visit_additional_parameters( swh_scheduler, mocker, visit_type, extras ): """Testing additional policy parameters""" mock_grab_next_visits = mocker.patch.object(swh_scheduler, "grab_next_visits") mock_grab_next_visits.return_value = [] policy_cfg = DEFAULT_DVCS_POLICY[:] for policy in policy_cfg: policy.update(extras) grab_next_visits_policy_weights(swh_scheduler, visit_type, 10, policy_cfg) for call in mock_grab_next_visits.call_args_list: assert call[1].get("tablesample") == extras.get("tablesample") @pytest.fixture def scheduler_config(swh_scheduler_config): - return {"scheduler": {"cls": "local", **swh_scheduler_config}, "celery": {}} + return {"scheduler": {"cls": "postgresql", **swh_scheduler_config}, "celery": {}} def test_visit_scheduler_thread_unknown_task( swh_scheduler, scheduler_config, ): """Starting a thread with unknown task type reports the error""" unknown_visit_type = "unknown" command_queue = Queue() exc_queue = Queue() visit_scheduler_thread( scheduler_config, unknown_visit_type, command_queue, exc_queue ) assert command_queue.empty() is True assert exc_queue.empty() is False assert len(exc_queue.queue) == 1 result = exc_queue.queue.pop() assert result[0] == unknown_visit_type assert isinstance(result[1], ValueError) def test_spawn_visit_scheduler_thread_noop(scheduler_config, visit_types, mocker): """Spawning and terminating threads runs smoothly""" threads: VisitSchedulerThreads = {} exc_queue = Queue() mock_build_app = mocker.patch("swh.scheduler.celery_backend.config.build_app") mock_build_app.return_value = MagicMock() assert len(threads) == 0 for visit_type in visit_types: spawn_visit_scheduler_thread(threads, exc_queue, scheduler_config, visit_type) # This actually only checks the spawning and terminating logic is sound assert len(threads) == len(visit_types) actual_threads = terminate_visit_scheduler_threads(threads) assert not len(actual_threads) assert mock_build_app.called diff --git a/swh/scheduler/tests/test_server.py b/swh/scheduler/tests/test_server.py index a678dd8..50c5b41 100644 --- a/swh/scheduler/tests/test_server.py +++ b/swh/scheduler/tests/test_server.py @@ -1,100 +1,100 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest import yaml from swh.scheduler.api.server import load_and_check_config def prepare_config_file(tmpdir, content, name="config.yml"): """Prepare configuration file in `$tmpdir/name` with content `content`. Args: tmpdir (LocalPath): root directory content (str/dict): Content of the file either as string or as a dict. If a dict, converts the dict into a yaml string. name (str): configuration filename Returns path (str) of the configuration file prepared. """ config_path = tmpdir / name if isinstance(content, dict): # convert if needed content = yaml.dump(content) config_path.write_text(content, encoding="utf-8") # pytest on python3.5 does not support LocalPath manipulation, so # convert path to string return str(config_path) @pytest.mark.parametrize("scheduler_class", [None, ""]) def test_load_and_check_config_no_configuration(scheduler_class): """Inexistent configuration files raises""" with pytest.raises(EnvironmentError, match="Configuration file must be defined"): load_and_check_config(scheduler_class) def test_load_and_check_config_inexistent_fil(): """Inexistent config filepath should raise""" config_path = "/some/inexistent/config.yml" expected_error = f"Configuration file {config_path} does not exist" with pytest.raises(FileNotFoundError, match=expected_error): load_and_check_config(config_path) def test_load_and_check_config_wrong_configuration(tmpdir): """Wrong configuration raises""" config_path = prepare_config_file(tmpdir, "something: useless") with pytest.raises(KeyError, match="Missing '%scheduler' configuration"): load_and_check_config(config_path) def test_load_and_check_config_remote_config_local_type_raise(tmpdir): """Configuration without 'local' storage is rejected""" config = {"scheduler": {"cls": "remote"}} config_path = prepare_config_file(tmpdir, config) expected_error = ( - "The scheduler backend can only be started with a 'local'" " configuration" + "The scheduler backend can only be started with a 'postgresql'" " configuration" ) with pytest.raises(ValueError, match=expected_error): load_and_check_config(config_path, type="local") def test_load_and_check_config_local_incomplete_configuration(tmpdir): """Incomplete 'local' configuration should raise""" config = { "scheduler": { - "cls": "local", + "cls": "postgresql", "something": "needed-for-test", } } config_path = prepare_config_file(tmpdir, config) expected_error = "Invalid configuration; missing 'db' config entry" with pytest.raises(KeyError, match=expected_error): load_and_check_config(config_path) def test_load_and_check_config_local_config_fine(tmpdir): """Local configuration is fine""" config = { "scheduler": { - "cls": "local", + "cls": "postgresql", "db": "db", } } config_path = prepare_config_file(tmpdir, config) cfg = load_and_check_config(config_path, type="local") assert cfg == config def test_load_and_check_config_remote_config_fine(tmpdir): """Remote configuration is fine""" config = {"scheduler": {"cls": "remote"}} config_path = prepare_config_file(tmpdir, config) cfg = load_and_check_config(config_path, type="any") assert cfg == config