diff --git a/PKG-INFO b/PKG-INFO index 0fb2113..55ef8e3 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,37 +1,37 @@ Metadata-Version: 2.1 Name: swh.scheduler -Version: 0.18.0 +Version: 0.18.2 Summary: Software Heritage Scheduler Home-page: https://forge.softwareheritage.org/diffusion/DSCH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-scheduler Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-scheduler/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: journal Provides-Extra: simulator License-File: LICENSE License-File: LICENSE.Celery License-File: AUTHORS swh-scheduler ============= Job scheduler for the Software Heritage project. Task manager for asynchronous/delayed tasks, used for both recurrent (e.g., listing a forge, loading new stuff from a Git repository) and one-off activities (e.g., loading a specific version of a source package). diff --git a/conftest.py b/conftest.py index 745fbdf..1f63a7b 100644 --- a/conftest.py +++ b/conftest.py @@ -1,15 +1,19 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from hypothesis import settings # define tests profile. Full documentation is at: # https://hypothesis.readthedocs.io/en/latest/settings.html#settings-profiles settings.register_profile("fast", max_examples=5, deadline=5000) settings.register_profile("slow", max_examples=20, deadline=5000) -pytest_plugins = ["swh.scheduler.pytest_plugin", "swh.core.db.pytest_plugin"] +pytest_plugins = [ + "swh.scheduler.pytest_plugin", + "swh.core.db.pytest_plugin", + "swh.storage.pytest_plugin", +] diff --git a/debian/changelog b/debian/changelog index ecc4454..0a03ef4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,1035 +1,1049 @@ -swh-scheduler (0.18.0-1~swh1~bpo10+1) buster-swh; urgency=medium +swh-scheduler (0.18.2-1~swh1) unstable-swh; urgency=medium - * Rebuild for buster-swh + * New upstream release 0.18.2 - (tagged by Antoine R. Dumont + (@ardumont) on 2021-10-18 15:11:59 + +0200) + * Upstream changes: - v0.18.2 - Use swh_storage fixture for + cli tests + + -- Software Heritage autobuilder (on jenkins-debian1) Mon, 18 Oct 2021 13:18:56 +0000 + +swh-scheduler (0.18.1-1~swh1) unstable-swh; urgency=medium + + * New upstream release 0.18.1 - (tagged by Antoine R. Dumont + (@ardumont) on 2021-10-15 15:49:35 + +0200) + * Upstream changes: - v0.18.1 - Return 0 slot if no more slots + available in the queues - -- Software Heritage autobuilder (on jenkins-debian1) Thu, 02 Sep 2021 09:37:34 +0000 + -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Oct 2021 13:53:38 +0000 swh-scheduler (0.18.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.18.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-09-02 11:32:59 +0200) * Upstream changes: - v0.18.0 - Refine scheduling policy for origins with no known last update - Add a swh scheduler origin send-to-celery subcommand - runner: Improve help message on the task types flag. - send-to-celery: Add more options to allow scheduling of edge cases - Add table sampling option to grab_next_visits - journal_client: Only upsert if we have something to upsert -- Software Heritage autobuilder (on jenkins-debian1) Thu, 02 Sep 2021 09:35:32 +0000 swh-scheduler (0.17.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-08-26 10:30:12 +0200) * Upstream changes: - v0.17.1 - journal_client: Ensure queue position does not overflow -- Software Heritage autobuilder (on jenkins-debian1) Thu, 26 Aug 2021 08:41:41 +0000 swh-scheduler (0.17.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.17.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-08-05 15:29:18 +0200) * Upstream changes: - v0.17.0 - Introduce new scheduling policy to grab origins without last update - journal_client: Disable origins when too many visited attempts failed - journal_client: Record last_visited and last_successful in origin_visit_stats - Add a specific cooldown for notfound origins - Add a (longer) specific cooldown for failed origin visits - Make the origin visit scheduling cooldown configurable - Various refactoring to simplify the grab next visits logic and updates -- Software Heritage autobuilder (on jenkins-debian1) Fri, 06 Aug 2021 09:11:54 +0000 swh-scheduler (0.16.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.16.0 - (tagged by Antoine Lambert on 2021-06-22 17:35:55 +0200) * Upstream changes: - version 0.16.0 -- Software Heritage autobuilder (on jenkins-debian1) Tue, 22 Jun 2021 15:39:45 +0000 swh-scheduler (0.15.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.15.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-06-10 16:09:06 +0200) * Upstream changes: - v0.15.0 - separate-runner runner: Separate scheduling tasks with and without priority concern - Refactor and extract a get_available_slots utility - Add typing stubs dependencies for mypy>0.900 - pytest_plugin: Explicitly set hostname in broker_url for celery TestApp -- Software Heritage autobuilder (on jenkins-debian1) Thu, 10 Jun 2021 14:48:52 +0000 swh-scheduler (0.14.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.2 - (tagged by Valentin Lorentz on 2021-05-06 17:09:00 +0200) * Upstream changes: - v0.14.2 - * Fix flaky tests -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 15:13:11 +0000 swh-scheduler (0.14.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.14.1 - (tagged by Antoine R. Dumont (@ardumont) on 2021-05-06 16:00:07 +0200) * Upstream changes: - v0.14.1 - Use swh.core 0.14 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 06 May 2021 14:17:39 +0000 swh-scheduler (0.13.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.13.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-04-20 11:46:51 +0200) * Upstream changes: - v0.13.0 - scheduler: Clean up priority/ratio task dead code - Parse task_ids before calling set_status_tasks. - tests: Complete checks on message with priority consumption -- Software Heritage autobuilder (on jenkins-debian1) Tue, 20 Apr 2021 09:51:00 +0000 swh-scheduler (0.12.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.12.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-04-15 13:31:30 +0200) * Upstream changes: - v0.12.0 - Route priority tasks to dedicated save code now queues - Fix various Sphinx warnings -- Software Heritage autobuilder (on jenkins-debian1) Thu, 15 Apr 2021 11:36:13 +0000 swh-scheduler (0.11.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.11.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-04-14 18:15:53 +0200) * Upstream changes: - v0.11.0 - separate-queues backend: Open endpoints to peek/grab tasks with any priority - Make origin_visit_stats_get return results from all pages - journal client: Filter out status messages without type - Simplify max_date() - journal_client: Fix date computations for (un)eventful visits - journal_client: Deal with failed status message -- Software Heritage autobuilder (on jenkins-debian1) Wed, 14 Apr 2021 16:19:31 +0000 swh-scheduler (0.10.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.10.0 - (tagged by Nicolas Dandrimont on 2021-02-03 22:53:20 +0100) * Upstream changes: - Release swh.scheduler 0.10.0 - Eagerly acknowledge celery tasks - Loads of simulator improvements - grab_next_visits: - clean up query building - account for schedule time to avoid rescheduling visits too fast - allow overriding the scheduling timestamp for the simulator -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Feb 2021 22:10:13 +0000 swh-scheduler (0.9.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.2 - (tagged by Antoine Lambert on 2021-01-25 16:27:41 +0100) * Upstream changes: - version 0.9.2 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 25 Jan 2021 15:31:21 +0000 swh-scheduler (0.9.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.1 - (tagged by Vincent SELLIER on 2021-01-21 19:20:33 +0100) * Upstream changes: - v0.9.1 - * Solve uneventful/eventful with unordered messages with snapshots - * Do not consider duplicated messages as uneventful event - * Reorganize grab_next_visits tests to better check sorting behavior -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Jan 2021 18:28:00 +0000 swh-scheduler (0.9.0-1~swh2) unstable-swh; urgency=medium * Bump new release to unstuck packaging -- Antoine R. Dumont (@ardumont) Thu, 21 Jan 2021 13:20:14 +0000 swh-scheduler (0.9.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.9.0 - (tagged by Antoine R. Dumont (@ardumont) on 2021-01-21 11:54:47 +0100) * Upstream changes: - v0.9.0 - Populate origin_visit_stats table out of the origin_visit_status topic - Introduce a scheduler policy simulator (old task-based scheduler, ...) - Implement basic aggregated metrics on listed origins - scheduler.cli.journal: Add `swh scheduler journal-client` cli - Filter origins by visit type when scheduling the next visits - Introduce a `swh scheduler origin schedule-next` cli - Introduce a `swh scheduler origin grab-next` cli - Add an new origin visit stats model object and related backend api - Implement a basic endpoint for getting the next origins to visit - doc: Add a cli section to the doc -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Jan 2021 11:00:29 +0000 swh-scheduler (0.8.2-1~swh2) unstable-swh; urgency=medium * Bump dependency -- Antoine R. Dumont (@ardumont) Tue, 08 Dec 2020 09:29:26 +0000 swh-scheduler (0.8.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-12-07 09:52:28 +0100) * Upstream changes: - v0.8.2 - requirement: Adapt celery requirements - Replace usage of arrow datetime objects in favor of pure datetime ones - Stop using the deprecated configuration scheme - cli.task_type: All task_type clis without a scheduler should raise -- Software Heritage autobuilder (on jenkins-debian1) Mon, 07 Dec 2020 08:55:39 +0000 swh-scheduler (0.8.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.1 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-24 14:13:36 +0100) * Upstream changes: - v0.8.1 - conftest: Reference swh.core.db.pytest_plugin -- Software Heritage autobuilder (on jenkins-debian1) Tue, 24 Nov 2020 13:16:08 +0000 swh-scheduler (0.8.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.8.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-11-23 13:42:05 +0100) * Upstream changes: - v0.8.0 - requirements-test.txt: Drop no longer needed pytest-postgresql requirement - scheduler.pytest_plugin: Make scheduler tests faster -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Nov 2020 12:44:40 +0000 swh-scheduler (0.7.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.7.0 - (tagged by Antoine R. Dumont (@ardumont) on 2020-10-19 09:30:36 +0200) * Upstream changes: - v0.7.0 - scheduler: Type and unify get_scheduler factory with other factories - pytest_plugin: Explicitly name the scheduler test db differently - test_server: Simplify exception manipulations - tox.ini: pin black to the pre-commit version (19.10b0) to avoid flip-flops -- Software Heritage autobuilder (on jenkins-debian1) Mon, 19 Oct 2020 07:33:54 +0000 swh-scheduler (0.6.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.6.0 - (tagged by David Douard on 2020-09-25 12:03:33 +0200) * Upstream changes: - v0.6.0 -- Software Heritage autobuilder (on jenkins-debian1) Fri, 25 Sep 2020 10:06:32 +0000 swh-scheduler (0.5.3-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.3 - (tagged by Nicolas Dandrimont on 2020-09-24 17:49:27 +0200) * Upstream changes: - Release swh.scheduler v0.5.3 - Improve swh cli startup time - Add isort and update flake8 - Improve pytest execution time - Support recent kombu versions -- Software Heritage autobuilder (on jenkins-debian1) Thu, 24 Sep 2020 15:53:25 +0000 swh-scheduler (0.5.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.2 - (tagged by Antoine R. Dumont (@ardumont) on 2020-07-10 13:01:48 +0200) * Upstream changes: - v0.5.2 - Do no expose pytest-plugin through setuptools, let modules require it when needed -- Software Heritage autobuilder (on jenkins-debian1) Fri, 10 Jul 2020 11:08:30 +0000 swh-scheduler (0.5.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.1 - (tagged by Nicolas Dandrimont on 2020-07-09 10:18:03 +0200) * Upstream changes: - Release swh.scheduler 0.5.1 - Drop dependency on future (not needed anymore) -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Jul 2020 09:51:38 +0000 swh-scheduler (0.5.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.5.0 - (tagged by Nicolas Dandrimont on 2020-07-09 10:16:57 +0200) * Upstream changes: - Release swh.scheduler v0.5.0 - Move celery fixtures to the pytest plugin -- Software Heritage autobuilder (on jenkins-debian1) Thu, 09 Jul 2020 08:20:42 +0000 swh-scheduler (0.4.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.4.0 - (tagged by Nicolas Dandrimont on 2020-07-06 16:47:28 +0200) * Upstream changes: - Release swh.scheduler 0.4.0 - Extract pytest fixtures to a pytest plugin -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 Jul 2020 14:52:42 +0000 swh-scheduler (0.3.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.3.0 - (tagged by Nicolas Dandrimont on 2020-07-06 12:18:28 +0200) * Upstream changes: - Release swh.scheduler 0.3.0 - Add get_listed_origins endpoint -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 Jul 2020 10:23:31 +0000 swh-scheduler (0.2.2-1~swh1) unstable-swh; urgency=medium * New upstream release 0.2.2 - (tagged by Nicolas Dandrimont on 2020-06-22 14:03:34 +0200) * Upstream changes: - Release swh.scheduler 0.2.2 - Re- introduce root endpoint for the RPC server -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 12:07:05 +0000 swh-scheduler (0.2.1-1~swh1) unstable-swh; urgency=medium [ Nicolas Dandrimont ] * Force celery >= 4.3 [ Software Heritage autobuilder (on jenkins-debian1) ] * New upstream release 0.2.1 - (tagged by Nicolas Dandrimont on 2020-06-22 12:09:32 +0200) * Upstream changes: - Release swh.scheduler 0.2.1 - Bump celery requirement to 4.3+ -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 10:12:50 +0000 swh-scheduler (0.2.0-1~swh1) unstable-swh; urgency=medium [ Nicolas Dandrimont ] * Switch from vcversioner to setuptools-scm * wrap-and-sort [ Software Heritage autobuilder (on jenkins-debian1) ] * New upstream release 0.2.0 - (tagged by Nicolas Dandrimont on 2020-06-22 10:33:11 +0200) * Upstream changes: - Release swh.scheduler 0.2.0 - Implement storage of lister and listed origin information - Add swh scheduler celery-monitor command - Overhaul RPC to use automatic generation -- Software Heritage autobuilder (on jenkins-debian1) Mon, 22 Jun 2020 08:36:49 +0000 swh-scheduler (0.1.1-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.1 - (tagged by Nicolas Dandrimont on 2020-06-03 11:34:19 +0200) * Upstream changes: - Release swh.scheduler v0.1.1 - Add missing dependency on future for celery 4.4.4 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Jun 2020 09:39:25 +0000 swh-scheduler (0.1.0-1~swh1) unstable-swh; urgency=medium * New upstream release 0.1.0 - (tagged by Nicolas Dandrimont on 2020-05-19 11:48:34 +0200) * Upstream changes: - Release swh.scheduler v0.1.0 - Blacken source code - Disable azure http logspam - Only schedule tasks when the buffer is somewhat empty -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 May 2020 09:52:31 +0000 swh-scheduler (0.0.72-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.72 - (tagged by Nicolas Dandrimont on 2020-03-23 13:07:38 +0100) * Upstream changes: - Release swh.scheduler v0.0.72 - Update instantiation of storage in tests - ensure that create_task_type is idempotent - introduce new listener based on pika -- Software Heritage autobuilder (on jenkins-debian1) Mon, 23 Mar 2020 12:12:00 +0000 swh-scheduler (0.0.71-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.71 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-23 14:24:56 +0100) * Upstream changes: - v0.0.71 - sentry: Fix initialization init_sentry call -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Jan 2020 13:29:33 +0000 swh-scheduler (0.0.70-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.70 - (tagged by Antoine R. Dumont (@ardumont) on 2020-01-23 13:43:35 +0100) * Upstream changes: - v0.0.70 - Use swh.core.sentry instead of calling sentry_sdk.init directly - backend_es: Fix configuration mapping -- Software Heritage autobuilder (on jenkins-debian1) Thu, 23 Jan 2020 12:47:43 +0000 swh-scheduler (0.0.69-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.69 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-17 16:00:24 +0100) * Upstream changes: - v0.0.69 - Fix scheduler's archive task cli - Make the filter task endpoint a paginated endpoint - Add coverage on the archive task cli -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 15:04:48 +0000 swh-scheduler (0.0.68-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.68 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-17 15:28:13 +0100) * Upstream changes: - v0.0.68 - Fix scheduler's archive task cli - Make the filter task endpoint a paginated endpoint - Add coverage on the archive task cli -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 14:33:33 +0000 swh-scheduler (0.0.67-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.67 - (tagged by Antoine R. Dumont (@ardumont) on 2019-12-17 14:33:36 +0100) * Upstream changes: - v0.0.67 - Fix scheduler's archive task cli - Make the filter task endpoint a paginated endpoint - Add coverage on the archive task cli -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 13:38:03 +0000 swh-scheduler (0.0.66-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.66 - (tagged by Nicolas Dandrimont on 2019-12-17 12:04:20 +0100) * Upstream changes: - Release swh.scheduler v0.0.66 - initialize sentry on celery worker startup - improve task archival endpoints in backend api -- Software Heritage autobuilder (on jenkins-debian1) Tue, 17 Dec 2019 11:08:25 +0000 swh-scheduler (0.0.65-1~swh2) unstable-swh; urgency=medium * Add pytest-mock build-dependency. -- Nicolas Dandrimont Fri, 13 Dec 2019 11:57:41 +0100 swh-scheduler (0.0.65-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.65 - (tagged by Nicolas Dandrimont on 2019-12-13 11:45:55 +0100) * Upstream changes: - Release swh.scheduler v0.0.65 - Drop the scheduler updater - Add a statsd probe for task execution timestamps - Add listener and runner statsd probes - CLI updates - Python packaging housekeeping -- Software Heritage autobuilder (on jenkins-debian1) Fri, 13 Dec 2019 10:54:31 +0000 swh-scheduler (0.0.64-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.64 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-20 14:26:00 +0100) * Upstream changes: - v0.0.64 - req-swh*: Remove old package loader backend names -- Software Heritage autobuilder (on jenkins-debian1) Wed, 20 Nov 2019 13:29:37 +0000 swh-scheduler (0.0.63-1~swh2) unstable-swh; urgency=medium * Update build dependency -- Antoine R. Dumont (@ardumont) Tue, 19 Nov 2019 17:07:40 +0100 swh-scheduler (0.0.63-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.63 - (tagged by Antoine R. Dumont (@ardumont) on 2019-11-19 14:09:12 +0100) * Upstream changes: - v0.0.63 - swh.scheduler.cli: Add `swh scheduler task-type register` cli - Use the shared_task decorator instead of binding to a specific celery app - celery/tests: mostly revert e770eb30 to fix celery app initialization in tests -- Software Heritage autobuilder (on jenkins-debian1) Tue, 19 Nov 2019 13:14:59 +0000 swh-scheduler (0.0.62-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.62 - (tagged by Antoine R. Dumont (@ardumont) on 2019-10-18 13:39:27 +0200) * Upstream changes: - v0.0.62 - celery_backend.config: Make JournalHandler import optional - tests: rewrite tests using pytest fixtures -- Software Heritage autobuilder (on jenkins-debian1) Fri, 18 Oct 2019 11:46:26 +0000 swh-scheduler (0.0.61-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.61 - (tagged by Nicolas Dandrimont on 2019-10-07 16:33:17 +0200) * Upstream changes: - Release swh.scheduler v0.0.61 - Remove bogus dict.get(default=) statement -- Software Heritage autobuilder (on jenkins-debian1) Mon, 07 Oct 2019 14:37:37 +0000 swh-scheduler (0.0.60-1~swh2) unstable-swh; urgency=medium * Force postgresql executable to a pg_ctl that exists when running tests. -- Nicolas Dandrimont Tue, 01 Oct 2019 18:14:39 +0200 swh-scheduler (0.0.60-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.60 - (tagged by Stefano Zacchiroli on 2019-10-01 13:13:13 +0200) * Upstream changes: - v0.0.60 - * tox: anticipate mypy run to just after flake8 - * init.py: switch to documented way of extending path - * tox.ini: add mypy section - * typing: minimal changes to make a no-op mypy run pass - * fix typo in docstring and sample file name - * admin CLI: drop obsolete backward compatibility aliases - * click "required" param wants bool, not int -- Software Heritage autobuilder (on jenkins-debian1) Tue, 01 Oct 2019 11:22:43 +0000 swh-scheduler (0.0.59-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.59 - (tagged by David Douard on 2019-09-04 16:08:27 +0200) * Upstream changes: - v0.0.59 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 04 Sep 2019 14:11:48 +0000 swh-scheduler (0.0.58-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.58 - (tagged by Antoine R. Dumont (@ardumont) on 2019-09-03 10:19:34 +0200) * Upstream changes: - v0.0.58 - celery: auto add tasks declared in the swh.workers entry point in task_modules - api/client: use RPCClient instead of deprecated SWHRemoteAPI - Make schedule_origins use origin urls instead of ids in task arguments. - docs: add code of conduct document - docs: very beginning of a practical documentation on the scheduler - config: Add a pre-commit config file - data: Insert new cgit instance lister task - data: Insert load-tar task-type -- Software Heritage autobuilder (on jenkins-debian1) Tue, 03 Sep 2019 08:28:19 +0000 swh-scheduler (0.0.57-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.57 - (tagged by David Douard on 2019-06-26 14:56:32 +0200) * Upstream changes: - v0.0.57 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 26 Jun 2019 13:05:20 +0000 swh-scheduler (0.0.56-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.56 - (tagged by Nicolas Dandrimont on 2019-05-07 18:16:20 +0200) * Upstream changes: - listener: Release the db object after using it - This is the contract that get_db/put_db is supposed to conform to. -- Software Heritage autobuilder (on jenkins-debian1) Tue, 14 May 2019 12:40:09 +0000 swh-scheduler (0.0.55-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.55 - (tagged by Antoine Lambert on 2019-05-06 11:47:43 +0200) * Upstream changes: - version 0.0.55 -- Software Heritage autobuilder (on jenkins-debian1) Mon, 06 May 2019 09:54:51 +0000 swh-scheduler (0.0.54-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.54 - (tagged by Antoine R. Dumont (@ardumont) on 2019-04-11 11:33:40 +0200) * Upstream changes: - v0.0.54 - cli_utils: Use yaml.safe_load instead of yaml.load - Fix support of latest versions of swh- core and psycopg2 - sql/data: Add npm related task types -- Software Heritage autobuilder (on jenkins-debian1) Thu, 11 Apr 2019 09:40:14 +0000 swh-scheduler (0.0.53-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.53 - (tagged by Antoine Lambert on 2019-04-04 16:45:56 +0200) * Upstream changes: - version 0.0.53 -- Software Heritage autobuilder (on jenkins-debian1) Thu, 04 Apr 2019 14:55:20 +0000 swh-scheduler (0.0.52-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.52 - (tagged by Nicolas Dandrimont on 2019-04-03 10:54:06 +0200) * Upstream changes: - Release swh.scheduler v0.0.52 - Move to result_serializer = json to work around celery 4.3 bug - Fix db initialization -- Software Heritage autobuilder (on jenkins-debian1) Wed, 03 Apr 2019 08:59:00 +0000 swh-scheduler (0.0.51-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.51 - (tagged by Antoine R. Dumont (@ardumont) on 2019-03-22 12:09:22 +0100) * Upstream changes: - v0.0.51 - requirements.txt: Remove kombu dependency -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Mar 2019 11:16:06 +0000 swh-scheduler (0.0.50-1~swh2) unstable-swh; urgency=medium * Update build- and runtime dependencies -- Nicolas Dandrimont Fri, 15 Mar 2019 18:24:11 +0100 swh-scheduler (0.0.50-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.50 - (tagged by Nicolas Dandrimont on 2019-03-15 18:07:24 +0100) * Upstream changes: - Release swh.scheduler v0.0.50 - Add an explicit log target for stdout and/or journald - Avoid useless log lines - Improve test coverage - Add support for non- string options in the CLI -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Mar 2019 17:16:03 +0000 swh-scheduler (0.0.49-1~swh2) unstable-swh; urgency=medium * Export LC_ALL=C.UTF-8 -- Nicolas Dandrimont Thu, 14 Mar 2019 13:42:24 +0100 swh-scheduler (0.0.49-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.49 - (tagged by Nicolas Dandrimont on 2019-03-03 08:48:04 +0100) * Upstream changes: - Release swh.scheduler v0.0.49 - various fixes around celery behavior - move wsgi endpoint to a separate module - add tests for the CLI -- Software Heritage autobuilder (on jenkins-debian1) Sun, 03 Mar 2019 07:55:41 +0000 swh-scheduler (0.0.48-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.48 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-22 16:11:51 +0100) * Upstream changes: - v0.0.48 - Fix comment on main scheduler schema -- Software Heritage autobuilder (on jenkins-debian1) Fri, 22 Feb 2019 15:17:20 +0000 swh-scheduler (0.0.47-1~swh2) unstable-swh; urgency=low * Upstream release to fix build dependencies issue -- Antoine Romain Dumont (@ardumont) Thu, 21 Feb 2019 15:41:24 +0100 swh-scheduler (0.0.47-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.47 - (tagged by Valentin Lorentz on 2019-02-20 16:53:20 +0100) * Upstream changes: - Fix crash of SchedulerBackend.search_tasks when no argument is given. -- Software Heritage autobuilder (on jenkins-debian1) Thu, 21 Feb 2019 09:13:07 +0000 swh-scheduler (0.0.46-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.46 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-15 15:05:47 +0100) * Upstream changes: - v0.0.46 - scheduler.task: Remove no longer used Task class -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Feb 2019 14:15:26 +0000 swh-scheduler (0.0.45-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.45 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-15 10:43:07 +0100) * Upstream changes: - v0.0.45 - celery_backend/config: Fix loglevel for amqp module -- Software Heritage autobuilder (on jenkins-debian1) Fri, 15 Feb 2019 09:48:25 +0000 swh-scheduler (0.0.44-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.44 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-13 16:29:05 +0100) * Upstream changes: - v0.0.44 - swh-scheduler-api: Fix configuration read too many times -- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Feb 2019 15:34:34 +0000 swh-scheduler (0.0.43-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.43 - (tagged by David Douard on 2019-02-13 15:27:27 +0100) * Upstream changes: - v0.0.43 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 13 Feb 2019 14:46:59 +0000 swh-scheduler (0.0.42-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.42 - (tagged by Antoine R. Dumont (@ardumont) on 2019-02-11 14:28:10 +0100) * Upstream changes: - v0.0.42 - Fix dependency requirements for hypothesis -- Software Heritage autobuilder (on jenkins-debian1) Mon, 11 Feb 2019 13:33:48 +0000 swh-scheduler (0.0.41-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.41 - (tagged by David Douard on 2019-02-06 15:25:56 +0100) * Upstream changes: - v0.0.41 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 06 Feb 2019 15:33:04 +0000 swh-scheduler (0.0.40-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.40 - (tagged by Antoine R. Dumont (@ardumont) on 2019-01-28 16:24:04 +0100) * Upstream changes: - v0.0.40 - swh.scheduler.tests: Mark db tests as such - Force tox environment to C.UTF-8 locale - Add debug logging in the SWHTask class -- Software Heritage autobuilder (on jenkins-debian1) Mon, 28 Jan 2019 15:30:41 +0000 swh-scheduler (0.0.39-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.39 - (tagged by David Douard on 2019-01-16 13:37:58 +0100) * Upstream changes: - v0.0.39 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 16 Jan 2019 12:42:37 +0000 swh-scheduler (0.0.38-1~swh1) unstable-swh; urgency=medium * New upstream release 0.0.38 - (tagged by David Douard on 2018-12-20 14:39:59 +0100) * Upstream changes: - v0.0.38 -- Software Heritage autobuilder (on jenkins-debian1) Wed, 09 Jan 2019 18:32:14 +0000 swh-scheduler (0.0.35-1~swh1) unstable-swh; urgency=medium * v0.0.35 * tests: Add SchedulerTestFixture * swh.scheduler.utils: Allow to add more task information * sql/40-swh-data: Update new indexer task types for local db -- Antoine R. Dumont (@ardumont) Mon, 29 Oct 2018 10:07:08 +0100 swh-scheduler (0.0.34-1~swh1) unstable-swh; urgency=medium * v0.0.34 * Finalize pytest migration -- Antoine R. Dumont (@ardumont) Thu, 25 Oct 2018 17:52:03 +0200 swh-scheduler (0.0.33-1~swh1) unstable-swh; urgency=medium * v0.0.33 -- David Douard Thu, 25 Oct 2018 16:03:16 +0200 swh-scheduler (0.0.32-1~swh1) unstable-swh; urgency=medium * v0.0.32 * tests: Add celery fixture to ease tests * tests: make tests use sql/ files from the package * tests: Starting migration towards pytest * listener: Make the listener code compatible with new celery (debian buster) * Make swh_scheduler_create_tasks_from_temp use indexes * setup: prepare for pypi upload * docs: add a simple README file -- Antoine R. Dumont (@ardumont) Mon, 22 Oct 2018 15:37:51 +0200 swh-scheduler (0.0.31-1~swh1) unstable-swh; urgency=medium * v0.0.31 * sql/swh-scheduler: Make the create_tasks call idempotent * swh.scheduler.utils: Open create_task_dict function * sql/scheduler-data: Add lister gitlab task types * sql/scheduler-data: Reference the existing production lister data * swh.scheduler.backend_es: Open sniffing options -- Antoine R. Dumont (@ardumont) Tue, 31 Jul 2018 06:55:39 +0200 swh-scheduler (0.0.30-1~swh1) unstable-swh; urgency=medium * v0.0.30 * swh-scheduler-schema.sql: Archive disabled oneshot tasks as well * swh.scheduler.cli: Add policy to pretty printing task routine * swh.scheduler.cli: Fix broken cli list-pending since api change -- Antoine R. Dumont (@ardumont) Fri, 22 Jun 2018 18:07:02 +0200 swh-scheduler (0.0.29-1~swh1) unstable-swh; urgency=medium * v0.0.29 * swh.scheduler.cli: Change archival period to rolling month - 1 week * swh.scheduler.updater.writer: Force filter resolution to list * swh.scheduler.cli: Change default archival period to current month * swh.scheduler.cli: Improve logging message * swh.scheduler.updater.backend: Adapt configuration path accordingly -- Antoine R. Dumont (@ardumont) Thu, 31 May 2018 11:42:51 +0200 swh-scheduler (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * Fix wrong runtime dependencies -- Antoine R. Dumont (@ardumont) Tue, 29 May 2018 14:12:15 +0200 swh-scheduler (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * scheduler: Deal with priority in tasks * scheduler-update: new package python3-swh.scheduler.updater * Contains tools in charge of consuming events from arbitrary sources * and update the scheduler db -- Antoine R. Dumont (@ardumont) Tue, 29 May 2018 12:27:34 +0200 swh-scheduler (0.0.26-1~swh1) unstable-swh; urgency=medium * v0.0.26 * swh.scheduler: Fix package build * swh.scheduler.tests: Test remote scheduler api as well * swh.scheduler: Add tests around removing archivable tasks * swh.scheduler: Add tests around filtering archivable tasks * swh-scheduler-schema: Fix unneeded drop instructions * swh.scheduler.cli: Improve docstring * swh.scheduler.cli: Permit to specify the backend to use in cli * swh.scheduler.api: Bootstrap scheduler's remote api * swh.scheduler: Use `get_scheduler` api to instantiate a scheduler * swh.scheduler.backend: Fix docstring -- Antoine R. Dumont (@ardumont) Thu, 26 Apr 2018 17:34:07 +0200 swh-scheduler (0.0.25-1~swh1) unstable-swh; urgency=medium * v0.0.25 * swh.scheduler.cli.archive: Index arguments.kwargs as text -- Antoine R. Dumont (@ardumont) Wed, 18 Apr 2018 12:34:43 +0200 swh-scheduler (0.0.24-1~swh1) unstable-swh; urgency=medium * v0.0.24 * data/template: Do not index the arguments field (it's in _source) * data/README: Add a small readme to explain es install step * swh.scheduler.cli: Add a bulk index flag to separate read from index -- Antoine R. Dumont (@ardumont) Fri, 13 Apr 2018 14:55:32 +0200 swh-scheduler (0.0.23-1~swh1) unstable-swh; urgency=medium * swh.scheduler.cli.archive: Delete only completely indexed tasks * Prior to this commit, it could happen that we removed tasks even * though we did not yet index associated task_run. * Related T986 -- Antoine R. Dumont (@ardumont) Tue, 10 Apr 2018 17:43:07 +0200 swh-scheduler (0.0.22-1~swh1) unstable-swh; urgency=medium * v0.0.22 * Update to a more recent python3-elasticsearch client -- Antoine R. Dumont (@ardumont) Mon, 09 Apr 2018 16:09:16 +0200 swh-scheduler (0.0.21-1~swh1) unstable-swh; urgency=medium * v0.0.21 * Adapt default configuration * Fix typo in configuration variable name -- Antoine R. Dumont (@ardumont) Fri, 30 Mar 2018 15:02:55 +0200 swh-scheduler (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * swh.scheduler.cli.archive: Open completed oneshot or disabled * recurring tasks archival endpoint * swh.core.serializer: Move to msgpack serialization format * swh.scheduler.cli: Unify pretty print output * sql/data: Add new task type for loading mercurial dump * swh.scheduler.cli: Add sample use case for the scheduling cli * swh.scheduler.cli: Open policy column to the scheduling cli * swh.scheduler.cli: Open the delimiter option as cli argument * Fix issue when updating task-type without any retry delay defined * swh-scheduler/data: Add new oneshot scheduling load-mercurial task * backend: fix default scheduling_db value for consistency * backend: doc: fix return value of create_tasks -- Antoine R. Dumont (@ardumont) Fri, 30 Mar 2018 11:44:18 +0200 swh-scheduler (0.0.19-1~swh1) unstable-swh; urgency=medium * v0.0.19 * swh.scheduler.utils: Open utility function to create oneshot task -- Antoine R. Dumont (@ardumont) Wed, 29 Nov 2017 12:51:15 +0100 swh-scheduler (0.0.18-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.18 * Celery 4 compatibility -- Nicolas Dandrimont Wed, 08 Nov 2017 17:06:22 +0100 swh-scheduler (0.0.17-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler version 0.0.17 * Update packaging runes -- Nicolas Dandrimont Thu, 12 Oct 2017 18:49:02 +0200 swh-scheduler (0.0.16-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.16 * add some tests * implement one-shot tasks * implement retry on temporary failure -- Nicolas Dandrimont Mon, 07 Aug 2017 18:44:03 +0200 swh-scheduler (0.0.15-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.15 * Add some methods to get the length of task queues * worker: Show logs on stdout if loglevel = debug -- Nicolas Dandrimont Mon, 19 Jun 2017 19:44:56 +0200 swh-scheduler (0.0.14-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler 0.0.14 * Make the return value of tasks available in the listener -- Nicolas Dandrimont Mon, 12 Jun 2017 17:50:32 +0200 swh-scheduler (0.0.13-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.13 * Use systemd for logging rather than PostgreSQL -- Nicolas Dandrimont Fri, 07 Apr 2017 11:57:50 +0200 swh-scheduler (0.0.12-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.12 * Only log to database if the configuration is present -- Nicolas Dandrimont Thu, 09 Mar 2017 11:12:45 +0100 swh-scheduler (0.0.11-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.11 * add utils.get_task -- Nicolas Dandrimont Tue, 14 Feb 2017 19:49:34 +0100 swh-scheduler (0.0.10-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.10 * Allow disabling tasks -- Nicolas Dandrimont Thu, 20 Oct 2016 17:20:17 +0200 swh-scheduler (0.0.9-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.9 * Revert management of one shot tasks * Add possibility of launching several worker instances -- Nicolas Dandrimont Fri, 02 Sep 2016 17:09:18 +0200 swh-scheduler (0.0.7-1~swh1) unstable-swh; urgency=medium * v0.0.7 * Add oneshot task -- Antoine R. Dumont (@ardumont) Fri, 01 Jul 2016 16:42:45 +0200 swh-scheduler (0.0.6-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.6 * More reliability and efficiency when scheduling a lot of tasks -- Nicolas Dandrimont Wed, 24 Feb 2016 18:46:57 +0100 swh-scheduler (0.0.5-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.5 * Use copy for task mass-scheduling -- Nicolas Dandrimont Wed, 24 Feb 2016 12:13:38 +0100 swh-scheduler (0.0.4-1~swh1) unstable-swh; urgency=medium * Release swh-scheduler v0.0.4 * general cleanup of the backend * use arrow instead of dateutil * add new cli program -- Nicolas Dandrimont Tue, 23 Feb 2016 17:46:04 +0100 swh-scheduler (0.0.3-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler version 0.0.3 * Implement the timestamp arguments to the task_run functions * Make the celery event listener use a reliable queue -- Nicolas Dandrimont Mon, 22 Feb 2016 15:14:28 +0100 swh-scheduler (0.0.2-1~swh1) unstable-swh; urgency=medium * Release swh.scheduler v0.0.2 * Multiple schema changes * Initial releases for the celery job runner and the event listener -- Nicolas Dandrimont Fri, 19 Feb 2016 18:50:47 +0100 swh-scheduler (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * Release swh.scheduler v0.0.1 * Move swh.core.scheduling and swh.core.worker to swh.scheduler -- Nicolas Dandrimont Mon, 15 Feb 2016 11:07:30 +0100 diff --git a/requirements-test.txt b/requirements-test.txt index b6a054d..fc6c024 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,9 +1,10 @@ pytest pytest-mock celery >= 4.3 hypothesis >= 3.11.0 swh.lister +swh.storage[testing] types-click types-flask types-pyyaml types-requests diff --git a/swh.scheduler.egg-info/PKG-INFO b/swh.scheduler.egg-info/PKG-INFO index 0fb2113..55ef8e3 100644 --- a/swh.scheduler.egg-info/PKG-INFO +++ b/swh.scheduler.egg-info/PKG-INFO @@ -1,37 +1,37 @@ Metadata-Version: 2.1 Name: swh.scheduler -Version: 0.18.0 +Version: 0.18.2 Summary: Software Heritage Scheduler Home-page: https://forge.softwareheritage.org/diffusion/DSCH/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-scheduler Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-scheduler/ Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Requires-Python: >=3.7 Description-Content-Type: text/markdown Provides-Extra: testing Provides-Extra: journal Provides-Extra: simulator License-File: LICENSE License-File: LICENSE.Celery License-File: AUTHORS swh-scheduler ============= Job scheduler for the Software Heritage project. Task manager for asynchronous/delayed tasks, used for both recurrent (e.g., listing a forge, loading new stuff from a Git repository) and one-off activities (e.g., loading a specific version of a source package). diff --git a/swh.scheduler.egg-info/requires.txt b/swh.scheduler.egg-info/requires.txt index 061e94b..a772f88 100644 --- a/swh.scheduler.egg-info/requires.txt +++ b/swh.scheduler.egg-info/requires.txt @@ -1,36 +1,37 @@ attrs attrs-strict celery!=5.0.3,>=4.3 click elasticsearch>5.4 flask humanize pika>=1.1.0 psycopg2 pyyaml requests setuptools typing-extensions swh.core[db,http]>=0.14.0 swh.storage>=0.11.1 [journal] swh.journal [simulator] plotille simpy<4,>=3 [testing] pytest pytest-mock celery>=4.3 hypothesis>=3.11.0 swh.lister +swh.storage[testing] types-click types-flask types-pyyaml types-requests swh.journal plotille simpy<4,>=3 diff --git a/swh/scheduler/celery_backend/config.py b/swh/scheduler/celery_backend/config.py index 82f701f..b7c2315 100644 --- a/swh/scheduler/celery_backend/config.py +++ b/swh/scheduler/celery_backend/config.py @@ -1,363 +1,368 @@ # Copyright (C) 2015-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import functools import logging import os from time import monotonic as _monotonic import traceback from typing import Any, Dict, Optional import urllib.parse from celery import Celery from celery.signals import celeryd_after_setup, setup_logging, worker_init from celery.utils.log import ColorFormatter from celery.worker.control import Panel from kombu import Exchange, Queue import pkg_resources import requests from swh.core.config import load_named_config, merge_configs from swh.core.sentry import init_sentry from swh.scheduler import CONFIG as SWH_CONFIG try: from swh.core.logger import JournalHandler except ImportError: JournalHandler = None # type: ignore DEFAULT_CONFIG_NAME = "worker" CONFIG_NAME_ENVVAR = "SWH_WORKER_INSTANCE" CONFIG_NAME_TEMPLATE = "worker/%s" DEFAULT_CONFIG = { "task_broker": ("str", "amqp://guest@localhost//"), "task_modules": ("list[str]", []), "task_queues": ("list[str]", []), "task_soft_time_limit": ("int", 0), } logger = logging.getLogger(__name__) # Celery eats tracebacks in signal callbacks, this decorator catches # and prints them. # Also tries to notify Sentry if possible. def _print_errors(f): @functools.wraps(f) def newf(*args, **kwargs): try: return f(*args, **kwargs) except Exception as exc: traceback.print_exc() try: import sentry_sdk sentry_sdk.capture_exception(exc) except Exception: traceback.print_exc() return newf @setup_logging.connect @_print_errors def setup_log_handler( loglevel=None, logfile=None, format=None, colorize=None, log_console=None, log_journal=None, **kwargs, ): """Setup logging according to Software Heritage preferences. We use the command-line loglevel for tasks only, as we never really care about the debug messages from celery. """ if loglevel is None: loglevel = logging.DEBUG if isinstance(loglevel, str): loglevel = logging._nameToLevel[loglevel] formatter = logging.Formatter(format) root_logger = logging.getLogger("") root_logger.setLevel(logging.INFO) log_target = os.environ.get("SWH_LOG_TARGET", "console") if log_target == "console": log_console = True elif log_target == "journal": log_journal = True # this looks for log levels *higher* than DEBUG if loglevel <= logging.DEBUG and log_console is None: log_console = True if log_console: color_formatter = ColorFormatter(format) if colorize else formatter console = logging.StreamHandler() console.setLevel(logging.DEBUG) console.setFormatter(color_formatter) root_logger.addHandler(console) if log_journal: if not JournalHandler: root_logger.warning( "JournalHandler is not available, skipping. " "Please install swh-core[logging]." ) else: systemd_journal = JournalHandler() systemd_journal.setLevel(logging.DEBUG) systemd_journal.setFormatter(formatter) root_logger.addHandler(systemd_journal) logging.getLogger("celery").setLevel(logging.INFO) # Silence amqp heartbeat_tick messages logger = logging.getLogger("amqp") logger.addFilter(lambda record: not record.msg.startswith("heartbeat_tick")) logger.setLevel(loglevel) # Silence useless "Starting new HTTP connection" messages logging.getLogger("urllib3").setLevel(logging.WARNING) # Completely disable azure logspam azure_logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy") azure_logger.setLevel(logging.WARNING) logging.getLogger("swh").setLevel(loglevel) # get_task_logger makes the swh tasks loggers children of celery.task logging.getLogger("celery.task").setLevel(loglevel) return loglevel @celeryd_after_setup.connect @_print_errors def setup_queues_and_tasks(sender, instance, **kwargs): """Signal called on worker start. This automatically registers swh.scheduler.task.Task subclasses as available celery tasks. This also subscribes the worker to the "implicit" per-task queues defined for these task classes. """ logger.info("Setup Queues & Tasks for %s", sender) instance.app.conf["worker_name"] = sender @worker_init.connect @_print_errors def on_worker_init(*args, **kwargs): try: from sentry_sdk.integrations.celery import CeleryIntegration except ImportError: integrations = [] else: integrations = [CeleryIntegration()] sentry_dsn = None # will be set in `init_sentry` function init_sentry(sentry_dsn, integrations=integrations) @Panel.register def monotonic(state): """Get the current value for the monotonic clock""" return {"monotonic": _monotonic()} def route_for_task(name, args, kwargs, options, task=None, **kw): """Route tasks according to the task_queue attribute in the task class""" if name is not None and name.startswith("swh."): return {"queue": name} def get_queue_stats(app, queue_name): """Get the statistics regarding a queue on the broker. Arguments: queue_name: name of the queue to check Returns a dictionary raw from the RabbitMQ management API; or `None` if the current configuration does not use RabbitMQ. Interesting keys: - Consumers (number of consumers for the queue) - messages (number of messages in queue) - messages_unacknowledged (number of messages currently being processed) Documentation: https://www.rabbitmq.com/management.html#http-api """ conn_info = app.connection().info() if conn_info["transport"] == "memory": # We're running in a test environment, without RabbitMQ. return None url = "http://{hostname}:{port}/api/queues/{vhost}/{queue}".format( hostname=conn_info["hostname"], port=conn_info["port"] + 10000, vhost=urllib.parse.quote(conn_info["virtual_host"], safe=""), queue=urllib.parse.quote(queue_name, safe=""), ) credentials = (conn_info["userid"], conn_info["password"]) r = requests.get(url, auth=credentials) if r.status_code == 404: return {} if r.status_code != 200: raise ValueError( "Got error %s when reading queue stats: %s" % (r.status_code, r.json()) ) return r.json() def get_queue_length(app, queue_name): """Shortcut to get a queue's length""" stats = get_queue_stats(app, queue_name) if stats: return stats.get("messages") MAX_NUM_TASKS = 10000 def get_available_slots(app, queue_name: str, max_length: Optional[int]): """Get the number of tasks that can be sent to `queue_name`, when - the queue is limited to `max_length`.""" + the queue is limited to `max_length`. + + Returns: + The number of available slots in the queue. That result should be positive. + + """ if not max_length: return MAX_NUM_TASKS try: queue_length = get_queue_length(app, queue_name) # Clamp the return value to MAX_NUM_TASKS - max_val = min(max_length - queue_length, MAX_NUM_TASKS) + max_val = max(0, min(max_length - queue_length, MAX_NUM_TASKS)) except (ValueError, TypeError): # Unknown queue length, just schedule all the tasks max_val = MAX_NUM_TASKS return max_val def register_task_class(app, name, cls): """Register a class-based task under the given name""" if name in app.tasks: return task_instance = cls() task_instance.name = name app.register_task(task_instance) INSTANCE_NAME = os.environ.get(CONFIG_NAME_ENVVAR) CONFIG_NAME = os.environ.get("SWH_CONFIG_FILENAME") CONFIG = {} # type: Dict[str, Any] if CONFIG_NAME: # load the celery config from the main config file given as # SWH_CONFIG_FILENAME environment variable. # This is expected to have a [celery] section in which we have the # celery specific configuration. SWH_CONFIG.clear() SWH_CONFIG.update(load_named_config(CONFIG_NAME)) CONFIG = SWH_CONFIG.get("celery", {}) if not CONFIG: # otherwise, back to compat config loading mechanism if INSTANCE_NAME: CONFIG_NAME = CONFIG_NAME_TEMPLATE % INSTANCE_NAME else: CONFIG_NAME = DEFAULT_CONFIG_NAME # Load the Celery config CONFIG = load_named_config(CONFIG_NAME, DEFAULT_CONFIG) CONFIG.setdefault("task_modules", []) # load tasks modules declared as plugin entry points for entrypoint in pkg_resources.iter_entry_points("swh.workers"): worker_registrer_fn = entrypoint.load() # The registry function is expected to return a dict which the 'tasks' key # is a string (or a list of strings) with the name of the python module in # which celery tasks are defined. task_modules = worker_registrer_fn().get("task_modules", []) CONFIG["task_modules"].extend(task_modules) # Celery Queues CELERY_QUEUES = [Queue("celery", Exchange("celery"), routing_key="celery")] CELERY_DEFAULT_CONFIG = dict( # Timezone configuration: all in UTC enable_utc=True, timezone="UTC", # Imported modules imports=CONFIG.get("task_modules", []), # Time (in seconds, or a timedelta object) for when after stored task # tombstones will be deleted. None means to never expire results. result_expires=None, # A string identifying the default serialization method to use. Can # be json (default), pickle, yaml, msgpack, or any custom # serialization methods that have been registered with task_serializer="msgpack", # Result serialization format result_serializer="msgpack", # Acknowledge tasks as soon as they're received. We can do this as we have # external monitoring to decide if we need to retry tasks. task_acks_late=False, # A string identifying the default serialization method to use. # Can be pickle (default), json, yaml, msgpack or any custom serialization # methods that have been registered with kombu.serialization.registry accept_content=["msgpack", "json"], # If True the task will report its status as “started” # when the task is executed by a worker. task_track_started=True, # Default compression used for task messages. Can be gzip, bzip2 # (if available), or any custom compression schemes registered # in the Kombu compression registry. # result_compression='bzip2', # task_compression='bzip2', # Disable all rate limits, even if tasks has explicit rate limits set. # (Disabling rate limits altogether is recommended if you don’t have any # tasks using them.) worker_disable_rate_limits=True, # Task routing task_routes=route_for_task, # Allow pool restarts from remote worker_pool_restarts=True, # Do not prefetch tasks worker_prefetch_multiplier=1, # Send events worker_send_task_events=True, # Do not send useless task_sent events task_send_sent_event=False, ) def build_app(config=None): config = merge_configs( {k: v for (k, (_, v)) in DEFAULT_CONFIG.items()}, config or {} ) config["task_queues"] = CELERY_QUEUES + [ Queue(queue, Exchange(queue), routing_key=queue) for queue in config.get("task_queues", ()) ] logger.debug("Creating a Celery app with %s", config) # Instantiate the Celery app app = Celery(broker=config["task_broker"], task_cls="swh.scheduler.task:SWHTask") app.add_defaults(CELERY_DEFAULT_CONFIG) app.add_defaults(config) return app app = build_app(CONFIG) # XXX for BW compat Celery.get_queue_length = get_queue_length diff --git a/swh/scheduler/tests/test_cli.py b/swh/scheduler/tests/test_cli.py index 497780c..044a1d8 100644 --- a/swh/scheduler/tests/test_cli.py +++ b/swh/scheduler/tests/test_cli.py @@ -1,858 +1,856 @@ # Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from itertools import islice import logging import random import re import tempfile from unittest.mock import patch from click.testing import CliRunner import pytest from swh.core.api.classes import stream_results from swh.model.model import Origin from swh.scheduler.cli import cli from swh.scheduler.utils import create_task_dict, utcnow -from swh.storage import get_storage CLI_CONFIG = """ scheduler: cls: foo args: {} """ def invoke(scheduler, catch_exceptions, args): runner = CliRunner() with patch( "swh.scheduler.get_scheduler" ) as get_scheduler_mock, tempfile.NamedTemporaryFile( "a", suffix=".yml" ) as config_fd: config_fd.write(CLI_CONFIG) config_fd.seek(0) get_scheduler_mock.return_value = scheduler args = ["-C" + config_fd.name,] + args result = runner.invoke(cli, args, obj={"log_level": logging.WARNING}) if not catch_exceptions and result.exception: print(result.output) raise result.exception return result def test_schedule_tasks(swh_scheduler): csv_data = ( b'swh-test-ping;[["arg1", "arg2"]];{"key": "value"};' + utcnow().isoformat().encode() + b"\n" + b'swh-test-ping;[["arg3", "arg4"]];{"key": "value"};' + utcnow().isoformat().encode() + b"\n" ) with tempfile.NamedTemporaryFile(suffix=".csv") as csv_fd: csv_fd.write(csv_data) csv_fd.seek(0) result = invoke( swh_scheduler, False, ["task", "schedule", "-d", ";", csv_fd.name] ) expected = r""" Created 2 tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: recurring Args: \['arg1', 'arg2'\] Keyword args: key: 'value' Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: recurring Args: \['arg3', 'arg4'\] Keyword args: key: 'value' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_schedule_tasks_columns(swh_scheduler): with tempfile.NamedTemporaryFile(suffix=".csv") as csv_fd: csv_fd.write(b'swh-test-ping;oneshot;["arg1", "arg2"];{"key": "value"}\n') csv_fd.seek(0) result = invoke( swh_scheduler, False, [ "task", "schedule", "-c", "type", "-c", "policy", "-c", "args", "-c", "kwargs", "-d", ";", csv_fd.name, ], ) expected = r""" Created 1 tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Args: 'arg1' 'arg2' Keyword args: key: 'value' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_schedule_task(swh_scheduler): result = invoke( swh_scheduler, False, ["task", "add", "swh-test-ping", "arg1", "arg2", "key=value",], ) expected = r""" Created 1 tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: recurring Args: 'arg1' 'arg2' Keyword args: key: 'value' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_pending_tasks_none(swh_scheduler): result = invoke(swh_scheduler, False, ["task", "list-pending", "swh-test-ping",]) expected = r""" Found 0 swh-test-ping tasks """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_pending_tasks(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task2["next_run"] += datetime.timedelta(days=1) swh_scheduler.create_tasks([task1, task2]) result = invoke(swh_scheduler, False, ["task", "list-pending", "swh-test-ping",]) expected = r""" Found 1 swh-test-ping tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Args: Keyword args: key: 'value1' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output swh_scheduler.grab_ready_tasks("swh-test-ping") result = invoke(swh_scheduler, False, ["task", "list-pending", "swh-test-ping",]) expected = r""" Found 0 swh-test-ping tasks """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_pending_tasks_filter(swh_scheduler): task = create_task_dict("swh-test-multiping", "oneshot", key="value") swh_scheduler.create_tasks([task]) result = invoke(swh_scheduler, False, ["task", "list-pending", "swh-test-ping",]) expected = r""" Found 0 swh-test-ping tasks """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_pending_tasks_filter_2(swh_scheduler): swh_scheduler.create_tasks( [ create_task_dict("swh-test-multiping", "oneshot", key="value"), create_task_dict("swh-test-ping", "oneshot", key="value2"), ] ) result = invoke(swh_scheduler, False, ["task", "list-pending", "swh-test-ping",]) expected = r""" Found 1 swh-test-ping tasks Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Args: Keyword args: key: 'value2' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output # Fails because "task list-pending --limit 3" only returns 2 tasks, because # of how compute_nb_tasks_from works. @pytest.mark.xfail def test_list_pending_tasks_limit(swh_scheduler): swh_scheduler.create_tasks( [ create_task_dict("swh-test-ping", "oneshot", key="value%d" % i) for i in range(10) ] ) result = invoke( swh_scheduler, False, ["task", "list-pending", "swh-test-ping", "--limit", "3",] ) expected = r""" Found 2 swh-test-ping tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Args: Keyword args: key: 'value0' Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Args: Keyword args: key: 'value1' Task 3 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Args: Keyword args: key: 'value2' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_pending_tasks_before(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task1["next_run"] += datetime.timedelta(days=3) task2["next_run"] += datetime.timedelta(days=1) swh_scheduler.create_tasks([task1, task2]) result = invoke( swh_scheduler, False, [ "task", "list-pending", "swh-test-ping", "--before", (datetime.date.today() + datetime.timedelta(days=2)).isoformat(), ], ) expected = r""" Found 1 swh-test-ping tasks Task 2 Next run: tomorrow \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Args: Keyword args: key: 'value2' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_tasks(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task1["next_run"] += datetime.timedelta(days=3, hours=2) swh_scheduler.create_tasks([task1, task2]) swh_scheduler.grab_ready_tasks("swh-test-ping") result = invoke(swh_scheduler, False, ["task", "list",]) expected = r""" Found 2 tasks Task 1 Next run: .+ \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value1' Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_scheduled Priority:\x20 Args: Keyword args: key: 'value2' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_tasks_id(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task3 = create_task_dict("swh-test-ping", "oneshot", key="value3") swh_scheduler.create_tasks([task1, task2, task3]) result = invoke(swh_scheduler, False, ["task", "list", "--task-id", "2",]) expected = r""" Found 1 tasks Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value2' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_tasks_id_2(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task3 = create_task_dict("swh-test-ping", "oneshot", key="value3") swh_scheduler.create_tasks([task1, task2, task3]) result = invoke( swh_scheduler, False, ["task", "list", "--task-id", "2", "--task-id", "3"] ) expected = r""" Found 2 tasks Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value2' Task 3 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value3' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_tasks_type(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-multiping", "oneshot", key="value2") task3 = create_task_dict("swh-test-ping", "oneshot", key="value3") swh_scheduler.create_tasks([task1, task2, task3]) result = invoke( swh_scheduler, False, ["task", "list", "--task-type", "swh-test-ping"] ) expected = r""" Found 2 tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value1' Task 3 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value3' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_tasks_limit(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task3 = create_task_dict("swh-test-ping", "oneshot", key="value3") swh_scheduler.create_tasks([task1, task2, task3]) result = invoke(swh_scheduler, False, ["task", "list", "--limit", "2",]) expected = r""" Found 2 tasks Task 1 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value1' Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value2' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_tasks_before(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task1["next_run"] += datetime.timedelta(days=3, hours=2) swh_scheduler.create_tasks([task1, task2]) swh_scheduler.grab_ready_tasks("swh-test-ping") result = invoke( swh_scheduler, False, [ "task", "list", "--before", (datetime.date.today() + datetime.timedelta(days=2)).isoformat(), ], ) expected = r""" Found 1 tasks Task 2 Next run: today \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_scheduled Priority:\x20 Args: Keyword args: key: 'value2' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def test_list_tasks_after(swh_scheduler): task1 = create_task_dict("swh-test-ping", "oneshot", key="value1") task2 = create_task_dict("swh-test-ping", "oneshot", key="value2") task1["next_run"] += datetime.timedelta(days=3, hours=2) swh_scheduler.create_tasks([task1, task2]) swh_scheduler.grab_ready_tasks("swh-test-ping") result = invoke( swh_scheduler, False, [ "task", "list", "--after", (datetime.date.today() + datetime.timedelta(days=2)).isoformat(), ], ) expected = r""" Found 1 tasks Task 1 Next run: .+ \(.*\) Interval: 1 day, 0:00:00 Type: swh-test-ping Policy: oneshot Status: next_run_not_scheduled Priority:\x20 Args: Keyword args: key: 'value1' """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), result.output def _fill_storage_with_origins(storage, nb_origins): origins = [Origin(url=f"http://example.com/{i}") for i in range(nb_origins)] storage.origin_add(origins) return origins @pytest.fixture -def storage(): +def storage(swh_storage): """An instance of in-memory storage that gets injected into the CLI functions.""" - storage = get_storage(cls="memory") with patch("swh.storage.get_storage") as get_storage_mock: - get_storage_mock.return_value = storage - yield storage + get_storage_mock.return_value = swh_storage + yield swh_storage @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) def test_task_schedule_origins_dry_run(swh_scheduler, storage): """Tests the scheduling when origin_batch_size*task_batch_size is a divisor of nb_origins.""" _fill_storage_with_origins(storage, 90) result = invoke( swh_scheduler, False, ["task", "schedule_origins", "--dry-run", "swh-test-ping",], ) # Check the output expected = r""" Scheduled 3 tasks \(30 origins\). Scheduled 6 tasks \(60 origins\). Scheduled 9 tasks \(90 origins\). Done. """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output) # Check scheduled tasks tasks = swh_scheduler.search_tasks() assert len(tasks) == 0 def _assert_origin_tasks_contraints(tasks, max_tasks, max_task_size, expected_origins): # check there are not too many tasks assert len(tasks) <= max_tasks # check tasks are not too large assert all(len(task["arguments"]["args"][0]) <= max_task_size for task in tasks) # check the tasks are exhaustive assert sum([len(task["arguments"]["args"][0]) for task in tasks]) == len( expected_origins ) assert set.union(*(set(task["arguments"]["args"][0]) for task in tasks)) == { origin.url for origin in expected_origins } @patch("swh.scheduler.cli.utils.TASK_BATCH_SIZE", 3) def test_task_schedule_origins(swh_scheduler, storage): """Tests the scheduling when neither origin_batch_size or task_batch_size is a divisor of nb_origins.""" origins = _fill_storage_with_origins(storage, 70) result = invoke( swh_scheduler, False, ["task", "schedule_origins", "swh-test-ping", "--batch-size", "20",], ) # Check the output expected = r""" Scheduled 3 tasks \(60 origins\). Scheduled 4 tasks \(70 origins\). Done. """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output) # Check tasks tasks = swh_scheduler.search_tasks() _assert_origin_tasks_contraints(tasks, 4, 20, origins) assert all(task["arguments"]["kwargs"] == {} for task in tasks) def test_task_schedule_origins_kwargs(swh_scheduler, storage): """Tests support of extra keyword-arguments.""" origins = _fill_storage_with_origins(storage, 30) result = invoke( swh_scheduler, False, [ "task", "schedule_origins", "swh-test-ping", "--batch-size", "20", 'key1="value1"', 'key2="value2"', ], ) # Check the output expected = r""" Scheduled 2 tasks \(30 origins\). Done. """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output) # Check tasks tasks = swh_scheduler.search_tasks() _assert_origin_tasks_contraints(tasks, 2, 20, origins) assert all( task["arguments"]["kwargs"] == {"key1": "value1", "key2": "value2"} for task in tasks ) def test_task_schedule_origins_with_limit(swh_scheduler, storage): """Tests support of extra keyword-arguments.""" _fill_storage_with_origins(storage, 50) limit = 20 expected_origins = list(islice(stream_results(storage.origin_list), limit)) nb_origins = len(expected_origins) assert nb_origins == limit max_task_size = 5 nb_tasks, remainder = divmod(nb_origins, max_task_size) assert remainder == 0 # made the numbers go round result = invoke( swh_scheduler, False, [ "task", "schedule_origins", "swh-test-ping", "--batch-size", max_task_size, "--limit", limit, ], ) # Check the output expected = rf""" Scheduled {nb_tasks} tasks \({nb_origins} origins\). Done. """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output) tasks = swh_scheduler.search_tasks() _assert_origin_tasks_contraints(tasks, max_task_size, nb_origins, expected_origins) def test_task_schedule_origins_with_page_token(swh_scheduler, storage): """Tests support of extra keyword-arguments.""" nb_total_origins = 50 origins = _fill_storage_with_origins(storage, nb_total_origins) # prepare page_token and origins result expectancy page_result = storage.origin_list(limit=10) assert len(page_result.results) == 10 page_token = page_result.next_page_token assert page_token is not None # remove the first 10 origins listed as we won't see those in tasks expected_origins = [o for o in origins if o not in page_result.results] nb_origins = len(expected_origins) assert nb_origins == nb_total_origins - len(page_result.results) max_task_size = 10 nb_tasks, remainder = divmod(nb_origins, max_task_size) assert remainder == 0 result = invoke( swh_scheduler, False, [ "task", "schedule_origins", "swh-test-ping", "--batch-size", max_task_size, "--page-token", page_token, ], ) # Check the output expected = rf""" Scheduled {nb_tasks} tasks \({nb_origins} origins\). Done. """.lstrip() assert result.exit_code == 0, result.output assert re.fullmatch(expected, result.output, re.MULTILINE), repr(result.output) # Check tasks tasks = swh_scheduler.search_tasks() _assert_origin_tasks_contraints(tasks, max_task_size, nb_origins, expected_origins) def test_cli_task_runner_unknown_task_types(swh_scheduler, storage): """When passing at least one unknown task type, the runner should fail.""" task_types = swh_scheduler.get_task_types() task_type_names = [t["type"] for t in task_types] known_task_type = random.choice(task_type_names) unknown_task_type = "unknown-task-type" assert unknown_task_type not in task_type_names with pytest.raises(ValueError, match="Unknown"): invoke( swh_scheduler, False, [ "start-runner", "--task-type", known_task_type, "--task-type", unknown_task_type, ], ) @pytest.mark.parametrize("flag_priority", ["--with-priority", "--without-priority"]) def test_cli_task_runner_with_known_tasks( swh_scheduler, storage, caplog, flag_priority ): """Trigger runner with known tasks runs smoothly.""" task_types = swh_scheduler.get_task_types() task_type_names = [t["type"] for t in task_types] task_type_name = random.choice(task_type_names) task_type_name2 = random.choice(task_type_names) # The runner will just iterate over the following known tasks and do noop. We are # just checking the runner does not explode here. result = invoke( swh_scheduler, False, [ "start-runner", flag_priority, "--task-type", task_type_name, "--task-type", task_type_name2, ], ) assert result.exit_code == 0, result.output def test_cli_task_runner_no_task(swh_scheduler, storage): """Trigger runner with no parameter should run as before.""" # The runner will just iterate over the existing tasks from the scheduler and do # noop. We are just checking the runner does not explode here. result = invoke(swh_scheduler, False, ["start-runner",],) assert result.exit_code == 0, result.output diff --git a/swh/scheduler/tests/test_config.py b/swh/scheduler/tests/test_config.py index f0a705c..313a19b 100644 --- a/swh/scheduler/tests/test_config.py +++ b/swh/scheduler/tests/test_config.py @@ -1,55 +1,65 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.scheduler.celery_backend.config import ( MAX_NUM_TASKS, app, get_available_slots, route_for_task, ) @pytest.mark.parametrize("name", ["swh.something", "swh.anything"]) def test_route_for_task_routing(name): assert route_for_task(name, [], {}, {}) == {"queue": name} @pytest.mark.parametrize("name", [None, "foobar"]) def test_route_for_task_no_routing(name): assert route_for_task(name, [], {}, {}) is None def test_get_available_slots_no_max_length(): actual_num = get_available_slots(app, "anything", None) assert actual_num == MAX_NUM_TASKS def test_get_available_slots_issue_when_reading_queue(mocker): mock = mocker.patch("swh.scheduler.celery_backend.config.get_queue_length") mock.side_effect = ValueError actual_num = get_available_slots(app, "anything", max_length=10) assert actual_num == MAX_NUM_TASKS assert mock.called def test_get_available_slots_no_queue_length(mocker): mock = mocker.patch("swh.scheduler.celery_backend.config.get_queue_length") mock.return_value = None actual_num = get_available_slots(app, "anything", max_length=100) assert actual_num == MAX_NUM_TASKS assert mock.called +def test_get_available_slots_no_more_slots(mocker): + mock = mocker.patch("swh.scheduler.celery_backend.config.get_queue_length") + max_length = 100 + queue_length = 9000 + mock.return_value = queue_length + actual_num = get_available_slots(app, "anything", max_length) + assert actual_num == 0 + assert mock.called + + def test_get_available_slots(mocker): mock = mocker.patch("swh.scheduler.celery_backend.config.get_queue_length") max_length = 100 queue_length = 90 mock.return_value = queue_length actual_num = get_available_slots(app, "anything", max_length) assert actual_num == max_length - queue_length assert mock.called