diff --git a/swh/lister/gitea/tasks.py b/swh/lister/gitea/tasks.py index c6af374..098f822 100644 --- a/swh/lister/gitea/tasks.py +++ b/swh/lister/gitea/tasks.py @@ -1,53 +1,53 @@ # Copyright (C) 2020 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import random from celery import group, shared_task from .. import utils from .lister import GiteaLister NBPAGES = 10 @shared_task(name=__name__ + ".IncrementalGiteaLister") def list_gitea_incremental(**lister_args): """Incremental update of a Gitea instance""" - lister_args["sort"] = "desc" + lister_args["order"] = "desc" lister = GiteaLister(**lister_args) total_pages = lister.get_pages_information()[1] # stopping as soon as existing origins for that instance are detected return lister.run(min_bound=1, max_bound=total_pages, check_existence=True) @shared_task(name=__name__ + ".RangeGiteaLister") def _range_gitea_lister(start, end, **lister_args): lister = GiteaLister(**lister_args) return lister.run(min_bound=start, max_bound=end) @shared_task(name=__name__ + ".FullGiteaRelister", bind=True) def list_gitea_full(self, **lister_args): """Full update of a Gitea instance""" lister = GiteaLister(**lister_args) _, total_pages, _ = lister.get_pages_information() ranges = list(utils.split_range(total_pages, NBPAGES)) random.shuffle(ranges) promise = group( _range_gitea_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges )() self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges))) try: promise.save() except (NotImplementedError, AttributeError): self.log.info("Unable to call save_group with current result backend.") # FIXME: what to do in terms of return here? return promise.id @shared_task(name=__name__ + ".ping") def _ping(): return "OK" diff --git a/swh/lister/gitea/tests/test_tasks.py b/swh/lister/gitea/tests/test_tasks.py index 4c223b6..5c070b5 100644 --- a/swh/lister/gitea/tests/test_tasks.py +++ b/swh/lister/gitea/tests/test_tasks.py @@ -1,150 +1,150 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from time import sleep from celery.result import GroupResult from unittest.mock import patch def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): res = swh_scheduler_celery_app.send_task("swh.lister.gitea.tasks.ping") assert res res.wait() assert res.successful() assert res.result == "OK" @patch("swh.lister.gitea.tasks.GiteaLister") def test_incremental(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker): # setup the mocked GiteaLister lister.return_value = lister lister.run.return_value = None lister.get_pages_information.return_value = (None, 10, None) res = swh_scheduler_celery_app.send_task( "swh.lister.gitea.tasks.IncrementalGiteaLister" ) assert res res.wait() assert res.successful() - lister.assert_called_once_with(sort="desc") + lister.assert_called_once_with(order="desc") lister.db_last_index.assert_not_called() lister.get_pages_information.assert_called_once_with() lister.run.assert_called_once_with(min_bound=1, max_bound=10, check_existence=True) @patch("swh.lister.gitea.tasks.GiteaLister") def test_range(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker): # setup the mocked GiteaLister lister.return_value = lister lister.run.return_value = None res = swh_scheduler_celery_app.send_task( "swh.lister.gitea.tasks.RangeGiteaLister", kwargs=dict(start=12, end=42) ) assert res res.wait() assert res.successful() lister.assert_called_once_with() lister.db_last_index.assert_not_called() lister.run.assert_called_once_with(min_bound=12, max_bound=42) @patch("swh.lister.gitea.tasks.GiteaLister") def test_relister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker): # setup the mocked GiteaLister lister.return_value = lister lister.run.return_value = None lister.get_pages_information.return_value = (None, 85, None) lister.db_partition_indices.return_value = [ (i, i + 9) for i in range(0, 80, 10) ] + [(80, 85)] res = swh_scheduler_celery_app.send_task("swh.lister.gitea.tasks.FullGiteaRelister") assert res res.wait() assert res.successful() # retrieve the GroupResult for this task and wait for all the subtasks # to complete promise_id = res.result assert promise_id promise = GroupResult.restore(promise_id, app=swh_scheduler_celery_app) for i in range(5): if promise.ready(): break sleep(1) lister.assert_called_with() # one by the FullGiteaRelister task # + 9 for the RangeGiteaLister subtasks assert lister.call_count == 10 lister.db_last_index.assert_not_called() lister.db_partition_indices.assert_not_called() lister.get_pages_information.assert_called_once_with() # lister.run should have been called once per partition interval for i in range(8): # XXX inconsistent behavior: max_bound is EXCLUDED here assert ( dict(min_bound=10 * i, max_bound=10 * i + 10), ) in lister.run.call_args_list assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list @patch("swh.lister.gitea.tasks.GiteaLister") def test_relister_instance( lister, swh_scheduler_celery_app, swh_scheduler_celery_worker ): # setup the mocked GiteaLister lister.return_value = lister lister.run.return_value = None lister.get_pages_information.return_value = (None, 85, None) lister.db_partition_indices.return_value = [ (i, i + 9) for i in range(0, 80, 10) ] + [(80, 85)] res = swh_scheduler_celery_app.send_task( "swh.lister.gitea.tasks.FullGiteaRelister", kwargs=dict(url="https://0xacab.org/api/v4"), ) assert res res.wait() assert res.successful() # retrieve the GroupResult for this task and wait for all the subtasks # to complete promise_id = res.result assert promise_id promise = GroupResult.restore(promise_id, app=swh_scheduler_celery_app) for i in range(5): if promise.ready(): break sleep(1) lister.assert_called_with(url="https://0xacab.org/api/v4") # one by the FullGiteaRelister task # + 9 for the RangeGiteaLister subtasks assert lister.call_count == 10 lister.db_last_index.assert_not_called() lister.db_partition_indices.assert_not_called() lister.get_pages_information.assert_called_once_with() # lister.run should have been called once per partition interval for i in range(8): # XXX inconsistent behavior: max_bound is EXCLUDED here assert ( dict(min_bound=10 * i, max_bound=10 * i + 10), ) in lister.run.call_args_list assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list