Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123250
D4939.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D4939.diff
View Options
diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -1,50 +1,24 @@
-# Copyright (C) 2018 the Software Heritage developers
+# Copyright (C) 2018-2021 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import random
+from celery import shared_task
-from celery import group, shared_task
-
-from .. import utils
-from .lister import GitLabLister
-
-NBPAGES = 10
+from swh.lister.gitlab.lister import GitLabLister
@shared_task(name=__name__ + ".IncrementalGitLabLister")
def list_gitlab_incremental(**lister_args):
"""Incremental update of a GitLab instance"""
- lister_args["sort"] = "desc"
- lister = GitLabLister(**lister_args)
- total_pages = lister.get_pages_information()[1]
- # stopping as soon as existing origins for that instance are detected
- return lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
-
-
-@shared_task(name=__name__ + ".RangeGitLabLister")
-def _range_gitlab_lister(start, end, **lister_args):
- lister = GitLabLister(**lister_args)
- return lister.run(min_bound=start, max_bound=end)
+ lister = GitLabLister.from_configfile(incremental=True, **lister_args)
+ return lister.run().dict()
-@shared_task(name=__name__ + ".FullGitLabRelister", bind=True)
-def list_gitlab_full(self, **lister_args):
+@shared_task(name=__name__ + ".FullGitLabRelister")
+def list_gitlab_full(**lister_args):
"""Full update of a GitLab instance"""
- lister = GitLabLister(**lister_args)
- _, total_pages, _ = lister.get_pages_information()
- ranges = list(utils.split_range(total_pages, NBPAGES))
- random.shuffle(ranges)
- promise = group(
- _range_gitlab_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
- )()
- self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
- try:
- promise.save()
- except (NotImplementedError, AttributeError):
- self.log.info("Unable to call save_group with current result backend.")
- # FIXME: what to do in terms of return here?
- return promise.id
+ lister = GitLabLister.from_configfile(incremental=False, **lister_args)
+ return lister.run().dict()
@shared_task(name=__name__ + ".ping")
diff --git a/swh/lister/gitlab/tests/test_tasks.py b/swh/lister/gitlab/tests/test_tasks.py
--- a/swh/lister/gitlab/tests/test_tasks.py
+++ b/swh/lister/gitlab/tests/test_tasks.py
@@ -3,13 +3,9 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from time import sleep
-from unittest.mock import call, patch
+import pytest
-from celery.result import GroupResult
-
-from swh.lister.gitlab.tasks import NBPAGES
-from swh.lister.utils import split_range
+from swh.lister.pattern import ListerStats
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
@@ -20,128 +16,32 @@
assert res.result == "OK"
-@patch("swh.lister.gitlab.tasks.GitLabLister")
-def test_incremental(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
- # setup the mocked GitlabLister
- lister.return_value = lister
- lister.run.return_value = None
- lister.get_pages_information.return_value = (None, 10, None)
-
- res = swh_scheduler_celery_app.send_task(
- "swh.lister.gitlab.tasks.IncrementalGitLabLister"
- )
- assert res
- res.wait()
- assert res.successful()
-
- lister.assert_called_once_with(sort="desc")
- lister.db_last_index.assert_not_called()
- lister.get_pages_information.assert_called_once_with()
- lister.run.assert_called_once_with(min_bound=1, max_bound=10, check_existence=True)
-
-
-@patch("swh.lister.gitlab.tasks.GitLabLister")
-def test_range(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
- # setup the mocked GitlabLister
- lister.return_value = lister
- lister.run.return_value = None
-
- res = swh_scheduler_celery_app.send_task(
- "swh.lister.gitlab.tasks.RangeGitLabLister", kwargs=dict(start=12, end=42)
- )
- assert res
- res.wait()
- assert res.successful()
-
- lister.assert_called_once_with()
- lister.db_last_index.assert_not_called()
- lister.run.assert_called_once_with(min_bound=12, max_bound=42)
-
-
-@patch("swh.lister.gitlab.tasks.GitLabLister")
-def test_relister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
- total_pages = 85
- # setup the mocked GitlabLister
- lister.return_value = lister
- lister.run.return_value = None
- lister.get_pages_information.return_value = (None, total_pages, None)
-
- res = swh_scheduler_celery_app.send_task(
- "swh.lister.gitlab.tasks.FullGitLabRelister"
- )
- assert res
-
- res.wait()
- assert res.successful()
-
- # retrieve the GroupResult for this task and wait for all the subtasks
- # to complete
- promise_id = res.result
- assert promise_id
- promise = GroupResult.restore(promise_id, app=swh_scheduler_celery_app)
- for i in range(5):
- if promise.ready():
- break
- sleep(1)
-
- lister.assert_called_with()
-
- # one by the FullGitlabRelister task
- # + 9 for the RangeGitlabLister subtasks
- assert lister.call_count == 10
-
- lister.db_last_index.assert_not_called()
- lister.db_partition_indices.assert_not_called()
- lister.get_pages_information.assert_called_once_with()
-
- # lister.run should have been called once per partition interval
- for min_bound, max_bound in split_range(total_pages, NBPAGES):
- assert (
- call(min_bound=min_bound, max_bound=max_bound) in lister.run.call_args_list
- )
-
-
-@patch("swh.lister.gitlab.tasks.GitLabLister")
-def test_relister_instance(
- lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
+@pytest.mark.parametrize(
+ "task_name,incremental",
+ [("IncrementalGitLabLister", True), ("FullGitLabRelister", False)],
+)
+def test_task_lister_gitlab(
+ task_name,
+ incremental,
+ swh_scheduler_celery_app,
+ swh_scheduler_celery_worker,
+ mocker,
):
- total_pages = 85
- # setup the mocked GitlabLister
- lister.return_value = lister
- lister.run.return_value = None
- lister.get_pages_information.return_value = (None, total_pages, None)
+ stats = ListerStats(pages=10, origins=200)
+ mock_lister = mocker.patch("swh.lister.gitlab.tasks.GitLabLister")
+ mock_lister.from_configfile.return_value = mock_lister
+ mock_lister.run.return_value = ListerStats(pages=10, origins=200)
+ kwargs = dict(url="https://gitweb.torproject.org/")
res = swh_scheduler_celery_app.send_task(
- "swh.lister.gitlab.tasks.FullGitLabRelister",
- kwargs=dict(url="https://0xacab.org/api/v4"),
+ f"swh.lister.gitlab.tasks.{task_name}", kwargs=kwargs,
)
assert res
-
res.wait()
assert res.successful()
- # retrieve the GroupResult for this task and wait for all the subtasks
- # to complete
- promise_id = res.result
- assert promise_id
- promise = GroupResult.restore(promise_id, app=swh_scheduler_celery_app)
- for i in range(5):
- if promise.ready():
- break
- sleep(1)
-
- lister.assert_called_with(url="https://0xacab.org/api/v4")
-
- # one by the FullGitlabRelister task
- # + 9 for the RangeGitlabLister subtasks
- assert lister.call_count == 10
-
- lister.db_last_index.assert_not_called()
- lister.db_partition_indices.assert_not_called()
- lister.get_pages_information.assert_called_once_with()
-
- # lister.run should have been called once per partition interval
- for min_bound, max_bound in split_range(total_pages, NBPAGES):
- assert (
- call(min_bound=min_bound, max_bound=max_bound) in lister.run.call_args_list
- )
+ mock_lister.from_configfile.assert_called_once_with(
+ incremental=incremental, **kwargs
+ )
+ mock_lister.run.assert_called_once_with()
+ assert res.result == stats.dict()
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 5:08 AM (1 d, 1 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220368
Attached To
D4939: gitlab: Adapt celery task implementations to the new lister api
Event Timeline
Log In to Comment