diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -4,6 +4,7 @@ import random import time +from urllib3.util import parse_url from ..core.page_by_page_lister import PageByPageHttpLister from .models import GitLabModel @@ -15,10 +16,12 @@ MODEL = GitLabModel LISTER_NAME = 'gitlab' - def __init__(self, api_baseurl=None, instance=None, + def __init__(self, api_baseurl, instance=None, override_config=None, sort='asc', per_page=20): super().__init__(api_baseurl=api_baseurl, override_config=override_config) + if instance is None: + instance = parse_url(api_baseurl).host self.instance = instance self.PATH_TEMPLATE = '%s&sort=%s' % (self.PATH_TEMPLATE, sort) if per_page != 20: diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py --- a/swh/lister/gitlab/tasks.py +++ b/swh/lister/gitlab/tasks.py @@ -17,7 +17,7 @@ def new_lister(api_baseurl='https://gitlab.com/api/v4', - instance='gitlab', sort='asc', per_page=20): + instance=None, sort='asc', per_page=20): return GitLabLister( api_baseurl=api_baseurl, instance=instance, sort=sort, per_page=per_page) diff --git a/swh/lister/gitlab/tests/test_tasks.py b/swh/lister/gitlab/tests/test_tasks.py --- a/swh/lister/gitlab/tests/test_tasks.py +++ b/swh/lister/gitlab/tests/test_tasks.py @@ -28,7 +28,7 @@ lister.assert_called_once_with( api_baseurl='https://gitlab.com/api/v4', - instance='gitlab', sort='desc', per_page=20) + instance=None, sort='desc', per_page=20) lister.db_last_index.assert_not_called() lister.get_pages_information.assert_called_once_with() lister.run.assert_called_once_with( @@ -50,7 +50,7 @@ lister.assert_called_once_with( api_baseurl='https://gitlab.com/api/v4', - instance='gitlab', sort='asc', per_page=20) + instance=None, sort='asc', per_page=20) lister.db_last_index.assert_not_called() lister.run.assert_called_once_with(min_bound=12, max_bound=42) @@ -83,7 +83,55 @@ lister.assert_called_with( api_baseurl='https://gitlab.com/api/v4', - instance='gitlab', sort='asc', per_page=20) + instance=None, sort='asc', per_page=20) + + # one by the FullGitlabRelister task + # + 9 for the RangeGitlabLister subtasks + assert lister.call_count == 10 + + lister.db_last_index.assert_not_called() + lister.db_partition_indices.assert_not_called() + lister.get_pages_information.assert_called_once_with() + + # lister.run should have been called once per partition interval + for i in range(8): + # XXX inconsistent behavior: max_bound is EXCLUDED here + assert (dict(min_bound=10*i, max_bound=10*i + 10),) \ + in lister.run.call_args_list + assert (dict(min_bound=80, max_bound=85),) \ + in lister.run.call_args_list + + +@patch('swh.lister.gitlab.tasks.GitLabLister') +def test_relister_instance(lister, swh_app, celery_session_worker): + # setup the mocked GitlabLister + lister.return_value = lister + lister.run.return_value = None + lister.get_pages_information.return_value = (None, 85, None) + lister.db_partition_indices.return_value = [ + (i, i+9) for i in range(0, 80, 10)] + [(80, 85)] + + res = swh_app.send_task( + 'swh.lister.gitlab.tasks.FullGitLabRelister', + kwargs=dict(api_baseurl='https://0xacab.org/api/v4')) + assert res + + res.wait() + assert res.successful() + + # retrieve the GroupResult for this task and wait for all the subtasks + # to complete + promise_id = res.result + assert promise_id + promise = GroupResult.restore(promise_id, app=swh_app) + for i in range(5): + if promise.ready(): + break + sleep(1) + + lister.assert_called_with( + api_baseurl='https://0xacab.org/api/v4', + instance=None, sort='asc', per_page=20) # one by the FullGitlabRelister task # + 9 for the RangeGitlabLister subtasks