diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -23,9 +23,8 @@ instance = 'bitbucket' default_min_bound = datetime.utcfromtimestamp(0) - def __init__(self, api_baseurl=None, override_config=None, per_page=100): - super().__init__( - api_baseurl=api_baseurl, override_config=override_config) + def __init__(self, url=None, override_config=None, per_page=100): + super().__init__(url=url, override_config=override_config) per_page = self.config.get('per_page', per_page) self.PATH_TEMPLATE = '%s&pagelen=%s' % ( diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py --- a/swh/lister/bitbucket/tasks.py +++ b/swh/lister/bitbucket/tasks.py @@ -12,20 +12,16 @@ GROUP_SPLIT = 10000 -def new_lister(api_baseurl='https://api.bitbucket.org/2.0', per_page=100): - return BitBucketLister(api_baseurl=api_baseurl, per_page=per_page) - - @app.task(name=__name__ + '.IncrementalBitBucketLister') def list_bitbucket_incremental(**lister_args): '''Incremental update of the BitBucket forge''' - lister = new_lister(**lister_args) + lister = BitBucketLister(**lister_args) lister.run(min_bound=lister.db_last_index(), max_bound=None) @app.task(name=__name__ + '.RangeBitBucketLister') def _range_bitbucket_lister(start, end, **lister_args): - lister = new_lister(**lister_args) + lister = BitBucketLister(**lister_args) lister.run(min_bound=start, max_bound=end) @@ -36,7 +32,7 @@ It's not to be called for an initial listing. """ - lister = new_lister(**lister_args) + lister = BitBucketLister(**lister_args) ranges = lister.db_partition_indices(split or GROUP_SPLIT) if not ranges: self.log.info('Nothing to list') diff --git a/swh/lister/bitbucket/tests/test_tasks.py b/swh/lister/bitbucket/tests/test_tasks.py --- a/swh/lister/bitbucket/tests/test_tasks.py +++ b/swh/lister/bitbucket/tests/test_tasks.py @@ -26,8 +26,7 @@ res.wait() assert res.successful() - lister.assert_called_once_with( - api_baseurl='https://api.bitbucket.org/2.0', per_page=100) + lister.assert_called_once_with() lister.db_last_index.assert_called_once_with() lister.run.assert_called_once_with(min_bound=42, max_bound=None) @@ -45,8 +44,7 @@ res.wait() assert res.successful() - lister.assert_called_once_with( - api_baseurl='https://api.bitbucket.org/2.0', per_page=100) + lister.assert_called_once_with() lister.db_last_index.assert_not_called() lister.run.assert_called_once_with(min_bound=12, max_bound=42) @@ -76,8 +74,7 @@ break sleep(1) - lister.assert_called_with( - api_baseurl='https://api.bitbucket.org/2.0', per_page=100) + lister.assert_called_with() # one by the FullBitbucketRelister task # + 5 for the RangeBitbucketLister subtasks diff --git a/swh/lister/cli.py b/swh/lister/cli.py --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -63,7 +63,7 @@ (lister_name, SUPPORTED_LISTERS)) if db_url: conf['lister'] = {'cls': 'local', 'args': {'db': db_url}} - # To allow api_baseurl override per lister + registry_entry = LISTERS[lister_name].load()() lister_cls = registry_entry['lister'] lister = lister_cls(override_config=conf) diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py --- a/swh/lister/core/indexing_lister.py +++ b/swh/lister/core/indexing_lister.py @@ -244,6 +244,6 @@ class IndexingHttpLister(ListerHttpTransport, IndexingLister): """Convenience class for ensuring right lookup and init order when combining IndexingLister and ListerHttpTransport.""" - def __init__(self, api_baseurl=None, override_config=None): + def __init__(self, url=None, override_config=None): IndexingLister.__init__(self, override_config=override_config) - ListerHttpTransport.__init__(self, api_baseurl=api_baseurl) + ListerHttpTransport.__init__(self, url=url) diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -96,7 +96,7 @@ required. """ path = self.PATH_TEMPLATE % identifier - return self.api_baseurl + path + return self.url + path def request_params(self, identifier): """Get the full parameters passed to requests given the @@ -142,14 +142,14 @@ self.reset_backoff() return False, 0 - def __init__(self, api_baseurl=None): - if not api_baseurl: - api_baseurl = self.config.get('api_baseurl') - if not api_baseurl: - api_baseurl = self.DEFAULT_URL - if not api_baseurl: - raise NameError('HTTP Lister Transport requires api_baseurl.') - self.api_baseurl = api_baseurl # eg. 'https://api.github.com' + def __init__(self, url=None): + if not url: + url = self.config.get('url') + if not url: + url = self.DEFAULT_URL + if not url: + raise NameError('HTTP Lister Transport requires an url.') + self.url = url # eg. 'https://api.github.com' self.session = requests.Session() self.lister_version = __version__ @@ -218,7 +218,7 @@ "parse for information") PATH_TEMPLATE = None # we do not use it - def __init__(self, api_baseurl=None): + def __init__(self, url=None): self.session = requests.Session() self.lister_version = __version__ diff --git a/swh/lister/core/page_by_page_lister.py b/swh/lister/core/page_by_page_lister.py --- a/swh/lister/core/page_by_page_lister.py +++ b/swh/lister/core/page_by_page_lister.py @@ -155,6 +155,6 @@ combining PageByPageLister and ListerHttpTransport. """ - def __init__(self, api_baseurl=None, override_config=None): + def __init__(self, url=None, override_config=None): PageByPageLister.__init__(self, override_config=override_config) - ListerHttpTransport.__init__(self, api_baseurl=api_baseurl) + ListerHttpTransport.__init__(self, url=url) diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py --- a/swh/lister/core/tests/test_lister.py +++ b/swh/lister/core/tests/test_lister.py @@ -77,7 +77,7 @@ """ if override_config or self.fl is None: - self.fl = self.Lister(api_baseurl='https://fakeurl', + self.fl = self.Lister(url='https://fakeurl', override_config=override_config) self.fl.INITIAL_BACKOFF = 1 diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -35,7 +35,7 @@ instance = 'debian' def __init__(self, override_config=None): - ListerHttpTransport.__init__(self, api_baseurl="bogus") + ListerHttpTransport.__init__(self, url="notused") ListerBase.__init__(self, override_config=override_config) def transport_request(self, identifier): diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py --- a/swh/lister/github/tasks.py +++ b/swh/lister/github/tasks.py @@ -12,20 +12,16 @@ GROUP_SPLIT = 10000 -def new_lister(api_baseurl='https://api.github.com', **kw): - return GitHubLister(api_baseurl=api_baseurl, **kw) - - @app.task(name=__name__ + '.IncrementalGitHubLister') def list_github_incremental(**lister_args): 'Incremental update of GitHub' - lister = new_lister(**lister_args) + lister = GitHubLister(**lister_args) lister.run(min_bound=lister.db_last_index(), max_bound=None) @app.task(name=__name__ + '.RangeGitHubLister') def _range_github_lister(start, end, **lister_args): - lister = new_lister(**lister_args) + lister = GitHubLister(**lister_args) lister.run(min_bound=start, max_bound=end) @@ -36,7 +32,7 @@ It's not to be called for an initial listing. """ - lister = new_lister(**lister_args) + lister = GitHubLister(**lister_args) ranges = lister.db_partition_indices(split or GROUP_SPLIT) if not ranges: self.log.info('Nothing to list') diff --git a/swh/lister/github/tests/test_tasks.py b/swh/lister/github/tests/test_tasks.py --- a/swh/lister/github/tests/test_tasks.py +++ b/swh/lister/github/tests/test_tasks.py @@ -26,7 +26,7 @@ res.wait() assert res.successful() - lister.assert_called_once_with(api_baseurl='https://api.github.com') + lister.assert_called_once_with() lister.db_last_index.assert_called_once_with() lister.run.assert_called_once_with(min_bound=42, max_bound=None) @@ -44,7 +44,7 @@ res.wait() assert res.successful() - lister.assert_called_once_with(api_baseurl='https://api.github.com') + lister.assert_called_once_with() lister.db_last_index.assert_not_called() lister.run.assert_called_once_with(min_bound=12, max_bound=42) @@ -74,7 +74,7 @@ break sleep(1) - lister.assert_called_with(api_baseurl='https://api.github.com') + lister.assert_called_with() # one by the FullGitHubRelister task # + 5 for the RangeGitHubLister subtasks diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -16,12 +16,11 @@ MODEL = GitLabModel LISTER_NAME = 'gitlab' - def __init__(self, api_baseurl=None, instance=None, + def __init__(self, url=None, instance=None, override_config=None, sort='asc', per_page=20): - super().__init__(api_baseurl=api_baseurl, - override_config=override_config) + super().__init__(url=url, override_config=override_config) if instance is None: - instance = parse_url(self.api_baseurl).host + instance = parse_url(self.url).host self.instance = instance self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % ( self.PATH_TEMPLATE, sort, per_page) diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py --- a/swh/lister/gitlab/tasks.py +++ b/swh/lister/gitlab/tasks.py @@ -14,18 +14,11 @@ NBPAGES = 10 -def new_lister(api_baseurl='https://gitlab.com/api/v4', - instance=None, sort='asc', per_page=20): - return GitLabLister( - api_baseurl=api_baseurl, instance=instance, sort=sort, - per_page=per_page) - - @app.task(name=__name__ + '.IncrementalGitLabLister') def list_gitlab_incremental(**lister_args): """Incremental update of a GitLab instance""" lister_args['sort'] = 'desc' - lister = new_lister(**lister_args) + lister = GitLabLister(**lister_args) total_pages = lister.get_pages_information()[1] # stopping as soon as existing origins for that instance are detected lister.run(min_bound=1, max_bound=total_pages, check_existence=True) @@ -33,14 +26,14 @@ @app.task(name=__name__ + '.RangeGitLabLister') def _range_gitlab_lister(start, end, **lister_args): - lister = new_lister(**lister_args) + lister = GitLabLister(**lister_args) lister.run(min_bound=start, max_bound=end) @app.task(name=__name__ + '.FullGitLabRelister', bind=True) def list_gitlab_full(self, **lister_args): """Full update of a GitLab instance""" - lister = new_lister(**lister_args) + lister = GitLabLister(**lister_args) _, total_pages, _ = lister.get_pages_information() ranges = list(utils.split_range(total_pages, NBPAGES)) random.shuffle(ranges) diff --git a/swh/lister/gitlab/tests/test_tasks.py b/swh/lister/gitlab/tests/test_tasks.py --- a/swh/lister/gitlab/tests/test_tasks.py +++ b/swh/lister/gitlab/tests/test_tasks.py @@ -26,9 +26,7 @@ res.wait() assert res.successful() - lister.assert_called_once_with( - api_baseurl='https://gitlab.com/api/v4', - instance=None, sort='desc', per_page=20) + lister.assert_called_once_with(sort='desc') lister.db_last_index.assert_not_called() lister.get_pages_information.assert_called_once_with() lister.run.assert_called_once_with( @@ -48,9 +46,7 @@ res.wait() assert res.successful() - lister.assert_called_once_with( - api_baseurl='https://gitlab.com/api/v4', - instance=None, sort='asc', per_page=20) + lister.assert_called_once_with() lister.db_last_index.assert_not_called() lister.run.assert_called_once_with(min_bound=12, max_bound=42) @@ -81,9 +77,7 @@ break sleep(1) - lister.assert_called_with( - api_baseurl='https://gitlab.com/api/v4', - instance=None, sort='asc', per_page=20) + lister.assert_called_with() # one by the FullGitlabRelister task # + 9 for the RangeGitlabLister subtasks @@ -113,7 +107,7 @@ res = swh_app.send_task( 'swh.lister.gitlab.tasks.FullGitLabRelister', - kwargs=dict(api_baseurl='https://0xacab.org/api/v4')) + kwargs=dict(url='https://0xacab.org/api/v4')) assert res res.wait() @@ -129,9 +123,7 @@ break sleep(1) - lister.assert_called_with( - api_baseurl='https://0xacab.org/api/v4', - instance=None, sort='asc', per_page=20) + lister.assert_called_with(url='https://0xacab.org/api/v4') # one by the FullGitlabRelister task # + 9 for the RangeGitlabLister subtasks diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -16,10 +16,9 @@ LISTER_NAME = 'npm' instance = 'npm' - def __init__(self, api_baseurl='https://replicate.npmjs.com', + def __init__(self, url='https://replicate.npmjs.com', per_page=1000, override_config=None): - super().__init__(api_baseurl=api_baseurl, - override_config=override_config) + super().__init__(url=url, override_config=override_config) self.per_page = per_page + 1 self.PATH_TEMPLATE += '&limit=%s' % self.per_page @@ -76,7 +75,7 @@ 'https://www.npmjs.com/package/%s' % repo_name, # package metadata url needs to be escaped otherwise some requests # may fail (for instance when a package name contains '/') - '%s/%s' % (self.api_baseurl, quote(repo_name, safe='')) + '%s/%s' % (self.url, quote(repo_name, safe='')) ) def string_pattern_check(self, inner, lower, upper=None): diff --git a/swh/lister/npm/tasks.py b/swh/lister/npm/tasks.py --- a/swh/lister/npm/tasks.py +++ b/swh/lister/npm/tasks.py @@ -14,7 +14,7 @@ @contextmanager def save_registry_state(lister): params = {'headers': lister.request_headers()} - registry_state = lister.session.get(lister.api_baseurl, **params) + registry_state = lister.session.get(lister.url, **params) registry_state = registry_state.json() keys = ('doc_count', 'doc_del_count', 'update_seq', 'purge_seq', 'disk_size', 'data_size', 'committed_update_seq', diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -20,11 +20,10 @@ MODEL = PhabricatorModel LISTER_NAME = 'phabricator' - def __init__(self, api_baseurl=None, instance=None, override_config=None): - super().__init__(api_baseurl=api_baseurl, - override_config=override_config) + def __init__(self, url=None, instance=None, override_config=None): + super().__init__(url=url, override_config=override_config) if not instance: - instance = urllib.parse.urlparse(self.api_baseurl).hostname + instance = urllib.parse.urlparse(self.url).hostname self.instance = instance @property diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -33,9 +33,8 @@ ]}} override_config = dict(credentials=credentials, **(override_config or {})) - self.fl = self.Lister( - api_baseurl='https://fakeurl', instance='fake', - override_config=override_config) + self.fl = self.Lister(url='https://fakeurl', instance='fake', + override_config=override_config) self.fl.INITIAL_BACKOFF = 1 self.fl.reset_backoff() diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py --- a/swh/lister/tests/test_cli.py +++ b/swh/lister/tests/test_cli.py @@ -67,34 +67,34 @@ db_url = init_db().url() listers = { - 'gitlab': ('api_baseurl', 'https://gitlab.uni/api/v4/'), - 'phabricator': ( - 'api_baseurl', - 'https://somewhere.org/api/diffusion.repository.search'), + 'gitlab': 'https://other.gitlab.uni/api/v4/', + 'phabricator': 'https://somewhere.org/api/diffusion.repository.search', + 'cgit': 'https://some.where/cgit', } # check the override ends up defined in the lister - for lister_name, (url_key, url_value) in listers.items(): + for lister_name, url in listers.items(): lst = get_lister( lister_name, db_url, **{ - url_key: url_value, + 'url': url, 'priority': 'high', 'policy': 'oneshot', }) - assert getattr(lst, url_key) == url_value + assert lst.url == url assert lst.config['priority'] == 'high' assert lst.config['policy'] == 'oneshot' # check the default urls are used and not the override (since it's not # passed) - for lister_name, (url_key, url_value) in listers.items(): + for lister_name, url in listers.items(): lst = get_lister(lister_name, db_url) # no override so this does not end up in lister's configuration - assert url_key not in lst.config + assert 'url' not in lst.config assert 'priority' not in lst.config assert 'oneshot' not in lst.config + assert lst.url == lst.DEFAULT_URL def test_task_types(swh_scheduler_config, tmp_path):