diff --git a/docs/tutorial.rst b/docs/tutorial.rst --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -164,6 +164,9 @@ looks much simpler when we look at the actual implementations of the two new-style indexing listers we currently haveā€¦ +An important aspect for making a new lister is its testing. To register the celery tasks of your new lister, you need add your lister in the main conftest.py +(swh/lister/core/tests/conftest.py) + This is the entire source code for the BitBucket repository lister:: # Copyright (C) 2017 the Software Heritage developers diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -17,6 +17,7 @@ PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel LISTER_NAME = 'bitbucket' + instance = 'bitbucket' def get_model_from_repo(self, repo): return { diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py --- a/swh/lister/core/lister_base.py +++ b/swh/lister/core/lister_base.py @@ -239,12 +239,9 @@ @property def ADDITIONAL_CONFIG(self): # noqa: N802 return { - 'credentials': - ('list[dict]', []), - 'cache_responses': - ('bool', False), - 'cache_dir': - ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME), + 'credentials': ('dict', {}), + 'cache_responses': ('bool', False), + 'cache_dir': ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME), } INITIAL_BACKOFF = 10 diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -62,13 +62,40 @@ """Get the full parameters passed to requests given the transport_request identifier. + This uses credentials if any are provided. The 'credentials' + configuration is expected to be a dict of multiple levels. The first + level is the lister's name, the second is the lister's instance name. + + For example: + + credentials: + github: # github lister + github: # has only one instance (so far) + - username: some + password: somekey + - username: one + password: onekey + - ... + gitlab: # gitlab lister + riseup: # has many instances + - username: someone + password: ... + - ... + gitlab: + - username: someone + password: ... + - ... + + MAY BE OVERRIDDEN if something more complex than the request headers is needed. """ params = {} params['headers'] = self.request_headers() or {} - creds = self.config['credentials'] + all_creds = self.config['credentials'] + lister_creds = all_creds.get(self.LISTER_NAME, {}) + creds = lister_creds.get(self.instance, {}) auth = random.choice(creds) if creds else None if auth: params['auth'] = (auth['username'], auth['password']) diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -32,6 +32,7 @@ MODEL = Package PATH_TEMPLATE = None LISTER_NAME = 'debian' + instance = 'debian' def __init__(self, override_config=None): SWHListerHttpTransport.__init__(self, api_baseurl="bogus") diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,7 @@ MODEL = GitHubModel API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)') LISTER_NAME = 'github' + instance = 'github' # There is only 1 instance of such lister def get_model_from_repo(self, repo): return { diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -28,19 +28,6 @@ self.PATH_TEMPLATE = '%s&per_page=%s' % ( self.PATH_TEMPLATE, per_page) - @property - def ADDITIONAL_CONFIG(self): - """Override additional config as the 'credentials' structure change - between the ancestor classes and this class. - - cf. request_params method below - - """ - default_config = super().ADDITIONAL_CONFIG - # 'credentials' is a dict of (instance, {username, password}) dict - default_config['credentials'] = ('dict', {}) - return default_config - def request_params(self, identifier): """Get the full parameters passed to requests given the transport_request identifier. diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,7 @@ """ MODEL = NpmModel LISTER_NAME = 'npm' + instance = 'npm' def __init__(self, api_baseurl='https://replicate.npmjs.com', per_page=1000, override_config=None): diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -2,6 +2,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import urllib.parse from swh.lister.core.indexing_lister import SWHIndexingHttpLister from swh.lister.phabricator.models import PhabricatorModel @@ -14,13 +15,17 @@ MODEL = PhabricatorModel LISTER_NAME = 'phabricator' - def __init__(self, forge_url, api_token, override_config=None): + def __init__(self, forge_url, api_token, instance=None, + override_config=None): if forge_url.endswith("/"): forge_url = forge_url[:-1] self.forge_url = forge_url api_endpoint = ('api/diffusion.repository.' 'search?api.token=%s') % api_token api_baseurl = '%s/%s' % (forge_url, api_endpoint) + if not instance: + instance = urllib.parse.urlparse(forge_url).hostname + self.instance = instance super().__init__(api_baseurl=api_baseurl, override_config=override_config) diff --git a/swh/lister/phabricator/tasks.py b/swh/lister/phabricator/tasks.py --- a/swh/lister/phabricator/tasks.py +++ b/swh/lister/phabricator/tasks.py @@ -6,9 +6,10 @@ from swh.lister.phabricator.lister import PhabricatorLister -def new_lister( - forge_url='https://forge.softwareheritage.org', api_token='', **kw): - return PhabricatorLister(forge_url=forge_url, api_token=api_token, **kw) +def new_lister(forge_url='https://forge.softwareheritage.org', api_token='', + instance='swh', **kw): + return PhabricatorLister(forge_url=forge_url, api_token=api_token, + instance=instance, **kw) @app.task(name=__name__ + '.IncrementalPhabricatorLister') diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -24,9 +24,11 @@ def get_fl(self, override_config=None): """(Override) Retrieve an instance of fake lister (fl). + """ if override_config or self.fl is None: - self.fl = self.Lister(forge_url='https://fakeurl', api_token='a-1', + self.fl = self.Lister(forge_url='https://fakeurl', instance='fake', + api_token='a-1', override_config=override_config) self.fl.INITIAL_BACKOFF = 1 diff --git a/swh/lister/phabricator/tests/test_tasks.py b/swh/lister/phabricator/tests/test_tasks.py --- a/swh/lister/phabricator/tests/test_tasks.py +++ b/swh/lister/phabricator/tests/test_tasks.py @@ -24,6 +24,7 @@ assert res.successful() lister.assert_called_once_with( - api_token='', forge_url='https://forge.softwareheritage.org') + api_token='', forge_url='https://forge.softwareheritage.org', + instance='swh') lister.db_last_index.assert_called_once_with() lister.run.assert_called_once_with(min_bound=42) diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -16,6 +16,7 @@ MODEL = PyPIModel LISTER_NAME = 'pypi' PAGE = 'https://pypi.org/simple/' + instance = 'pypi' # As of today only the main pypi.org is used def __init__(self, override_config=None): ListerOnePageApiTransport .__init__(self)