diff --git a/docs/tutorial.rst b/docs/tutorial.rst --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -145,10 +145,10 @@ values monotonically increase with new repositories. A good indexing service also includes the URL of the next page with a later 'foo' in its responses. For these indexing services we provide another intermediate lister called the -indexing lister. Instead of inheriting from :class:`SWHListerBase -`, the lister class would inherit -from :class:`SWHIndexingLister -`. Along with the +indexing lister. Instead of inheriting from :class:`ListerBase +`, the lister class would inherit +from :class:`IndexingLister +`. Along with the requirements of the lister base, the indexing lister base adds one extra requirement: @@ -179,9 +179,9 @@ from urllib import parse from swh.lister.bitbucket.models import BitBucketModel - from swh.lister.core.indexing_lister import SWHIndexingHttpLister + from swh.lister.core.indexing_lister import IndexingHttpLister - class BitBucketLister(SWHIndexingHttpLister): + class BitBucketLister(IndexingHttpLister): PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel @@ -213,10 +213,10 @@ # See top-level LICENSE file for more information import time - from swh.lister.core.indexing_lister import SWHIndexingHttpLister + from swh.lister.core.indexing_lister import IndexingHttpLister from swh.lister.github.models import GitHubModel - class GitHubLister(SWHIndexingHttpLister): + class GitHubLister(IndexingHttpLister): PATH_TEMPLATE = '/repositories?since=%d' MODEL = GitHubModel @@ -255,12 +255,12 @@ We can see that there are some common elements: -* Both use the HTTP transport mixin (:class:`SWHIndexingHttpLister - `) just combines - :class:`SWHListerHttpTransport - ` and - :class:`SWHIndexingLister - `) to get most of the +* Both use the HTTP transport mixin (:class:`IndexingHttpLister + `) just combines + :class:`ListerHttpTransport + ` and + :class:`IndexingLister + `) to get most of the network request functionality for 
free. * Both also define ``MODEL`` and ``PATH_TEMPLATE`` variables. It should be @@ -305,59 +305,59 @@ api_baseurl='https://github.com') ghl.run() -⇓ (SWHIndexingLister.run):: +⇓ (IndexingLister.run):: - # SWHIndexingLister.run + # IndexingLister.run identifier = None do - response, repos = SWHListerBase.ingest_data(identifier) + response, repos = ListerBase.ingest_data(identifier) identifier = GitHubLister.get_next_target_from_response(response) while(identifier) -⇓ (SWHListerBase.ingest_data):: +⇓ (ListerBase.ingest_data):: - # SWHListerBase.ingest_data + # ListerBase.ingest_data - response = SWHListerBase.safely_issue_request(identifier) + response = ListerBase.safely_issue_request(identifier) repos = GitHubLister.transport_response_simplified(response) - injected = SWHListerBase.inject_repo_data_into_db(repos) + injected = ListerBase.inject_repo_data_into_db(repos) return response, injected -⇓ (SWHListerBase.safely_issue_request):: +⇓ (ListerBase.safely_issue_request):: - # SWHListerBase.safely_issue_request + # ListerBase.safely_issue_request repeat: - resp = SWHListerHttpTransport.transport_request(identifier) - retry, delay = SWHListerHttpTransport.transport_quota_check(resp) + resp = ListerHttpTransport.transport_request(identifier) + retry, delay = ListerHttpTransport.transport_quota_check(resp) if retry: sleep(delay) until((not retry) or too_many_retries) return resp -⇓ (SWHListerHttpTransport.transport_request):: +⇓ (ListerHttpTransport.transport_request):: - # SWHListerHttpTransport.transport_request + # ListerHttpTransport.transport_request - path = SWHListerBase.api_baseurl - + SWHListerHttpTransport.PATH_TEMPLATE % identifier - headers = SWHListerHttpTransport.request_headers() + path = ListerBase.api_baseurl + + ListerHttpTransport.PATH_TEMPLATE % identifier + headers = ListerHttpTransport.request_headers() return http.get(path, headers) (Oh look, there's our ``PATH_TEMPLATE``) -⇓ (SWHListerHttpTransport.request_headers):: +⇓ 
(ListerHttpTransport.request_headers):: - # SWHListerHttpTransport.request_headers + # ListerHttpTransport.request_headers override → GitHubLister.request_headers -↑↑ (SWHListerBase.safely_issue_request) +↑↑ (ListerBase.safely_issue_request) -⇓ (SWHListerHttpTransport.transport_quota_check):: +⇓ (ListerHttpTransport.transport_quota_check):: - # SWHListerHttpTransport.transport_quota_check + # ListerHttpTransport.transport_quota_check override → GitHubLister.transport_quota_check diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -7,13 +7,13 @@ import iso8601 from swh.lister.bitbucket.models import BitBucketModel -from swh.lister.core.indexing_lister import SWHIndexingHttpLister +from swh.lister.core.indexing_lister import IndexingHttpLister logger = logging.getLogger(__name__) -class BitBucketLister(SWHIndexingHttpLister): +class BitBucketLister(IndexingHttpLister): PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel LISTER_NAME = 'bitbucket' @@ -34,8 +34,6 @@ body = response.json() if 'next' in body: return parse.unquote(body['next'].split('after=')[1]) - else: - return None def transport_response_simplified(self, response): repos = response.json()['values'] diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py --- a/swh/lister/core/indexing_lister.py +++ b/swh/lister/core/indexing_lister.py @@ -9,13 +9,13 @@ import dateutil from sqlalchemy import func -from .lister_transports import SWHListerHttpTransport -from .lister_base import SWHListerBase +from .lister_transports import ListerHttpTransport +from .lister_base import ListerBase logger = logging.getLogger(__name__) -class SWHIndexingLister(SWHListerBase): +class IndexingLister(ListerBase): """Lister* intermediate class for any service that follows the pattern: - The service must report at least one stable unique identifier, known @@ -32,7 +32,7 @@ 
necessary/available, some indication of the URL or index for fetching the next series of repository data. - See :class:`swh.lister.core.lister_base.SWHListerBase` for more details. + See :class:`swh.lister.core.lister_base.ListerBase` for more details. This class cannot be instantiated. To create a new Lister for a source code listing service that follows the model described above, you must @@ -64,7 +64,7 @@ # You probably don't need to override anything below this line. def filter_before_inject(self, models_list): - """Overrides SWHListerBase.filter_before_inject + """Overrides ListerBase.filter_before_inject Bounds query results by this Lister's set max_index. """ @@ -234,9 +234,9 @@ self.db_session = self.mk_session() -class SWHIndexingHttpLister(SWHListerHttpTransport, SWHIndexingLister): +class IndexingHttpLister(ListerHttpTransport, IndexingLister): """Convenience class for ensuring right lookup and init order - when combining SWHIndexingLister and SWHListerHttpTransport.""" + when combining IndexingLister and ListerHttpTransport.""" def __init__(self, api_baseurl=None, override_config=None): - SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl) - SWHIndexingLister.__init__(self, override_config=override_config) + ListerHttpTransport.__init__(self, api_baseurl=api_baseurl) + IndexingLister.__init__(self, override_config=override_config) diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py --- a/swh/lister/core/lister_base.py +++ b/swh/lister/core/lister_base.py @@ -36,7 +36,7 @@ return repr(self.response) -class SWHListerBase(abc.ABC, config.SWHConfig): +class ListerBase(abc.ABC, config.SWHConfig): """Lister core base class. Generally a source code hosting service provides an API endpoint for listing the set of stored repositories. A Lister is the discovery @@ -46,11 +46,11 @@ The core method in this class is ingest_data. 
Any subclasses should be calling this method one or more times to fetch and ingest data from API - endpoints. See swh.lister.core.lister_base.SWHIndexingLister for + endpoints. See swh.lister.core.indexing_lister.IndexingLister for example usage. This class cannot be instantiated. Any instantiable Lister descending - from SWHListerBase must provide at least the required overrides. + from ListerBase must provide at least the required overrides. (see member docstrings for details): Required Overrides: @@ -172,7 +172,7 @@ MAY BE OVERRIDDEN, for example if the server indexable* key is technically sortable but not automatically so. - * - ( see: swh.lister.core.indexing_lister.SWHIndexingLister ) + * - ( see: swh.lister.core.indexing_lister.IndexingLister ) Args: inner (sortable type): the value being checked diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -24,10 +24,10 @@ logger = logging.getLogger(__name__) -class SWHListerHttpTransport(abc.ABC): +class ListerHttpTransport(abc.ABC): """Use the Requests library for making Lister endpoint requests. - To be used in conjunction with SWHListerBase or a subclass of it. + To be used in conjunction with ListerBase or a subclass of it. """ PATH_TEMPLATE = AbstractAttribute('string containing a python string' @@ -117,7 +117,7 @@ return params def transport_quota_check(self, response): - """Implements SWHListerBase.transport_quota_check with standard 429 + """Implements ListerBase.transport_quota_check with standard 429 code check for HTTP with Requests library. MAY BE OVERRIDDEN if the server notifies about rate limits in a @@ -174,7 +174,7 @@ return self._transport_action(identifier, method='head') def transport_request(self, identifier): - """Implements SWHListerBase.transport_request for HTTP using Requests. + """Implements ListerBase.transport_request for HTTP using Requests. 
Retrieve get information on api. @@ -182,7 +182,7 @@ return self._transport_action(identifier) def transport_response_to_string(self, response): - """Implements SWHListerBase.transport_response_to_string for HTTP given + """Implements ListerBase.transport_response_to_string for HTTP given Requests responses. """ s = pformat(response.request.path_url) @@ -200,11 +200,11 @@ return s -class ListerOnePageApiTransport(SWHListerHttpTransport): +class ListerOnePageApiTransport(ListerHttpTransport): """Leverage requests library to retrieve basic html page and parse result. - To be used in conjunction with SWHListerBase or a subclass of it. + To be used in conjunction with ListerBase or a subclass of it. """ PAGE = AbstractAttribute("The server api's unique page to retrieve and " diff --git a/swh/lister/core/page_by_page_lister.py b/swh/lister/core/page_by_page_lister.py --- a/swh/lister/core/page_by_page_lister.py +++ b/swh/lister/core/page_by_page_lister.py @@ -5,11 +5,11 @@ import abc import logging -from .lister_transports import SWHListerHttpTransport -from .lister_base import SWHListerBase +from .lister_transports import ListerHttpTransport +from .lister_base import ListerBase -class PageByPageLister(SWHListerBase): +class PageByPageLister(ListerBase): """Lister* intermediate class for any service that follows the simple pagination page pattern. @@ -22,7 +22,7 @@ of the next page index for fetching the remaining repository data. - See :class:`swh.lister.core.lister_base.SWHListerBase` for more + See :class:`swh.lister.core.lister_base.ListerBase` for more details. This class cannot be instantiated. To create a new Lister for a @@ -150,11 +150,11 @@ self.db_session = self.mk_session() -class PageByPageHttpLister(SWHListerHttpTransport, PageByPageLister): +class PageByPageHttpLister(ListerHttpTransport, PageByPageLister): """Convenience class for ensuring right lookup and init order when - combining PageByPageLister and SWHListerHttpTransport. 
+ combining PageByPageLister and ListerHttpTransport. """ def __init__(self, api_baseurl=None, override_config=None): - SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl) + ListerHttpTransport.__init__(self, api_baseurl=api_baseurl) PageByPageLister.__init__(self, override_config=override_config) diff --git a/swh/lister/core/simple_lister.py b/swh/lister/core/simple_lister.py --- a/swh/lister/core/simple_lister.py +++ b/swh/lister/core/simple_lister.py @@ -6,10 +6,10 @@ from swh.core import utils -from .lister_base import SWHListerBase +from .lister_base import ListerBase -class SimpleLister(SWHListerBase): +class SimpleLister(ListerBase): """Lister* intermediate class for any service that follows the simple, 'list in oneshot information' pattern. diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py --- a/swh/lister/core/tests/test_lister.py +++ b/swh/lister/core/tests/test_lister.py @@ -22,7 +22,7 @@ class HttpListerTesterBase(abc.ABC): """Base testing class for subclasses of - swh.lister.core.indexing_lister.SWHIndexingHttpLister. + swh.lister.core.indexing_lister.IndexingHttpLister. 
swh.lister.core.page_by_page_lister.PageByPageHttpLister See swh.lister.github.tests.test_gh_lister for an example of how diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -18,8 +18,8 @@ TempPackage, ) -from swh.lister.core.lister_base import SWHListerBase, FetchError -from swh.lister.core.lister_transports import SWHListerHttpTransport +from swh.lister.core.lister_base import ListerBase, FetchError +from swh.lister.core.lister_transports import ListerHttpTransport decompressors = { 'gz': lambda f: gzip.GzipFile(fileobj=f), @@ -28,18 +28,18 @@ } -class DebianLister(SWHListerHttpTransport, SWHListerBase): +class DebianLister(ListerHttpTransport, ListerBase): MODEL = Package PATH_TEMPLATE = None LISTER_NAME = 'debian' instance = 'debian' def __init__(self, override_config=None): - SWHListerHttpTransport.__init__(self, api_baseurl="bogus") - SWHListerBase.__init__(self, override_config=override_config) + ListerHttpTransport.__init__(self, api_baseurl="bogus") + ListerBase.__init__(self, override_config=override_config) def transport_request(self, identifier): - """Subvert SWHListerHttpTransport.transport_request, to try several + """Subvert ListerHttpTransport.transport_request, to try several index URIs in turn. The Debian repository format supports several compression algorithms @@ -70,7 +70,7 @@ def request_uri(self, identifier): # In the overridden transport_request, we pass - # SWHListerBase.transport_request() the full URI as identifier, so we + # ListerBase.transport_request() the full URI as identifier, so we # need to return it here. return identifier @@ -118,7 +118,7 @@ def inject_repo_data_into_db(self, models_list): """Generate the Package entries that didn't previously exist. - Contrary to SWHListerBase, we don't actually insert the data in + Contrary to ListerBase, we don't actually insert the data in database. 
`schedule_missing_tasks` does it once we have the origin and task identifiers. """ diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -5,11 +5,11 @@ import re import time -from swh.lister.core.indexing_lister import SWHIndexingHttpLister +from swh.lister.core.indexing_lister import IndexingHttpLister from swh.lister.github.models import GitHubModel -class GitHubLister(SWHIndexingHttpLister): +class GitHubLister(IndexingHttpLister): PATH_TEMPLATE = '/repositories?since=%d' MODEL = GitHubModel API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)') diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -4,12 +4,12 @@ from urllib.parse import quote -from swh.lister.core.indexing_lister import SWHIndexingHttpLister +from swh.lister.core.indexing_lister import IndexingHttpLister from swh.lister.npm.models import NpmModel from swh.scheduler.utils import create_task_dict -class NpmListerBase(SWHIndexingHttpLister): +class NpmListerBase(IndexingHttpLister): """List packages available in the npm registry in a paginated way """ MODEL = NpmModel diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -6,14 +6,14 @@ import urllib.parse -from swh.lister.core.indexing_lister import SWHIndexingHttpLister +from swh.lister.core.indexing_lister import IndexingHttpLister from swh.lister.phabricator.models import PhabricatorModel from collections import defaultdict logger = logging.getLogger(__name__) -class PhabricatorLister(SWHIndexingHttpLister): +class PhabricatorLister(IndexingHttpLister): PATH_TEMPLATE = '?order=oldest&attachments[uris]=1&after=%s' MODEL = PhabricatorModel LISTER_NAME = 'phabricator' @@ -104,7 +104,7 @@ def filter_before_inject(self, models_list): """ - (Overrides) 
SWHIndexingLister.filter_before_inject + (Overrides) IndexingLister.filter_before_inject Bounds query results by this Lister's set max_index. """ models_list = [m for m in models_list if m is not None]