diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ requests setuptools xmltodict +iso8601 diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -3,11 +3,16 @@ # See top-level LICENSE file for more information from urllib import parse +import logging +import iso8601 from swh.lister.bitbucket.models import BitBucketModel from swh.lister.core.indexing_lister import SWHIndexingHttpLister +logger = logging.getLogger(__name__) + + class BitBucketLister(SWHIndexingHttpLister): PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel @@ -37,4 +42,32 @@ return [self.get_model_from_repo(repo) for repo in repos] def request_uri(self, identifier): + if isinstance(identifier, list): + raise Exception(identifier) return super().request_uri(identifier or '1970-01-01') + + def is_within_bounds(self, inner, lower=None, upper=None): + # values are expected to be str dates + try: + inner = iso8601.parse_date(inner) + if lower: + lower = iso8601.parse_date(lower) + if upper: + upper = iso8601.parse_date(upper) + if lower is None and upper is None: + return True + elif lower is None: + ret = inner <= upper + elif upper is None: + ret = inner >= lower + else: + ret = lower <= inner <= upper + except Exception as e: + logger.error(str(e) + ': %s, %s, %s' % + (('inner=%s%s' % (type(inner), inner)), + ('lower=%s%s' % (type(lower), lower)), + ('upper=%s%s' % (type(upper), upper))) + ) + raise + + return ret diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py --- a/swh/lister/core/indexing_lister.py +++ b/swh/lister/core/indexing_lister.py @@ -112,6 +112,8 @@ # indexable column from the ith row index = self.db_session.query(self.MODEL.indexable) \ .order_by(self.MODEL.indexable).offset(i).first() + if index: + index = index[0] if index is not None and prev_index is not None: partitions.append((prev_index, index)) prev_index = index diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py --- a/swh/lister/core/lister_base.py +++ b/swh/lister/core/lister_base.py @@ -200,11 +200,11 @@ self.string_pattern_check(inner, lower, upper) except Exception as e: - logging.error(str(e) + ': %s, %s, %s' % - (('inner=%s%s' % (type(inner), inner)), + logger.error(str(e) + ': %s, %s, %s' % + (('inner=%s%s' % (type(inner), inner)), ('lower=%s%s' % (type(lower), lower)), ('upper=%s%s' % (type(upper), upper))) - ) + ) raise return ret @@ -249,7 +249,7 @@ def __init__(self, override_config=None): self.backoff = self.INITIAL_BACKOFF - logging.debug('Loading config from %s' % self.CONFIG_BASE_FILENAME) + logger.debug('Loading config from %s' % self.CONFIG_BASE_FILENAME) self.config = self.parse_config_file( base_filename=self.CONFIG_BASE_FILENAME, additional_configs=[self.ADDITIONAL_CONFIG] @@ -297,7 +297,7 @@ r = self.transport_request(identifier) except FetchError: # network-level connection error, try again - logging.warning( + logger.warning( 'connection error on %s: sleep for %d seconds' % (identifier, self.CONN_SLEEP)) time.sleep(self.CONN_SLEEP) @@ -310,7 +310,7 @@ # detect throttling must_retry, delay = self.transport_quota_check(r) if must_retry: - logging.warning( + logger.warning( 'rate limited on %s: sleep for %f seconds' % (identifier, delay)) time.sleep(delay) @@ -320,7 +320,7 @@ retries_left -= 1 if not retries_left: - logging.warning( + logger.warning( 'giving up on %s: max retries exceeded' % identifier) return r @@ -438,7 +438,7 @@ re.escape(a)) if (isinstance(b, str) and (re.match(a_pattern, b) is None) or isinstance(c, str) and (re.match(a_pattern, c) is None)): - logging.debug(a_pattern) + logger.debug(a_pattern) raise TypeError('incomparable string patterns detected') def inject_repo_data_into_db(self, models_list): diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -7,6 +7,7 @@ from datetime import datetime from email.utils import parsedate from pprint import pformat +import logging import requests import xmltodict @@ -20,6 +21,9 @@ from .lister_base import FetchError +logger = logging.getLogger(__name__) + + class SWHListerHttpTransport(abc.ABC): """Use the Requests library for making Lister endpoint requests. @@ -114,6 +118,7 @@ else: response = self.session.get(path, **params) except requests.exceptions.ConnectionError as e: + logger.warning('Failed to fetch %s: %s', path, e) raise FetchError(e) else: if response.status_code not in self.EXPECTED_STATUS_CODES: