diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -6,7 +6,6 @@ import iso8601 from datetime import datetime - from urllib import parse from swh.lister.bitbucket.models import BitBucketModel @@ -15,7 +14,6 @@ logger = logging.getLogger(__name__) - DEFAULT_BITBUCKET_PAGE = 10 @@ -24,7 +22,7 @@ MODEL = BitBucketModel LISTER_NAME = 'bitbucket' instance = 'bitbucket' - default_min_bound = datetime.utcfromtimestamp(0).isoformat() + default_min_bound = datetime.utcfromtimestamp(0) def __init__(self, api_baseurl, override_config=None, per_page=100): super().__init__( @@ -39,7 +37,7 @@ def get_model_from_repo(self, repo): return { 'uid': repo['uuid'], - 'indexable': repo['created_on'], + 'indexable': iso8601.parse_date(repo['created_on']), 'name': repo['name'], 'full_name': repo['full_name'], 'html_url': repo['links']['html']['href'], @@ -48,38 +46,38 @@ } def get_next_target_from_response(self, response): + """This will read the 'next' link from the api response if any + and return it as a datetime. + + Args: + reponse (Response): requests' response from api call + + Returns: + next date as a datetime + + """ body = response.json() - if 'next' in body: - return parse.unquote(body['next'].split('after=')[1]) + next_ = body.get('next') + if next_ is not None: + next_ = parse.urlparse(next_) + return iso8601.parse_date(parse.parse_qs(next_.query)['after'][0]) def transport_response_simplified(self, response): repos = response.json()['values'] return [self.get_model_from_repo(repo) for repo in repos] def request_uri(self, identifier): + identifier = parse.quote(identifier.isoformat()) return super().request_uri(identifier or '1970-01-01') def is_within_bounds(self, inner, lower=None, upper=None): - # values are expected to be str dates - try: - inner = iso8601.parse_date(inner) - if lower: - lower = iso8601.parse_date(lower) - if upper: - upper = iso8601.parse_date(upper) - if lower is None and upper is None: - return True - elif lower is None: - ret = inner <= upper - elif upper is None: - ret = inner >= lower - else: - ret = lower <= inner <= upper - except Exception as e: - logger.error(str(e) + ': %s, %s, %s', - ('inner=%s%s' % (type(inner), inner)), - ('lower=%s%s' % (type(lower), lower)), - ('upper=%s%s' % (type(upper), upper))) - raise - + # values are expected to be datetimes + if lower is None and upper is None: + ret = True + elif lower is None: + ret = inner <= upper + elif upper is None: + ret = inner >= lower + else: + ret = lower <= inner <= upper return ret diff --git a/swh/lister/bitbucket/models.py b/swh/lister/bitbucket/models.py --- a/swh/lister/bitbucket/models.py +++ b/swh/lister/bitbucket/models.py @@ -2,7 +2,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from sqlalchemy import Column, String +from sqlalchemy import Column, String, DateTime from swh.lister.core.models import IndexingModelBase @@ -12,4 +12,4 @@ __tablename__ = 'bitbucket_repo' uid = Column(String, primary_key=True) - indexable = Column(String, index=True) + indexable = Column(DateTime(timezone=True), index=True) diff --git a/swh/lister/bitbucket/tests/test_bb_lister.py b/swh/lister/bitbucket/tests/test_bb_lister.py --- a/swh/lister/bitbucket/tests/test_bb_lister.py +++ b/swh/lister/bitbucket/tests/test_bb_lister.py @@ -1,29 +1,63 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re import unittest +from datetime import timedelta + +from urllib.parse import unquote + +import iso8601 +import requests_mock + from swh.lister.bitbucket.lister import BitBucketLister from swh.lister.core.tests.test_lister import HttpListerTester +def convert_type(req_index): + """Convert the req_index to its right type according to the model's + "indexable" column. + + """ + return iso8601.parse_date(unquote(req_index)) + + class BitBucketListerTester(HttpListerTester, unittest.TestCase): Lister = BitBucketLister test_re = re.compile(r'/repositories\?after=([^?&]+)') lister_subdir = 'bitbucket' good_api_response_file = 'api_response.json' bad_api_response_file = 'api_empty_response.json' - first_index = '2008-07-12T07:44:01.476818+00:00' - last_index = '2008-07-19T06:16:43.044743+00:00' + first_index = convert_type('2008-07-12T07:44:01.476818+00:00') + last_index = convert_type('2008-07-19T06:16:43.044743+00:00') entries_per_page = 10 + convert_type = staticmethod(convert_type) + + @requests_mock.Mocker() + def test_fetch_none_nodb(self, http_mocker): + """Overridden because index is not an integer nor a string + + """ + http_mocker.get(self.test_re, text=self.mock_response) + fl = self.get_fl() + + self.disable_scheduler(fl) + self.disable_db(fl) + + # stores no results + fl.run(min_bound=self.first_index - timedelta(days=3), + max_bound=self.first_index) def test_is_within_bounds(self): fl = self.get_fl() self.assertTrue(fl.is_within_bounds( - '2008-07-15', self.first_index, self.last_index)) + iso8601.parse_date('2008-07-15'), + self.first_index, self.last_index)) self.assertFalse(fl.is_within_bounds( - '2008-07-20', self.first_index, self.last_index)) + iso8601.parse_date('2008-07-20'), + self.first_index, self.last_index)) self.assertFalse(fl.is_within_bounds( - '2008-07-11', self.first_index, self.last_index)) + iso8601.parse_date('2008-07-11'), + self.first_index, self.last_index)) diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py --- a/swh/lister/core/tests/test_lister.py +++ b/swh/lister/core/tests/test_lister.py @@ -38,6 +38,12 @@ first_index = AbstractAttribute('First index in good_api_response') entries_per_page = AbstractAttribute('Number of results in good response') LISTER_NAME = 'fake-lister' + convert_type = str + """static method used to convert the "request_index" to its right type (for + indexing listers for example, this is in accordance with the model's + "indexable" column). + + """ # May need to override this if the headers are used for something def response_headers(self, request): @@ -66,7 +72,7 @@ def request_index(self, request): m = self.test_re.search(request.path_url) if m and (len(m.groups()) > 0): - return m.group(1) + return self.convert_type(m.group(1)) def mock_response(self, request, context): self.fl.reset_backoff() @@ -76,7 +82,7 @@ context.headers.update(custom_headers) req_index = self.request_index(request) - if req_index == str(self.first_index): + if req_index == self.first_index: response_file = self.good_api_response_file else: response_file = self.bad_api_response_file diff --git a/swh/lister/github/tests/test_gh_lister.py b/swh/lister/github/tests/test_gh_lister.py --- a/swh/lister/github/tests/test_gh_lister.py +++ b/swh/lister/github/tests/test_gh_lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -19,10 +19,11 @@ first_index = 26 last_index = 368 entries_per_page = 100 + convert_type = int def response_headers(self, request): headers = {'X-RateLimit-Remaining': '1'} - if self.request_index(request) == str(self.first_index): + if self.request_index(request) == self.first_index: headers.update({ 'Link': ';' ' rel="next",' diff --git a/swh/lister/gitlab/tests/test_gitlab_lister.py b/swh/lister/gitlab/tests/test_gitlab_lister.py --- a/swh/lister/gitlab/tests/test_gitlab_lister.py +++ b/swh/lister/gitlab/tests/test_gitlab_lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -18,10 +18,11 @@ bad_api_response_file = 'api_empty_response.json' first_index = 1 entries_per_page = 10 + convert_type = int def response_headers(self, request): headers = {'RateLimit-Remaining': '1'} - if self.request_index(request) == str(self.first_index): + if self.request_index(request) == self.first_index: headers.update({ 'x-next-page': '3', }) diff --git a/swh/lister/npm/tests/test_npm_lister.py b/swh/lister/npm/tests/test_npm_lister.py --- a/swh/lister/npm/tests/test_npm_lister.py +++ b/swh/lister/npm/tests/test_npm_lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -33,7 +33,7 @@ lister_subdir = 'npm' good_api_response_file = 'api_inc_response.json' bad_api_response_file = 'api_inc_empty_response.json' - first_index = 6920642 + first_index = '6920642' entries_per_page = 100 @requests_mock.Mocker() diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -21,6 +21,7 @@ first_index = 1 last_index = 12 entries_per_page = 10 + convert_type = int def get_fl(self, override_config=None): """(Override) Retrieve an instance of fake lister (fl).