diff --git a/swh/lister/core/paging_lister.py b/swh/lister/core/paging_lister.py --- a/swh/lister/core/paging_lister.py +++ b/swh/lister/core/paging_lister.py @@ -82,6 +82,8 @@ def check_existence(self, injected_repos): """Given a list of injected repos, check if we already have them. + The 'instance' attribute is assumed to be populated. + """ # FIXME: Implement the check return False diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -6,6 +6,7 @@ import re import time +from .. import utils from ..core.paging_lister import PageByPageHttpLister from .models import GitLabModel @@ -108,9 +109,9 @@ """ response = self.transport_head(identifier=1) h = response.headers - total = h.get('x-total', h.get('X-Total')) - total_pages = h.get('x-total-pages', h.get('X-Total-Pages')) - per_page = h.get('x-per-page', h.get('X-Per-Page')) + total = utils.get(h, ['X-Total', 'x-total']) + total_pages = utils.get(h, ['X-Total-Pages', 'x-total-pages']) + per_page = utils.get(h, ['X-Per-Page', 'x-per-page']) if total is not None: total = int(total) if total_pages is not None: diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py --- a/swh/lister/gitlab/tasks.py +++ b/swh/lister/gitlab/tasks.py @@ -53,13 +53,14 @@ task_queue = 'swh_lister_gitlab_discover' def new_lister(self, api_baseurl='https://gitlab.com/api/v4', - instance='gitlab.com',): - # will invert the order of the lister's result + instance='gitlab.com'): + # assuming going forward in desc order, page 1 through the last page (x-total-pages) return GitLabLister(instance=instance, api_baseurl=api_baseurl, sort='desc') def run_task(self, *args, **kwargs): lister = self.new_lister(*args, **kwargs) - # will check for existing data and exit when found - return lister.run(min_bound=None, max_bound=None, - check_existence=True) + _, total_pages, _ = lister.get_pages_information() + # stopping as soon as existing origins for that instance are detected + return 
lister.run(min_bound=1, max_bound=total_pages, + check_existence=True) diff --git a/swh/lister/tests/__init__.py b/swh/lister/tests/__init__.py new file mode 100644 diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py new file mode 100644 --- /dev/null +++ b/swh/lister/tests/test_utils.py @@ -0,0 +1,51 @@ +# Copyright (C) 2018 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from nose.tools import istest + +from swh.lister import utils + + +class UtilsTest(unittest.TestCase): + + @istest + def get(self): + data = { + 'X-Next-Page': None, + 'x-next-page': 1, + } + actual_value = utils.get(data, ['X-Next-Page', 'x-next-page']) + + self.assertEqual(actual_value, 1) + + data = { + 'X-Next-Page': 10, + 'x-next-page': 1, + } + actual_value = utils.get(data, ['X-Next-Page', 'x-next-page']) + + self.assertEqual(actual_value, 10) + + data = { + 'x-next-page': 100, + } + actual_value = utils.get(data, ['X-Next-Page', 'x-next-page']) + + self.assertEqual(actual_value, 100) + + @istest + def get_empty(self): + self.assertIsNone(utils.get({}, [])) + self.assertIsNone(utils.get({'a': 1}, ['b'])) + self.assertIsNone(utils.get({'b': 2}, [])) + self.assertIsNone(utils.get({'b': None}, ['b'])) + + @istest + def get_errors(self): + with self.assertRaises(TypeError): + self.assertIsNone(utils.get({}, None)) + with self.assertRaises(AttributeError): + self.assertIsNone(utils.get(None, ['a'])) diff --git a/swh/lister/utils.py b/swh/lister/utils.py new file mode 100644 --- /dev/null +++ b/swh/lister/utils.py @@ -0,0 +1,14 @@ +# Copyright (C) 2018 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def get(d, keys): + """Given a dict, look up keys in order; return the first value not None. 
+ + """ + for key in keys: + v = d.get(key) + if v is not None: + return v + return None