diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py
index f1ea8d7..b7ae9e5 100644
--- a/swh/lister/core/tests/test_lister.py
+++ b/swh/lister/core/tests/test_lister.py
@@ -1,342 +1,340 @@
 # Copyright (C) 2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import abc
 import time
 from unittest import TestCase
 from unittest.mock import Mock, patch
 
 import requests_mock
 from sqlalchemy import create_engine
-from testing.postgresql import Postgresql
 
 from swh.lister.core.abstractattribute import AbstractAttribute
+from swh.lister.tests.test_utils import init_db
 
 
 def noop(*args, **kwargs):
     pass
 
 
 class HttpListerTesterBase(abc.ABC):
     """Testing base class for listers.
        This contains methods for both :class:`HttpSimpleListerTester` and
        :class:`HttpListerTester`.
 
     See :class:`swh.lister.gitlab.tests.test_lister` for an example of how
     to customize for a specific listing service.
 
     """
     Lister = AbstractAttribute('The lister class to test')
     lister_subdir = AbstractAttribute('bitbucket, github, etc.')
     good_api_response_file = AbstractAttribute('Example good response body')
     LISTER_NAME = 'fake-lister'
 
     # May need to override this if the headers are used for something
     def response_headers(self, request):
         return {}
 
     # May need to override this if the server uses non-standard rate limiting
     # method.
     # Please keep the requested retry delay reasonably low.
     def mock_rate_quota(self, n, request, context):
         self.rate_limit += 1
         context.status_code = 429
         context.headers['Retry-After'] = '1'
         return '{"error":"dummy"}'
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.rate_limit = 1
         self.response = None
         self.fl = None
         self.helper = None
         if self.__class__ != HttpListerTesterBase:
             self.run = TestCase.run.__get__(self, self.__class__)
         else:
             self.run = noop
 
     def mock_limit_n_response(self, n, request, context):
         self.fl.reset_backoff()
         if self.rate_limit <= n:
             return self.mock_rate_quota(n, request, context)
         else:
             return self.mock_response(request, context)
 
     def mock_limit_twice_response(self, request, context):
         return self.mock_limit_n_response(2, request, context)
 
     def get_api_response(self, identifier):
         fl = self.get_fl()
         if self.response is None:
             self.response = fl.safely_issue_request(identifier)
         return self.response
 
     def get_fl(self, override_config=None):
         """Retrieve an instance of fake lister (fl).
 
         """
         if override_config or self.fl is None:
             self.fl = self.Lister(api_baseurl='https://fakeurl',
                                   override_config=override_config)
             self.fl.INITIAL_BACKOFF = 1
 
         self.fl.reset_backoff()
         return self.fl
 
     def disable_scheduler(self, fl):
         fl.schedule_missing_tasks = Mock(return_value=None)
 
     def disable_db(self, fl):
         fl.winnow_models = Mock(return_value=[])
         fl.db_inject_repo = Mock(return_value=fl.MODEL())
         fl.disable_deleted_repo_tasks = Mock(return_value=None)
 
     def init_db(self, db, model):
         engine = create_engine(db.url())
         model.metadata.create_all(engine)
 
     @requests_mock.Mocker()
     def test_is_within_bounds(self, http_mocker):
         fl = self.get_fl()
         self.assertFalse(fl.is_within_bounds(1, 2, 3))
         self.assertTrue(fl.is_within_bounds(2, 1, 3))
         self.assertTrue(fl.is_within_bounds(1, 1, 1))
         self.assertTrue(fl.is_within_bounds(1, None, None))
         self.assertTrue(fl.is_within_bounds(1, None, 2))
         self.assertTrue(fl.is_within_bounds(1, 0, None))
         self.assertTrue(fl.is_within_bounds("b", "a", "c"))
         self.assertFalse(fl.is_within_bounds("a", "b", "c"))
         self.assertTrue(fl.is_within_bounds("a", None, "c"))
         self.assertTrue(fl.is_within_bounds("a", None, None))
         self.assertTrue(fl.is_within_bounds("b", "a", None))
         self.assertFalse(fl.is_within_bounds("a", "b", None))
         self.assertTrue(fl.is_within_bounds("aa:02", "aa:01", "aa:03"))
         self.assertFalse(fl.is_within_bounds("aa:12", None, "aa:03"))
         with self.assertRaises(TypeError):
             fl.is_within_bounds(1.0, "b", None)
         with self.assertRaises(TypeError):
             fl.is_within_bounds("A:B", "A::B", None)
 
 
 class HttpListerTester(HttpListerTesterBase, abc.ABC):
     """Base testing class for subclass of
 
            :class:`swh.lister.core.indexing_lister.IndexingHttpLister`
 
     See :class:`swh.lister.github.tests.test_gh_lister` for an example of how
     to customize for a specific listing service.
 
     """
     last_index = AbstractAttribute('Last index in good_api_response')
     first_index = AbstractAttribute('First index in good_api_response')
     bad_api_response_file = AbstractAttribute('Example bad response body')
     entries_per_page = AbstractAttribute('Number of results in good response')
     test_re = AbstractAttribute('Compiled regex matching the server url. Must'
                                 ' capture the index value.')
     convert_type = str
     """static method used to convert the "request_index" to its right type (for
        indexing listers for example, this is in accordance with the model's
        "indexable" column).
 
     """
 
     def mock_response(self, request, context):
         self.fl.reset_backoff()
         self.rate_limit = 1
         context.status_code = 200
         custom_headers = self.response_headers(request)
         context.headers.update(custom_headers)
         req_index = self.request_index(request)
 
         if req_index == self.first_index:
             response_file = self.good_api_response_file
         else:
             response_file = self.bad_api_response_file
 
         with open('swh/lister/%s/tests/%s' % (self.lister_subdir,
                                               response_file),
                   'r', encoding='utf-8') as r:
             return r.read()
 
     def request_index(self, request):
         m = self.test_re.search(request.path_url)
         if m and (len(m.groups()) > 0):
             return self.convert_type(m.group(1))
 
     @requests_mock.Mocker()
     def test_fetch_multiple_pages_yesdb(self, http_mocker):
         http_mocker.get(self.test_re, text=self.mock_response)
-        initdb_args = Postgresql.DEFAULT_SETTINGS['initdb_args']
-        initdb_args = ' '.join([initdb_args, '-E UTF-8'])
-        db = Postgresql(initdb_args=initdb_args)
+        db = init_db()
 
         fl = self.get_fl(override_config={
             'lister': {
                 'cls': 'local',
                 'args': {'db': db.url()}
                 }
             })
         self.init_db(db, fl.MODEL)
         self.disable_scheduler(fl)
 
         fl.run(min_bound=self.first_index)
 
         self.assertEqual(fl.db_last_index(), self.last_index)
         partitions = fl.db_partition_indices(5)
         self.assertGreater(len(partitions), 0)
         for k in partitions:
             self.assertLessEqual(len(k), 5)
             self.assertGreater(len(k), 0)
 
     @requests_mock.Mocker()
     def test_fetch_none_nodb(self, http_mocker):
         http_mocker.get(self.test_re, text=self.mock_response)
         fl = self.get_fl()
 
         self.disable_scheduler(fl)
         self.disable_db(fl)
 
         fl.run(min_bound=1, max_bound=1)  # stores no results
         # FIXME: Determine what this method tries to test and add checks to
         # actually test
 
     @requests_mock.Mocker()
     def test_fetch_one_nodb(self, http_mocker):
         http_mocker.get(self.test_re, text=self.mock_response)
         fl = self.get_fl()
 
         self.disable_scheduler(fl)
         self.disable_db(fl)
 
         fl.run(min_bound=self.first_index, max_bound=self.first_index)
         # FIXME: Determine what this method tries to test and add checks to
         # actually test
 
     @requests_mock.Mocker()
     def test_fetch_multiple_pages_nodb(self, http_mocker):
         http_mocker.get(self.test_re, text=self.mock_response)
         fl = self.get_fl()
 
         self.disable_scheduler(fl)
         self.disable_db(fl)
 
         fl.run(min_bound=self.first_index)
         # FIXME: Determine what this method tries to test and add checks to
         # actually test
 
     @requests_mock.Mocker()
     def test_repos_list(self, http_mocker):
         """Test the number of repos listed by the lister
 
         """
         http_mocker.get(self.test_re, text=self.mock_response)
         li = self.get_fl().transport_response_simplified(
             self.get_api_response(self.first_index)
         )
         self.assertIsInstance(li, list)
         self.assertEqual(len(li), self.entries_per_page)
 
     @requests_mock.Mocker()
     def test_model_map(self, http_mocker):
         """Check if all the keys of model are present in the model created by
            the `transport_response_simplified`
 
         """
         http_mocker.get(self.test_re, text=self.mock_response)
         fl = self.get_fl()
         li = fl.transport_response_simplified(
             self.get_api_response(self.first_index))
         di = li[0]
         self.assertIsInstance(di, dict)
         pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')]
         for k in pubs:
             if k not in ['last_seen', 'task_id', 'id']:
                 self.assertIn(k, di)
 
     @requests_mock.Mocker()
     def test_api_request(self, http_mocker):
         """Test API request for rate limit handling
 
         """
         http_mocker.get(self.test_re, text=self.mock_limit_twice_response)
         with patch.object(time, 'sleep', wraps=time.sleep) as sleepmock:
             self.get_api_response(self.first_index)
         self.assertEqual(sleepmock.call_count, 2)
 
 
 class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):
     """Base testing class for subclass of
-       :class:`swh.lister.core.simple)_lister.SimpleLister`
+       :class:`swh.lister.core.simple_lister.SimpleLister`
 
     See :class:`swh.lister.pypi.tests.test_lister` for an example of how
     to customize for a specific listing service.
 
     """
     entries = AbstractAttribute('Number of results in good response')
     PAGE = AbstractAttribute("The server api's unique page to retrieve and "
                              "parse for information")
 
     def get_fl(self, override_config=None):
         """Retrieve an instance of fake lister (fl).
 
         """
         if override_config or self.fl is None:
             self.fl = self.Lister(
                 override_config=override_config)
             self.fl.INITIAL_BACKOFF = 1
 
         self.fl.reset_backoff()
         return self.fl
 
     def mock_response(self, request, context):
         self.fl.reset_backoff()
         self.rate_limit = 1
         context.status_code = 200
         custom_headers = self.response_headers(request)
         context.headers.update(custom_headers)
         response_file = self.good_api_response_file
 
         with open('swh/lister/%s/tests/%s' % (self.lister_subdir,
                                               response_file),
                   'r', encoding='utf-8') as r:
             return r.read()
 
     @requests_mock.Mocker()
     def test_api_request(self, http_mocker):
         """Test API request for rate limit handling
 
         """
         http_mocker.get(self.PAGE, text=self.mock_limit_twice_response)
         with patch.object(time, 'sleep', wraps=time.sleep) as sleepmock:
             self.get_api_response(0)
         self.assertEqual(sleepmock.call_count, 2)
 
     @requests_mock.Mocker()
     def test_model_map(self, http_mocker):
         """Check if all the keys of model are present in the model created by
            the `transport_response_simplified`
 
         """
         http_mocker.get(self.PAGE, text=self.mock_response)
         fl = self.get_fl()
         li = fl.list_packages(self.get_api_response(0))
         li = fl.transport_response_simplified(li)
         di = li[0]
         self.assertIsInstance(di, dict)
         pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')]
         for k in pubs:
             if k not in ['last_seen', 'task_id', 'id']:
                 self.assertIn(k, di)
 
     @requests_mock.Mocker()
     def test_repos_list(self, http_mocker):
         """Test the number of packages listed by the lister
 
         """
         http_mocker.get(self.PAGE, text=self.mock_response)
         li = self.get_fl().list_packages(
             self.get_api_response(0)
         )
         self.assertIsInstance(li, list)
         self.assertEqual(len(li), self.entries)
diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py
new file mode 100644
index 0000000..d0b9c50
--- /dev/null
+++ b/swh/lister/tests/test_cli.py
@@ -0,0 +1,103 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.lister.core.lister_base import ListerBase
+from swh.lister.cli import new_lister, SUPPORTED_LISTERS, DEFAULT_BASEURLS
+
+from .test_utils import init_db
+
+
+def test_new_lister_wrong_input():
+    """Unsupported lister should raise"""
+    with pytest.raises(ValueError) as e:
+        new_lister('unknown', 'db-url')
+
+    assert "Invalid lister" in str(e.value)
+
+
+def test_new_lister():
+    """Instantiating a supported lister should be ok
+
+    """
+    # Keep a reference to the server object for the whole test:
+    # testing.postgresql terminates the server when the object is deleted.
+    db = init_db()
+    db_url = db.url()
+    supported_listers_with_init = {'npm', 'debian'}
+    supported_listers = set(SUPPORTED_LISTERS) - supported_listers_with_init
+    for lister_name in supported_listers:
+        lst, drop_fn, init_fn, insert_data_fn = new_lister(lister_name, db_url)
+
+        assert isinstance(lst, ListerBase)
+        assert drop_fn is None
+        assert init_fn is not None
+        assert insert_data_fn is None
+
+    for lister_name in supported_listers_with_init:
+        lst, drop_fn, init_fn, insert_data_fn = new_lister(lister_name, db_url)
+
+        assert isinstance(lst, ListerBase)
+        assert drop_fn is None
+        assert init_fn is not None
+        assert insert_data_fn is not None
+
+    for lister_name in supported_listers_with_init:
+        lst, drop_fn, init_fn, insert_data_fn = new_lister(lister_name, db_url,
+                                                           drop_tables=True)
+
+        assert isinstance(lst, ListerBase)
+        assert drop_fn is not None
+        assert init_fn is not None
+        assert insert_data_fn is not None
+
+
+def test_new_lister_override():
+    """Overriding the lister configuration should populate its config
+
+    """
+    # Keep a reference to the server object for the whole test:
+    # testing.postgresql terminates the server when the object is deleted.
+    db = init_db()
+    db_url = db.url()
+
+    listers = {
+        'gitlab': ('api_baseurl', 'https://gitlab.uni/api/v4/'),
+        'phabricator': ('forge_url', 'https://somewhere.org'),
+        'cgit': ('url_prefix', 'https://some-cgit.eu/'),
+    }
+
+    # check the override ends up defined in the lister
+    for lister_name, (url_key, url_value) in listers.items():
+        lst, drop_fn, init_fn, insert_data_fn = new_lister(
+            lister_name, db_url, **{
+                'api_baseurl': url_value,
+                'priority': 'high',
+                'policy': 'oneshot',
+            })
+
+        assert getattr(lst, url_key) == url_value
+        assert lst.config['priority'] == 'high'
+        assert lst.config['policy'] == 'oneshot'
+
+    # check the default urls are used and not the override (since it's not
+    # passed)
+    for lister_name, (url_key, url_value) in listers.items():
+        lst, drop_fn, init_fn, insert_data_fn = new_lister(lister_name, db_url)
+
+        # no override so this does not end up in lister's configuration
+        assert url_key not in lst.config
+
+        # then the default base url is used
+        default_url = DEFAULT_BASEURLS[lister_name]
+        if isinstance(default_url, tuple):  # cgit implementation detail...
+            default_url = default_url[1]
+
+        assert getattr(lst, url_key) == default_url
+        # the config is keyed by option name, so check the 'priority' and
+        # 'policy' keys (not a value such as 'oneshot')
+        assert 'priority' not in lst.config
+        assert 'policy' not in lst.config
diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py
index 5d9f476..1fe7e7a 100644
--- a/swh/lister/tests/test_utils.py
+++ b/swh/lister/tests/test_utils.py
@@ -1,24 +1,40 @@
-# Copyright (C) 2018 the Software Heritage developers
+# Copyright (C) 2018-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
+from testing.postgresql import Postgresql
+
 from swh.lister import utils
 
 
 class UtilsTest(unittest.TestCase):
 
     def test_split_range(self):
         actual_ranges = list(utils.split_range(14, 5))
         self.assertEqual(actual_ranges, [(0, 5), (5, 10), (10, 14)])
 
         actual_ranges = list(utils.split_range(19, 10))
         self.assertEqual(actual_ranges, [(0, 10), (10, 19)])
 
     def test_split_range_errors(self):
         with self.assertRaises(TypeError):
             list(utils.split_range(None, 1))
 
         with self.assertRaises(TypeError):
             list(utils.split_range(100, None))
+
+
+def init_db():
+    """Start a temporary PostgreSQL server created with '-E UTF-8'
+
+    Keep the returned object referenced while the database is in use.
+
+    Returns:
+        testing.postgresql.Postgresql object; its url() gives the db url
+
+    """
+    initdb_args = Postgresql.DEFAULT_SETTINGS['initdb_args']
+    initdb_args = ' '.join([initdb_args, '-E UTF-8'])
+    return Postgresql(initdb_args=initdb_args)