Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/core/tests/test_lister.py
# Copyright (C) 2017 the Software Heritage developers | # Copyright (C) 2017-2018 the Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import abc | import abc | ||||
import time | import time | ||||
from unittest import TestCase | from unittest import TestCase | ||||
from unittest.mock import Mock, patch | from unittest.mock import Mock, patch | ||||
import requests_mock | import requests_mock | ||||
from testing.postgresql import Postgresql | from testing.postgresql import Postgresql | ||||
from nose.tools import istest | from nose.tools import istest | ||||
from sqlalchemy import create_engine | from sqlalchemy import create_engine | ||||
from swh.lister.core.abstractattribute import AbstractAttribute | from swh.lister.core.abstractattribute import AbstractAttribute | ||||
def noop(*args, **kwargs): | def noop(*args, **kwargs): | ||||
pass | pass | ||||
@requests_mock.Mocker() | @requests_mock.Mocker() | ||||
class IndexingHttpListerTesterBase(abc.ABC): | class HttpListerTesterBase(abc.ABC): | ||||
"""Base testing class for subclasses of | """Base testing class for subclasses of | ||||
swh.lister.core.indexing_lister.SWHIndexingHttpLister. | swh.lister.core.indexing_lister.SWHIndexingHttpLister. | ||||
swh.lister.core.paging_lister.PageByPageHttpLister | |||||
See swh.lister.github.tests.test_gh_lister for an example of how | |||||
to customize for a specific listing service. | |||||
See swh.lister.github.tests.test_gh_lister for an example of how to | |||||
customize for a specific listing service. | |||||
""" | """ | ||||
Lister = AbstractAttribute('The lister class to test') | Lister = AbstractAttribute('The lister class to test') | ||||
test_re = AbstractAttribute('Compiled regex matching the server url. Must' | test_re = AbstractAttribute('Compiled regex matching the server url. Must' | ||||
' capture the index value.') | ' capture the index value.') | ||||
lister_subdir = AbstractAttribute('bitbucket, github, etc.') | lister_subdir = AbstractAttribute('bitbucket, github, etc.') | ||||
good_api_response_file = AbstractAttribute('Example good response body') | good_api_response_file = AbstractAttribute('Example good response body') | ||||
bad_api_response_file = AbstractAttribute('Example bad response body') | bad_api_response_file = AbstractAttribute('Example bad response body') | ||||
first_index = AbstractAttribute('First index in good_api_response') | first_index = AbstractAttribute('First index in good_api_response') | ||||
last_index = AbstractAttribute('Last index in good_api_response') | |||||
entries_per_page = AbstractAttribute('Number of results in good response') | entries_per_page = AbstractAttribute('Number of results in good response') | ||||
LISTER_NAME = 'fake-lister' | |||||
# May need to override this if the headers are used for something | # May need to override this if the headers are used for something | ||||
def response_headers(self, request): | def response_headers(self, request): | ||||
return {} | return {} | ||||
# May need to override this if the server uses non-standard rate limiting | # May need to override this if the server uses non-standard rate limiting | ||||
# method. | # method. | ||||
# Please keep the requested retry delay reasonably low. | # Please keep the requested retry delay reasonably low. | ||||
def mock_rate_quota(self, n, request, context): | def mock_rate_quota(self, n, request, context): | ||||
self.rate_limit += 1 | self.rate_limit += 1 | ||||
context.status_code = 429 | context.status_code = 429 | ||||
context.headers['Retry-After'] = '1' | context.headers['Retry-After'] = '1' | ||||
return '{"error":"dummy"}' | return '{"error":"dummy"}' | ||||
def __init__(self, *args, **kwargs): | def __init__(self, *args, **kwargs): | ||||
super().__init__(*args, **kwargs) | super().__init__(*args, **kwargs) | ||||
self.rate_limit = 1 | self.rate_limit = 1 | ||||
self.response = None | self.response = None | ||||
self.fl = None | self.fl = None | ||||
self.helper = None | self.helper = None | ||||
if self.__class__ != IndexingHttpListerTesterBase: | if self.__class__ != HttpListerTesterBase: | ||||
self.run = TestCase.run.__get__(self, self.__class__) | self.run = TestCase.run.__get__(self, self.__class__) | ||||
else: | else: | ||||
self.run = noop | self.run = noop | ||||
def request_index(self, request): | def request_index(self, request): | ||||
m = self.test_re.search(request.path_url) | m = self.test_re.search(request.path_url) | ||||
if m and (len(m.groups()) > 0): | if m and (len(m.groups()) > 0): | ||||
return m.group(1) | return m.group(1) | ||||
Show All 26 Lines | class HttpListerTesterBase(abc.ABC): | ||||
def mock_limit_once_response(self, request, context): | def mock_limit_once_response(self, request, context): | ||||
return self.mock_limit_n_response(1, request, context) | return self.mock_limit_n_response(1, request, context) | ||||
def mock_limit_twice_response(self, request, context): | def mock_limit_twice_response(self, request, context): | ||||
return self.mock_limit_n_response(2, request, context) | return self.mock_limit_n_response(2, request, context) | ||||
def get_fl(self, override_config=None): | def get_fl(self, override_config=None): | ||||
"""Retrieve an instance of fake lister (fl). | |||||
""" | |||||
if override_config or self.fl is None: | if override_config or self.fl is None: | ||||
with patch( | with patch( | ||||
'swh.scheduler.backend.SchedulerBackend.reconnect', noop | 'swh.scheduler.backend.SchedulerBackend.reconnect', noop | ||||
): | ): | ||||
self.fl = self.Lister(lister_name='fakelister', | self.fl = self.Lister(api_baseurl='https://fakeurl', | ||||
api_baseurl='https://fakeurl', | |||||
override_config=override_config) | override_config=override_config) | ||||
self.fl.INITIAL_BACKOFF = 1 | self.fl.INITIAL_BACKOFF = 1 | ||||
self.fl.reset_backoff() | self.fl.reset_backoff() | ||||
return self.fl | return self.fl | ||||
def get_api_response(self): | def get_api_response(self): | ||||
fl = self.get_fl() | fl = self.get_fl() | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | class HttpListerTesterBase(abc.ABC): | ||||
def test_model_map(self, http_mocker): | def test_model_map(self, http_mocker): | ||||
http_mocker.get(self.test_re, text=self.mock_response) | http_mocker.get(self.test_re, text=self.mock_response) | ||||
fl = self.get_fl() | fl = self.get_fl() | ||||
li = fl.transport_response_simplified(self.get_api_response()) | li = fl.transport_response_simplified(self.get_api_response()) | ||||
di = li[0] | di = li[0] | ||||
self.assertIsInstance(di, dict) | self.assertIsInstance(di, dict) | ||||
pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')] | pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')] | ||||
for k in pubs: | for k in pubs: | ||||
if k not in ['last_seen', 'task_id', 'origin_id']: | if k not in ['last_seen', 'task_id', 'origin_id', 'id']: | ||||
self.assertIn(k, di) | self.assertIn(k, di) | ||||
def disable_storage_and_scheduler(self, fl): | def disable_storage_and_scheduler(self, fl): | ||||
fl.create_missing_origins_and_tasks = Mock(return_value=None) | fl.create_missing_origins_and_tasks = Mock(return_value=None) | ||||
def disable_db(self, fl): | def disable_db(self, fl): | ||||
fl.winnow_models = Mock(return_value=[]) | fl.winnow_models = Mock(return_value=[]) | ||||
fl.db_inject_repo = Mock(return_value=fl.MODEL()) | fl.db_inject_repo = Mock(return_value=fl.MODEL()) | ||||
fl.disable_deleted_repo_tasks = Mock(return_value=None) | fl.disable_deleted_repo_tasks = Mock(return_value=None) | ||||
@istest | @istest | ||||
def test_fetch_none_nodb(self, http_mocker): | def test_fetch_none_nodb(self, http_mocker): | ||||
http_mocker.get(self.test_re, text=self.mock_response) | http_mocker.get(self.test_re, text=self.mock_response) | ||||
fl = self.get_fl() | fl = self.get_fl() | ||||
self.disable_storage_and_scheduler(fl) | self.disable_storage_and_scheduler(fl) | ||||
self.disable_db(fl) | self.disable_db(fl) | ||||
fl.run(min_index=1, max_index=1) # stores no results | fl.run(min_bound=1, max_bound=1) # stores no results | ||||
@istest | @istest | ||||
def test_fetch_one_nodb(self, http_mocker): | def test_fetch_one_nodb(self, http_mocker): | ||||
http_mocker.get(self.test_re, text=self.mock_response) | http_mocker.get(self.test_re, text=self.mock_response) | ||||
fl = self.get_fl() | fl = self.get_fl() | ||||
self.disable_storage_and_scheduler(fl) | self.disable_storage_and_scheduler(fl) | ||||
self.disable_db(fl) | self.disable_db(fl) | ||||
fl.run(min_index=self.first_index, max_index=self.first_index) | fl.run(min_bound=self.first_index, max_bound=self.first_index) | ||||
@istest | @istest | ||||
def test_fetch_multiple_pages_nodb(self, http_mocker): | def test_fetch_multiple_pages_nodb(self, http_mocker): | ||||
http_mocker.get(self.test_re, text=self.mock_response) | http_mocker.get(self.test_re, text=self.mock_response) | ||||
fl = self.get_fl() | fl = self.get_fl() | ||||
self.disable_storage_and_scheduler(fl) | self.disable_storage_and_scheduler(fl) | ||||
self.disable_db(fl) | self.disable_db(fl) | ||||
fl.run(min_index=self.first_index) | fl.run(min_bound=self.first_index) | ||||
def init_db(self, db, model): | def init_db(self, db, model): | ||||
engine = create_engine(db.url()) | engine = create_engine(db.url()) | ||||
model.metadata.create_all(engine) | model.metadata.create_all(engine) | ||||
class HttpListerTester(HttpListerTesterBase, abc.ABC): | |||||
last_index = AbstractAttribute('Last index in good_api_response') | |||||
@requests_mock.Mocker() | |||||
@istest | @istest | ||||
def test_fetch_multiple_pages_yesdb(self, http_mocker): | def test_fetch_multiple_pages_yesdb(self, http_mocker): | ||||
http_mocker.get(self.test_re, text=self.mock_response) | http_mocker.get(self.test_re, text=self.mock_response) | ||||
initdb_args = Postgresql.DEFAULT_SETTINGS['initdb_args'] | initdb_args = Postgresql.DEFAULT_SETTINGS['initdb_args'] | ||||
initdb_args = ' '.join([initdb_args, '-E UTF-8']) | initdb_args = ' '.join([initdb_args, '-E UTF-8']) | ||||
db = Postgresql(initdb_args=initdb_args) | db = Postgresql(initdb_args=initdb_args) | ||||
fl = self.get_fl(override_config={'lister_db_url': db.url()}) | fl = self.get_fl(override_config={'lister_db_url': db.url()}) | ||||
self.init_db(db, fl.MODEL) | self.init_db(db, fl.MODEL) | ||||
self.disable_storage_and_scheduler(fl) | self.disable_storage_and_scheduler(fl) | ||||
fl.run(min_index=self.first_index) | fl.run(min_bound=self.first_index) | ||||
self.assertEqual(fl.db_last_index(), self.last_index) | self.assertEqual(fl.db_last_index(), self.last_index) | ||||
partitions = fl.db_partition_indices(5) | partitions = fl.db_partition_indices(5) | ||||
self.assertGreater(len(partitions), 0) | self.assertGreater(len(partitions), 0) | ||||
for k in partitions: | for k in partitions: | ||||
self.assertLessEqual(len(k), 5) | self.assertLessEqual(len(k), 5) | ||||
self.assertGreater(len(k), 0) | self.assertGreater(len(k), 0) |