diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,3 @@
-swh.core
+swh.core >= 0.0.73
 swh.storage[schemata] >= 0.0.122
 swh.scheduler >= 0.0.58
diff --git a/swh/lister/__init__.py b/swh/lister/__init__.py
--- a/swh/lister/__init__.py
+++ b/swh/lister/__init__.py
@@ -0,0 +1,43 @@
+# Copyright (C) 2018-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import pkg_resources
+
+
+logger = logging.getLogger(__name__)
+
+
+LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
+           for entry_point in pkg_resources.iter_entry_points('swh.workers')
+           if entry_point.name.split('.', 1)[0] == 'lister'}
+
+
+SUPPORTED_LISTERS = list(LISTERS)
+
+
+def get_lister(lister_name, db_url=None, **conf):
+    """Instantiate a lister given its name.
+
+    Args:
+        lister_name (str): Lister's name
+        db_url (str): If set, overrides conf['lister'] with a 'local' db url
+        conf (dict): Configuration dict (lister db cnx, policy, priority...)
+
+    Returns:
+        The lister instantiated with conf (ValueError if name is unsupported)
+
+    """
+    if lister_name not in LISTERS:
+        raise ValueError(
+            'Invalid lister %s: only supported listers are %s' %
+            (lister_name, SUPPORTED_LISTERS))
+    if db_url:
+        conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
+
+    registry_entry = LISTERS[lister_name].load()()
+    lister_cls = registry_entry['lister']
+    lister = lister_cls(override_config=conf)
+    return lister
diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -47,7 +47,7 @@
             'https://git.savannah.gnu.org/git/elisp-es.git'
     """
     MODEL = CGitModel
-    DEFAULT_URL = 'http://git.savannah.gnu.org/cgit/'
+    DEFAULT_URL = 'https://git.savannah.gnu.org/cgit/'
     LISTER_NAME = 'cgit'
     url_prefix_present = True
 
diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py
--- a/swh/lister/cgit/tests/test_lister.py
+++ b/swh/lister/cgit/tests/test_lister.py
@@ -2,85 +2,65 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from os.path import join, dirname
-import re
-from urllib.parse import urlparse
-from unittest.mock import Mock
 
-import requests_mock
-from sqlalchemy import create_engine
+def test_lister_no_page(requests_mock_datadir, swh_listers):
+    lister = swh_listers['cgit']
 
-from swh.lister.cgit.lister import CGitLister
-from swh.lister.tests.test_utils import init_db
+    assert lister.url == 'https://git.savannah.gnu.org/cgit/'
 
+    repos = list(lister.get_repos())
+    assert len(repos) == 977
 
-DATADIR = join(dirname(__file__), 'data')
+    assert repos[0] == 'https://git.savannah.gnu.org/cgit/elisp-es.git/'
+    # note the url below is NOT a subpath of /cgit/
+    assert repos[-1] == 'https://git.savannah.gnu.org/path/to/yetris.git/'  # noqa
+    # note the url below is NOT on the same server
+    assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
 
 
-def get_response_cb(request, context):
-    url = urlparse(request.url)
-    dirname = url.hostname
-    filename = url.path[1:-1].replace('/', '_')
-    if url.query:
-        filename += ',' + url.query
-    resp = open(join(DATADIR, dirname, filename), 'rb').read()
-    return resp.decode('ascii', 'ignore')
+def test_lister_model(requests_mock_datadir, swh_listers):
+    lister = swh_listers['cgit']
 
+    repo = next(lister.get_repos())
 
-def test_lister_no_page():
-    with requests_mock.Mocker() as m:
-        m.get(re.compile('http://git.savannah.gnu.org'), text=get_response_cb)
-        lister = CGitLister()
+    model = lister.build_model(repo)
+    assert model == {
+        'uid': 'https://git.savannah.gnu.org/cgit/elisp-es.git/',
+        'name': 'elisp-es.git',
+        'origin_type': 'git',
+        'instance': 'git.savannah.gnu.org',
+        'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
+    }
 
-        assert lister.url == 'http://git.savannah.gnu.org/cgit/'
 
-        repos = list(lister.get_repos())
-        assert len(repos) == 977
+def test_lister_with_pages(requests_mock_datadir, swh_listers):
+    lister = swh_listers['cgit']
+    lister.url = 'https://git.tizen/cgit/'
 
-        assert repos[0] == 'http://git.savannah.gnu.org/cgit/elisp-es.git/'
-        # note the url below is NOT a subpath of /cgit/
-        assert repos[-1] == 'http://git.savannah.gnu.org/path/to/yetris.git/'  # noqa
-        # note the url below is NOT on the same server
-        assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
+    repos = list(lister.get_repos())
+    # we should have 16 repos (listed on 3 pages)
+    assert len(repos) == 16
 
 
-def test_lister_model():
-    with requests_mock.Mocker() as m:
-        m.get(re.compile('http://git.savannah.gnu.org'), text=get_response_cb)
-        lister = CGitLister()
+def test_lister_run(requests_mock_datadir, swh_listers):
+    lister = swh_listers['cgit']
+    lister.url = 'https://git.tizen/cgit/'
+    lister.run()
 
-        repo = next(lister.get_repos())
+    r = lister.scheduler.search_tasks(task_type='load-git')
+    assert len(r) == 16
 
-        model = lister.build_model(repo)
-        assert model == {
-            'uid': 'http://git.savannah.gnu.org/cgit/elisp-es.git/',
-            'name': 'elisp-es.git',
-            'origin_type': 'git',
-            'instance': 'git.savannah.gnu.org',
-            'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
-        }
+    for row in r:
+        assert row['type'] == 'load-git'
+        # arguments check
+        args = row['arguments']['args']
+        assert len(args) == 1
 
+        url = args[0]
+        assert url.startswith('https://git.tizen')
 
-def test_lister_with_pages():
-    with requests_mock.Mocker() as m:
-        m.get(re.compile('http://git.tizen/cgit/'), text=get_response_cb)
-        lister = CGitLister(url='http://git.tizen/cgit/')
-
-        assert lister.url == 'http://git.tizen/cgit/'
-
-        repos = list(lister.get_repos())
-        # we should have 16 repos (listed on 3 pages)
-        assert len(repos) == 16
-
-
-def test_lister_run():
-    with requests_mock.Mocker() as m:
-        m.get(re.compile('http://git.tizen/cgit/'), text=get_response_cb)
-        db = init_db()
-        conf = {'lister': {'cls': 'local', 'args': {'db': db.url()}}}
-        lister = CGitLister(url='http://git.tizen/cgit/',
-                            override_config=conf)
-        engine = create_engine(db.url())
-        lister.MODEL.metadata.create_all(engine)
-        lister.schedule_missing_tasks = Mock(return_value=None)
-        lister.run()
+        # kwargs
+        kwargs = row['arguments']['kwargs']
+        assert kwargs == {}
+        assert row['policy'] == 'recurring'
+        assert row['priority'] is None
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -5,7 +5,6 @@
 
 import os
 import logging
-import pkg_resources
 from copy import deepcopy
 from importlib import import_module
 
@@ -15,15 +14,12 @@
 from swh.core.cli import CONTEXT_SETTINGS
 from swh.scheduler import get_scheduler
 from swh.scheduler.task import SWHTask
+from swh.lister import get_lister, SUPPORTED_LISTERS, LISTERS
 from swh.lister.core.models import initialize
 
 
 logger = logging.getLogger(__name__)
 
-LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
-           for entry_point in pkg_resources.iter_entry_points('swh.workers')
-           if entry_point.name.split('.', 1)[0] == 'lister'}
-SUPPORTED_LISTERS = list(LISTERS)
 
 # the key in this dict is the suffix used to match new task-type to be added.
 # For example for a task which function name is "list_gitlab_full', the default
@@ -45,31 +41,6 @@
 }
 
 
-def get_lister(lister_name, db_url=None, **conf):
-    """Instantiate a lister given its name.
-
-    Args:
-        lister_name (str): Lister's name
-        conf (dict): Configuration dict (lister db cnx, policy, priority...)
-
-    Returns:
-        Tuple (instantiated lister, drop_tables function, init schema function,
-        insert minimum data function)
-
-    """
-    if lister_name not in LISTERS:
-        raise ValueError(
-            'Invalid lister %s: only supported listers are %s' %
-            (lister_name, SUPPORTED_LISTERS))
-    if db_url:
-        conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
-
-    registry_entry = LISTERS[lister_name].load()()
-    lister_cls = registry_entry['lister']
-    lister = lister_cls(override_config=conf)
-    return lister
-
-
 @click.group(name='lister', context_settings=CONTEXT_SETTINGS)
 @click.option('--config-file', '-C', default=None,
               type=click.Path(exists=True, dir_okay=False,),
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -1 +1,28 @@
 from swh.scheduler.tests.conftest import *  # noqa
+
+import pytest
+
+from sqlalchemy import create_engine
+
+from swh.lister import get_lister, SUPPORTED_LISTERS
+from swh.lister.core.models import initialize
+
+
+@pytest.fixture
+def swh_listers(request, postgresql_proc, postgresql, swh_scheduler):
+    db_url = 'postgresql://{user}@{host}:{port}/{dbname}'.format(
+        host=postgresql_proc.host,
+        port=postgresql_proc.port,
+        user='postgres',
+        dbname='tests')
+
+    listers = {}
+
+    # Prepare schema for all listers
+    for lister_name in SUPPORTED_LISTERS:
+        lister = get_lister(lister_name, db_url=db_url)
+        lister.scheduler = swh_scheduler  # inject scheduler fixture
+        listers[lister_name] = lister
+    initialize(create_engine(db_url), drop_tables=True)
+
+    return listers