Page Menu | Home | Software Heritage

D2077.diff
No OneTemporary

D2077.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,3 @@
-swh.core
+swh.core >= 0.0.73
swh.storage[schemata] >= 0.0.122
swh.scheduler >= 0.0.58
diff --git a/swh/lister/__init__.py b/swh/lister/__init__.py
--- a/swh/lister/__init__.py
+++ b/swh/lister/__init__.py
@@ -0,0 +1,43 @@
+# Copyright (C) 2018-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import pkg_resources
+
+
+logger = logging.getLogger(__name__)
+
+
+LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
+ for entry_point in pkg_resources.iter_entry_points('swh.workers')
+ if entry_point.name.split('.', 1)[0] == 'lister'}
+
+
+SUPPORTED_LISTERS = list(LISTERS)
+
+
+def get_lister(lister_name, db_url=None, **conf):
+ """Instantiate a lister given its name.
+
+ Args:
+ lister_name (str): Lister's name
+ db_url (str): Optional lister db url; if set, overrides conf['lister']
+ conf (dict): Configuration dict (lister db cnx, policy, priority...)
+
+ Returns:
+ The instantiated lister
+
+ """
+ if lister_name not in LISTERS:
+ raise ValueError(
+ 'Invalid lister %s: only supported listers are %s' %
+ (lister_name, SUPPORTED_LISTERS))
+ if db_url:
+ conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
+
+ registry_entry = LISTERS[lister_name].load()()
+ lister_cls = registry_entry['lister']
+ lister = lister_cls(override_config=conf)
+ return lister
diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -47,7 +47,7 @@
'https://git.savannah.gnu.org/git/elisp-es.git'
"""
MODEL = CGitModel
- DEFAULT_URL = 'http://git.savannah.gnu.org/cgit/'
+ DEFAULT_URL = 'https://git.savannah.gnu.org/cgit/'
LISTER_NAME = 'cgit'
url_prefix_present = True
diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py
--- a/swh/lister/cgit/tests/test_lister.py
+++ b/swh/lister/cgit/tests/test_lister.py
@@ -2,85 +2,65 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from os.path import join, dirname
-import re
-from urllib.parse import urlparse
-from unittest.mock import Mock
-import requests_mock
-from sqlalchemy import create_engine
+def test_lister_no_page(requests_mock_datadir, swh_listers):
+ lister = swh_listers['cgit']
-from swh.lister.cgit.lister import CGitLister
-from swh.lister.tests.test_utils import init_db
+ assert lister.url == 'https://git.savannah.gnu.org/cgit/'
+ repos = list(lister.get_repos())
+ assert len(repos) == 977
-DATADIR = join(dirname(__file__), 'data')
+ assert repos[0] == 'https://git.savannah.gnu.org/cgit/elisp-es.git/'
+ # note the url below is NOT a subpath of /cgit/
+ assert repos[-1] == 'https://git.savannah.gnu.org/path/to/yetris.git/' # noqa
+ # note the url below is NOT on the same server
+ assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
-def get_response_cb(request, context):
- url = urlparse(request.url)
- dirname = url.hostname
- filename = url.path[1:-1].replace('/', '_')
- if url.query:
- filename += ',' + url.query
- resp = open(join(DATADIR, dirname, filename), 'rb').read()
- return resp.decode('ascii', 'ignore')
+def test_lister_model(requests_mock_datadir, swh_listers):
+ lister = swh_listers['cgit']
+ repo = next(lister.get_repos())
-def test_lister_no_page():
- with requests_mock.Mocker() as m:
- m.get(re.compile('http://git.savannah.gnu.org'), text=get_response_cb)
- lister = CGitLister()
+ model = lister.build_model(repo)
+ assert model == {
+ 'uid': 'https://git.savannah.gnu.org/cgit/elisp-es.git/',
+ 'name': 'elisp-es.git',
+ 'origin_type': 'git',
+ 'instance': 'git.savannah.gnu.org',
+ 'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
+ }
- assert lister.url == 'http://git.savannah.gnu.org/cgit/'
- repos = list(lister.get_repos())
- assert len(repos) == 977
+def test_lister_with_pages(requests_mock_datadir, swh_listers):
+ lister = swh_listers['cgit']
+ lister.url = 'https://git.tizen/cgit/'
- assert repos[0] == 'http://git.savannah.gnu.org/cgit/elisp-es.git/'
- # note the url below is NOT a subpath of /cgit/
- assert repos[-1] == 'http://git.savannah.gnu.org/path/to/yetris.git/' # noqa
- # note the url below is NOT on the same server
- assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
+ repos = list(lister.get_repos())
+ # we should have 16 repos (listed on 3 pages)
+ assert len(repos) == 16
-def test_lister_model():
- with requests_mock.Mocker() as m:
- m.get(re.compile('http://git.savannah.gnu.org'), text=get_response_cb)
- lister = CGitLister()
+def test_lister_run(requests_mock_datadir, swh_listers):
+ lister = swh_listers['cgit']
+ lister.url = 'https://git.tizen/cgit/'
+ lister.run()
- repo = next(lister.get_repos())
+ r = lister.scheduler.search_tasks(task_type='load-git')
+ assert len(r) == 16
- model = lister.build_model(repo)
- assert model == {
- 'uid': 'http://git.savannah.gnu.org/cgit/elisp-es.git/',
- 'name': 'elisp-es.git',
- 'origin_type': 'git',
- 'instance': 'git.savannah.gnu.org',
- 'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
- }
+ for row in r:
+ assert row['type'] == 'load-git'
+ # arguments check
+ args = row['arguments']['args']
+ assert len(args) == 1
+ url = args[0]
+ assert url.startswith('https://git.tizen')
-def test_lister_with_pages():
- with requests_mock.Mocker() as m:
- m.get(re.compile('http://git.tizen/cgit/'), text=get_response_cb)
- lister = CGitLister(url='http://git.tizen/cgit/')
-
- assert lister.url == 'http://git.tizen/cgit/'
-
- repos = list(lister.get_repos())
- # we should have 16 repos (listed on 3 pages)
- assert len(repos) == 16
-
-
-def test_lister_run():
- with requests_mock.Mocker() as m:
- m.get(re.compile('http://git.tizen/cgit/'), text=get_response_cb)
- db = init_db()
- conf = {'lister': {'cls': 'local', 'args': {'db': db.url()}}}
- lister = CGitLister(url='http://git.tizen/cgit/',
- override_config=conf)
- engine = create_engine(db.url())
- lister.MODEL.metadata.create_all(engine)
- lister.schedule_missing_tasks = Mock(return_value=None)
- lister.run()
+ # kwargs
+ kwargs = row['arguments']['kwargs']
+ assert kwargs == {}
+ assert row['policy'] == 'recurring'
+ assert row['priority'] is None
diff --git a/swh/lister/cli.py b/swh/lister/cli.py
--- a/swh/lister/cli.py
+++ b/swh/lister/cli.py
@@ -5,7 +5,6 @@
import os
import logging
-import pkg_resources
from copy import deepcopy
from importlib import import_module
@@ -15,15 +14,12 @@
from swh.core.cli import CONTEXT_SETTINGS
from swh.scheduler import get_scheduler
from swh.scheduler.task import SWHTask
+from swh.lister import get_lister, SUPPORTED_LISTERS, LISTERS
from swh.lister.core.models import initialize
logger = logging.getLogger(__name__)
-LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
- for entry_point in pkg_resources.iter_entry_points('swh.workers')
- if entry_point.name.split('.', 1)[0] == 'lister'}
-SUPPORTED_LISTERS = list(LISTERS)
# the key in this dict is the suffix used to match new task-type to be added.
# For example for a task which function name is "list_gitlab_full', the default
@@ -45,31 +41,6 @@
}
-def get_lister(lister_name, db_url=None, **conf):
- """Instantiate a lister given its name.
-
- Args:
- lister_name (str): Lister's name
- conf (dict): Configuration dict (lister db cnx, policy, priority...)
-
- Returns:
- Tuple (instantiated lister, drop_tables function, init schema function,
- insert minimum data function)
-
- """
- if lister_name not in LISTERS:
- raise ValueError(
- 'Invalid lister %s: only supported listers are %s' %
- (lister_name, SUPPORTED_LISTERS))
- if db_url:
- conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
-
- registry_entry = LISTERS[lister_name].load()()
- lister_cls = registry_entry['lister']
- lister = lister_cls(override_config=conf)
- return lister
-
-
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@click.option('--config-file', '-C', default=None,
type=click.Path(exists=True, dir_okay=False,),
diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py
--- a/swh/lister/core/tests/conftest.py
+++ b/swh/lister/core/tests/conftest.py
@@ -1 +1,28 @@
from swh.scheduler.tests.conftest import * # noqa
+
+import pytest
+
+from sqlalchemy import create_engine
+
+from swh.lister import get_lister, SUPPORTED_LISTERS
+from swh.lister.core.models import initialize
+
+
+@pytest.fixture
+def swh_listers(request, postgresql_proc, postgresql, swh_scheduler):
+ db_url = 'postgresql://{user}@{host}:{port}/{dbname}'.format(
+ host=postgresql_proc.host,
+ port=postgresql_proc.port,
+ user='postgres',
+ dbname='tests')
+
+ listers = {}
+
+ # Prepare schema for all listers
+ for lister_name in SUPPORTED_LISTERS:
+ lister = get_lister(lister_name, db_url=db_url)
+ lister.scheduler = swh_scheduler # inject scheduler fixture
+ listers[lister_name] = lister
+ initialize(create_engine(db_url), drop_tables=True)
+
+ return listers

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 8:05 AM (8 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214178

Event Timeline