diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,4 @@
 swh.core >= 0.0.75
 swh.model >= 0.0.18
 swh.storage >= 0.0.153
+swh.scheduler
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,4 +1,5 @@
 pytest
+pytest-postgresql >= 2.1.0
 requests_mock
 swh-core[testing]
 swh-scheduler[testing]
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -57,6 +57,8 @@
         loader.deposit=swh.loader.package.deposit:register
         loader.npm=swh.loader.package.npm:register
         loader.pypi=swh.loader.package.pypi:register
+        [swh.cli.subcommands]
+        loader=swh.loader.cli:run
     ''',
     classifiers=[
         "Programming Language :: Python :: 3",
diff --git a/swh/loader/cli.py b/swh/loader/cli.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/cli.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+
+import click
+import pkg_resources
+
+from typing import Any
+
+from swh.core.cli import CONTEXT_SETTINGS
+from swh.scheduler.cli.utils import parse_options
+
+
+logger = logging.getLogger(__name__)
+
+
+LOADER_PREFIX = 'loader.'
+
+# Loaders are registered in the 'swh.workers' entry point group under the
+# name 'loader.<name>' and are keyed here by that short <name>.  Matching on
+# the whole prefix skips unrelated or dot-less entry point names instead of
+# failing with an IndexError at import time.
+LOADERS = {entry_point.name[len(LOADER_PREFIX):]: entry_point
+           for entry_point in pkg_resources.iter_entry_points('swh.workers')
+           if entry_point.name.startswith(LOADER_PREFIX)}
+
+
+# Sorted so that the --type choices and error messages are deterministic
+SUPPORTED_LOADERS = sorted(LOADERS)
+
+
+def get_loader(name: str, **kwargs) -> Any:
+    """Given a loader name, instantiate it.
+
+    Args:
+        name: Loader's name
+        kwargs: Configuration dict (url...)
+
+    Returns:
+        An instantiated loader
+
+    Raises:
+        ValueError: if no loader is registered under that name
+
+    """
+    if name not in LOADERS:
+        raise ValueError(
+            'Invalid loader %s: only supported loaders are %s' %
+            (name, SUPPORTED_LOADERS))
+
+    # The entry point loads the module's register function; calling it gives
+    # the registry entry, whose 'loader' key holds the loader class.
+    registry_entry = LOADERS[name].load()()
+    logger.debug('registry: %s', registry_entry)
+    loader_cls = registry_entry['loader']
+    logger.debug('loader class: %s', loader_cls)
+    return loader_cls(**kwargs)
+
+
+@click.command(name='run', context_settings=CONTEXT_SETTINGS)
+@click.option('--type', '-t', 'type_', required=True, help='Loader to run',
+              type=click.Choice(SUPPORTED_LOADERS))
+@click.option('--url', '-u', default=None,
+              help="Origin url to load")
+@click.argument('options', nargs=-1)
+@click.pass_context
+def run(ctx, type_, url, options):
+    """Loader cli tools
+
+    Load origin url with loader
+
+    """
+    # Only the keyword part of the parsed options is passed to the loader
+    (_, kw) = parse_options(options)
+    logger.debug('kw: %s', kw)
+    loader = get_loader(type_, url=url, **kw)
+    result = loader.load()
+    click.echo(result)
diff --git a/swh/loader/package/archive/__init__.py b/swh/loader/package/archive/__init__.py
--- a/swh/loader/package/archive/__init__.py
+++ b/swh/loader/package/archive/__init__.py
@@ -9,6 +9,8 @@
 
 def register() -> Mapping[str, Any]:
     """Register the current worker module's definition"""
+    from .loader import ArchiveLoader
     return {
         'task_modules': [f'{__name__}.tasks'],
+        'loader': ArchiveLoader,
     }
diff --git a/swh/loader/package/debian/__init__.py b/swh/loader/package/debian/__init__.py
--- a/swh/loader/package/debian/__init__.py
+++ b/swh/loader/package/debian/__init__.py
@@ -9,6 +9,8 @@
 
 def register() -> Mapping[str, Any]:
     """Register the current worker module's definition"""
+    from .loader import DebianLoader
     return {
         'task_modules': [f'{__name__}.tasks'],
+        'loader': DebianLoader,
     }
diff --git a/swh/loader/package/deposit/__init__.py b/swh/loader/package/deposit/__init__.py
--- a/swh/loader/package/deposit/__init__.py
+++ b/swh/loader/package/deposit/__init__.py
@@ -9,6 +9,8 @@
 
 def register() -> Mapping[str, Any]:
     """Register the current worker module's definition"""
+    from .loader import DepositLoader
     return {
         'task_modules': [f'{__name__}.tasks'],
+        'loader': DepositLoader,
     }
diff --git a/swh/loader/package/npm/__init__.py b/swh/loader/package/npm/__init__.py
--- a/swh/loader/package/npm/__init__.py
+++ b/swh/loader/package/npm/__init__.py
@@ -9,6 +9,8 @@
 
 def register() -> Mapping[str, Any]:
     """Register the current worker module's definition"""
+    from .loader import NpmLoader
     return {
         'task_modules': [f'{__name__}.tasks'],
+        'loader': NpmLoader,
     }
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -14,6 +14,7 @@
 import chardet
 import iso8601
 
+from urllib.parse import quote
 from swh.model.identifiers import normalize_timestamp
 from swh.loader.package.loader import PackageLoader
 from swh.loader.package.utils import api_info, release_name
@@ -31,20 +32,32 @@
 class NpmLoader(PackageLoader):
     visit_type = 'npm'
 
-    def __init__(self, package_name, package_url, package_metadata_url):
-        super().__init__(url=package_url)
-        self.provider_url = package_metadata_url
+    def __init__(self, url: str):
+        """Constructor
 
-        self._info = None
+        Args:
+            url: origin url
+                (e.g. https://www.npmjs.com/package/<package-name>)
+
+        Raises:
+            ValueError: if url is not an npmjs.com package url
+
+        """
+        super().__init__(url=url)
+        origin_prefix = 'https://www.npmjs.com/package/'
+        package_name = url[len(origin_prefix):]
+        if not url.startswith(origin_prefix) or not package_name:
+            raise ValueError(
+                f'Invalid npm origin url {url!r}: expected '
+                f'{origin_prefix}<package-name>')
+        # safe='' so that '/' is percent-encoded as well
+        safe_name = quote(package_name, safe='')
+        self.provider_url = f'https://replicate.npmjs.com/{safe_name}/'
+        self._info: Dict[str, Any] = {}
         self._versions = None
-        # if package_url is None:
-        #     package_url = 'https://www.npmjs.com/package/%s' % package_name
-        # if package_metadata_url is None:
-        #     package_metadata_url = 'https://replicate.npmjs.com/%s/' %\
-        #         quote(package_name, safe='')
 
     @property
-    def info(self) -> Dict:
+    def info(self) -> Dict[str, Any]:
         """Return the project metadata information (fetched from npm registry)
 
         """
diff --git a/swh/loader/package/npm/tasks.py b/swh/loader/package/npm/tasks.py
--- a/swh/loader/package/npm/tasks.py
+++ b/swh/loader/package/npm/tasks.py
@@ -9,6 +9,6 @@
 
 
 @shared_task(name=__name__ + '.LoadNpm')
-def load_npm(*, package_name, package_url, package_metadata_url):
+def load_npm(*, url: str):
     """Load Npm package"""
-    return NpmLoader(package_name, package_url, package_metadata_url).load()
+    return NpmLoader(url).load()
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -382,9 +382,7 @@
 
 def test_revision_metadata_structure(swh_config, requests_mock_datadir):
     package = 'org'
-    loader = NpmLoader(package,
-                       package_url(package),
-                       package_metadata_url(package))
+    loader = NpmLoader(package_url(package))
 
     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
@@ -416,9 +414,7 @@
 
 def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
     package = 'org'
-    loader = NpmLoader(package,
-                       package_url(package),
-                       package_metadata_url(package))
+    loader = NpmLoader(package_url(package))
 
     actual_load_status = loader.load()
     expected_snapshot_id = 'd0587e1195aed5a8800411a008f2f2d627f18e2d'
@@ -479,8 +475,7 @@
         swh_config, requests_mock_datadir_visits):
     package = 'org'
     url = package_url(package)
-    metadata_url = package_metadata_url(package)
-    loader = NpmLoader(package, url, metadata_url)
+    loader = NpmLoader(url)
 
     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
@@ -539,7 +534,7 @@
 def test_npm_loader_version_divergence(swh_config):
     package = '@aller_shared'
     url = package_url(package)
-    loader = NpmLoader(package, url, package_metadata_url(package))
+    loader = NpmLoader(url)
 
     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
diff --git a/swh/loader/package/npm/tests/test_tasks.py b/swh/loader/package/npm/tests/test_tasks.py
--- a/swh/loader/package/npm/tests/test_tasks.py
+++ b/swh/loader/package/npm/tests/test_tasks.py
@@ -13,9 +13,7 @@
 
     res = swh_app.send_task(
         'swh.loader.package.npm.tasks.LoadNpm',
-        (), dict(package_name='some-package',
-                 package_url='some',
-                 package_metadata_url='something'))
+        (), dict(url='https://www.npmjs.com/package/some-package'))
     assert res
     res.wait()
     assert res.successful()
diff --git a/swh/loader/package/pypi/__init__.py b/swh/loader/package/pypi/__init__.py
--- a/swh/loader/package/pypi/__init__.py
+++ b/swh/loader/package/pypi/__init__.py
@@ -9,6 +9,8 @@
 
 def register() -> Mapping[str, Any]:
     """Register the current worker module's definition"""
+    from .loader import PyPILoader
     return {
         'task_modules': [f'{__name__}.tasks'],
+        'loader': PyPILoader,
     }
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/tests/conftest.py b/swh/loader/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/tests/conftest.py
@@ -0,0 +1,25 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from typing import Any, Dict
+
+
+@pytest.fixture
+def swh_loader_config() -> Dict[str, Any]:
+    """Loader test configuration (in-memory storage, dummy deposit auth)"""
+    return {
+        'storage': {
+            'cls': 'memory',
+        },
+        'deposit': {
+            'url': 'https://deposit.softwareheritage.org/1/private',
+            'auth': {
+                'username': 'user',
+                'password': 'pass',
+            }
+        },
+    }
diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/tests/test_cli.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+
+from swh.loader.cli import run, get_loader, SUPPORTED_LOADERS
+from swh.loader.package.loader import PackageLoader
+
+from click.testing import CliRunner
+from unittest.mock import patch
+
+
+def test_get_loader_wrong_input(swh_config):
+    """Unsupported loader should raise
+
+    """
+    loader_type = 'unknown'
+    assert loader_type not in SUPPORTED_LOADERS
+    with pytest.raises(ValueError, match='Invalid loader'):
+        get_loader(loader_type, url='db-url')
+
+
+def test_get_loader(swh_config):
+    """Instantiating a supported loader should be ok
+
+    """
+    loader_input = {
+        'archive': {
+            'url': 'some-url',
+            'artifacts': [],
+        },
+        'debian': {
+            'url': 'some-url',
+            'date': 'something',
+            'packages': [],
+        },
+        'deposit': {
+            'url': 'some-url',
+            'deposit_id': 1,
+        },
+        'npm': {
+            'url': 'https://www.npmjs.com/package/onepackage',
+        },
+        'pypi': {
+            'url': 'some-url',
+        },
+    }
+    for loader_type, kwargs in loader_input.items():
+        loader = get_loader(loader_type, **kwargs)
+        assert isinstance(loader, PackageLoader)
+
+
+def test_get_loader_npm_invalid_url(swh_config):
+    """Npm loader should reject an origin url it cannot parse
+
+    """
+    with pytest.raises(ValueError, match='Invalid npm origin url'):
+        get_loader('npm', url='some-url')
+
+
+help_msg = """Usage: run [OPTIONS] [OPTIONS]...
+
+  Loader cli tools
+
+  Load origin url with loader
+
+Options:
+  -t, --type [archive|debian|deposit|npm|pypi]
+                                  Loader to run  [required]
+  -u, --url TEXT                  Origin url to load
+  -h, --help                      Show this message and exit.
+"""
+
+
+def test_run_help(swh_config):
+    """Help message should be ok
+
+    """
+    runner = CliRunner()
+    result = runner.invoke(run, ['-h'])
+    assert result.exit_code == 0
+    assert result.output.startswith(help_msg)
+
+
+def test_run_missing_type(swh_config):
+    """Omitting the loader type should be a usage error
+
+    """
+    runner = CliRunner()
+    result = runner.invoke(run, ['--url', 'https://some-url'])
+    assert result.exit_code == 2
+    assert '--type' in result.output
+
+
+@patch('swh.loader.package.pypi.loader.PyPILoader')
+def test_run_pypi(mock_loader, swh_config):
+    """Triggering a load should be ok
+
+    """
+    runner = CliRunner()
+    result = runner.invoke(run, [
+        '--type', 'pypi',
+        '--url', 'https://some-url'
+    ])
+    assert result.exit_code == 0
+    mock_loader.assert_called_once_with(url='https://some-url')  # constructor