Forward metadata_fetcher_credentials from the CLI configuration to the loader.

* swh/loader/cli.py
  - `run` now passes conf.get("metadata_fetcher_credentials") to get_loader(),
    so the value is None when the key is absent from the configuration.
  - `loader` logs the configuration with a "%s" placeholder; the previous call
    passed the configuration as a logging argument without any placeholder, so
    it never appeared in the debug output.
* swh/loader/tests/test_cli.py
  - test_run_with_visit_date expects the new keyword argument.

NOTE(review): line breaks, indentation and blank-line placement were
reconstructed from a whitespace-collapsed copy of this patch; check the context
lines against the target tree before applying.

diff --git a/swh/loader/cli.py b/swh/loader/cli.py
index 479aa33..bc8076c 100644
--- a/swh/loader/cli.py
+++ b/swh/loader/cli.py
@@ -1,135 +1,141 @@
 # Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 # WARNING: do not import unnecessary things here to keep cli startup time under
 # control
 import logging
 from typing import Any
 
 import click
 import pkg_resources
 
 from swh.core.cli import CONTEXT_SETTINGS
 from swh.core.cli import swh as swh_cli_group
 
 logger = logging.getLogger(__name__)
 
 
 LOADERS = {
     entry_point.name.split(".", 1)[1]: entry_point
     for entry_point in pkg_resources.iter_entry_points("swh.workers")
     if entry_point.name.split(".", 1)[0] == "loader"
 }
 
 
 SUPPORTED_LOADERS = sorted(list(LOADERS))
 
 
 def get_loader(name: str, **kwargs) -> Any:
     """Given a loader name, instantiate it.
 
     Args:
         name: Loader's name
         kwargs: Configuration dict (url...)
 
     Returns:
         An instantiated loader
 
     """
     if name not in LOADERS:
         raise ValueError(
             "Invalid loader %s: only supported loaders are %s"
             % (name, SUPPORTED_LOADERS)
         )
 
     registry_entry = LOADERS[name].load()()
     logger.debug(f"registry: {registry_entry}")
     loader_cls = registry_entry["loader"]
     logger.debug(f"loader class: {loader_cls}")
     return loader_cls.from_config(**kwargs)
 
 
 @swh_cli_group.group(name="loader", context_settings=CONTEXT_SETTINGS)
 @click.option(
     "--config-file",
     "-C",
     default=None,
     type=click.Path(
         exists=True,
         dir_okay=False,
     ),
     help="Configuration file.",
 )
 @click.pass_context
 def loader(ctx, config_file):
     """Loader cli tools"""
     from os import environ
 
     from swh.core.config import read
 
     ctx.ensure_object(dict)
     logger.debug("ctx: %s", ctx)
 
     if not config_file:
         config_file = environ.get("SWH_CONFIG_FILENAME")
 
     ctx.obj["config"] = read(config_file)
     logger.debug("config_file: %s", config_file)
-    logger.debug("config: ", ctx.obj["config"])
+    logger.debug("config: %s", ctx.obj["config"])
 
 
 @loader.command(name="run", context_settings=CONTEXT_SETTINGS)
 @click.argument("type", type=click.Choice(SUPPORTED_LOADERS))
 @click.argument("url")
 @click.argument("options", nargs=-1)
 @click.pass_context
 def run(ctx, type, url, options):
     """Ingest with loader the origin located at """
     import iso8601
 
     from swh.scheduler.cli.utils import parse_options
 
     conf = ctx.obj.get("config", {})
     if "storage" not in conf:
         raise ValueError("Missing storage configuration key")
 
     (_, kw) = parse_options(options)
     logger.debug(f"kw: {kw}")
     visit_date = kw.get("visit_date")
     if visit_date and isinstance(visit_date, str):
         visit_date = iso8601.parse_date(visit_date)
         kw["visit_date"] = visit_date
 
-    loader = get_loader(type, url=url, storage=conf["storage"], **kw)
+    loader = get_loader(
+        type,
+        url=url,
+        storage=conf["storage"],
+        metadata_fetcher_credentials=conf.get("metadata_fetcher_credentials"),
+        **kw,
+    )
     result = loader.load()
     msg = f"{result} for origin '{url}'"
     directory = kw.get("directory")
     if directory:
         msg = msg + f" and directory '{directory}'"
     click.echo(msg)
 
 
 @loader.command(name="list", context_settings=CONTEXT_SETTINGS)
 @click.argument("type", default="all", type=click.Choice(["all"] + SUPPORTED_LOADERS))
 @click.pass_context
 def list(ctx, type):
     """List supported loaders and optionally their arguments"""
     import inspect
 
     if type == "all":
         loaders = ", ".join(SUPPORTED_LOADERS)
         click.echo(f"Supported loaders: {loaders}")
     else:
         registry_entry = LOADERS[type].load()()
         loader_cls = registry_entry["loader"]
         doc = inspect.getdoc(loader_cls).strip()
 
         # Hack to get the signature of the class even though it subclasses
         # Generic, which reimplements __new__.
         # See
         signature = inspect.signature(loader_cls.__init__)
         signature_str = str(signature).replace("self, ", "")
 
         click.echo(f"Loader: {doc}\nsignature: {signature_str}")
diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py
index 9f43735..7d762da 100644
--- a/swh/loader/tests/test_cli.py
+++ b/swh/loader/tests/test_cli.py
@@ -1,156 +1,157 @@
 # Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import os
 
 from click.formatting import HelpFormatter
 from click.testing import CliRunner
 import pytest
 import yaml
 
 from swh.loader.cli import SUPPORTED_LOADERS, get_loader
 from swh.loader.cli import loader as loader_cli
 from swh.loader.package.loader import PackageLoader
 
 
 def test_get_loader_wrong_input(swh_config):
     """Unsupported loader should raise"""
     loader_type = "unknown"
     assert loader_type not in SUPPORTED_LOADERS
     with pytest.raises(ValueError, match="Invalid loader"):
         get_loader(loader_type, url="db-url")
 
 
 def test_get_loader(swh_loader_config):
     """Instantiating a supported loader should be ok"""
     loader_input = {
         "archive": {"url": "some-url", "artifacts": []},
         "debian": {
             "url": "some-url",
             "packages": [],
         },
         "npm": {
             "url": "https://www.npmjs.com/package/onepackage",
         },
         "pypi": {
             "url": "some-url",
         },
     }
     for loader_type, kwargs in loader_input.items():
         kwargs["storage"] = swh_loader_config["storage"]
         loader = get_loader(loader_type, **kwargs)
         assert isinstance(loader, PackageLoader)
 
 
 def _write_usage(command, args, max_width=80):
     hf = HelpFormatter(width=max_width)
     hf.write_usage(command, args)
     return hf.getvalue()[:-1]
 
 
 def test_run_help(swh_config):
     """Usage message should contain list of available loaders"""
     runner = CliRunner()
 
     result = runner.invoke(loader_cli, ["run", "-h"])
 
     assert result.exit_code == 0
 
     # Syntax depends on dependencies' versions
     supported_loaders = "|".join(SUPPORTED_LOADERS)
     usage_prefix = _write_usage("loader run", "[OPTIONS] [%s]\n" % supported_loaders)
     usage_prefix2 = _write_usage("loader run", "[OPTIONS] {%s}\n" % supported_loaders)
     assert result.output.startswith((usage_prefix, usage_prefix2))
 
 
 def test_run_with_configuration_failure(tmp_path):
     """Triggering a load should fail since configuration is incomplete"""
     runner = CliRunner()
 
     conf_path = os.path.join(str(tmp_path), "cli.yml")
     with open(conf_path, "w") as f:
         f.write(yaml.dump({}))
 
     with pytest.raises(ValueError, match="Missing storage"):
         runner.invoke(
             loader_cli,
             [
                 "-C",
                 conf_path,
                 "run",
                 "pypi",
                 "url=https://some-url",
             ],
             catch_exceptions=False,
         )
 
 
 def test_run_pypi(mocker, swh_config):
     """Triggering a load should be ok"""
     mock_loader = mocker.patch("swh.loader.package.pypi.loader.PyPILoader.load")
     runner = CliRunner()
     result = runner.invoke(
         loader_cli,
         [
             "-C",
             swh_config,
             "run",
             "pypi",
             "url=https://some-url",
         ],
     )
     assert result.exit_code == 0
     mock_loader.assert_called_once_with()
 
 
 def test_run_with_visit_date(mocker, swh_config):
     """iso visit_date parameter should be parsed as datetime"""
     mock_loader = mocker.patch("swh.loader.cli.get_loader")
     runner = CliRunner()
     input_date = "2016-05-03 15:16:32+00"
     result = runner.invoke(
         loader_cli, ["run", "npm", "https://some-url", f"visit_date='{input_date}'"]
     )
     assert result.exit_code == 0
 
     expected_parsed_date = datetime.datetime(
         2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
     )
     mock_loader.assert_called_once_with(
         "npm",
         storage={"cls": "memory"},
         url="https://some-url",
         visit_date=expected_parsed_date,
+        metadata_fetcher_credentials=None,
     )
 
 
 def test_list_help(mocker, swh_config):
     """Usage message should contain list of available loaders"""
     runner = CliRunner()
     result = runner.invoke(loader_cli, ["list", "--help"])
     assert result.exit_code == 0
     usage_prefix = _write_usage(
         "loader list", f"[OPTIONS] [[{'|'.join(['all'] + SUPPORTED_LOADERS)}]]"
     )
     expected_help_msg = f"""{usage_prefix}
 
   List supported loaders and optionally their arguments
 
 Options:
   -h, --help  Show this message and exit.
 """
     assert result.output.startswith(expected_help_msg)
 
 
 def test_list_help_npm(mocker, swh_config):
     """Triggering a load should be ok"""
     runner = CliRunner()
     result = runner.invoke(loader_cli, ["list", "npm"])
     assert result.exit_code == 0
     expected_help_msg = """
 Loader: Load npm origin's artifact releases into swh archive.
 """
     assert result.output.startswith(expected_help_msg[1:])