diff --git a/requirements-swh.txt b/requirements-swh.txt index da1d754..c150267 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,4 @@ -swh.core >= 0.0.75 +swh.core >= 0.3 swh.model >= 0.5.0 swh.scheduler >= 0.4.0 swh.storage >= 0.13.1 diff --git a/setup.py b/setup.py index 6bef6d3..d238685 100755 --- a/setup.py +++ b/setup.py @@ -1,79 +1,79 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from io import open from os import path from setuptools import find_packages, setup here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = "requirements-%s.txt" % name else: reqf = "requirements.txt" requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith("#"): continue requirements.append(line) return requirements setup( name="swh.loader.core", description="Software Heritage Base Loader", long_description=long_description, long_description_content_type="text/markdown", python_requires=">=3.7", author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/DLDBASE", packages=find_packages(), # packages's modules scripts=[], # scripts to package install_requires=parse_requirements() + parse_requirements("swh"), setup_requires=["setuptools-scm"], use_scm_version=True, extras_require={"testing": parse_requirements("test")}, include_package_data=True, entry_points=""" + [swh.cli.subcommands] + loader=swh.loader.cli [swh.workers] loader.archive=swh.loader.package.archive:register 
loader.cran=swh.loader.package.cran:register loader.debian=swh.loader.package.debian:register loader.deposit=swh.loader.package.deposit:register loader.nixguix=swh.loader.package.nixguix:register loader.npm=swh.loader.package.npm:register loader.pypi=swh.loader.package.pypi:register - [swh.cli.subcommands] - loader=swh.loader.cli:loader """, classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 5 - Production/Stable", ], project_urls={ "Bug Reports": "https://forge.softwareheritage.org/maniphest", "Funding": "https://www.softwareheritage.org/donate", "Source": "https://forge.softwareheritage.org/source/swh-loader-core", "Documentation": "https://docs.softwareheritage.org/devel/swh-loader-core/", }, ) diff --git a/swh/loader/cli.py b/swh/loader/cli.py index f2b0be6..06102d1 100644 --- a/swh/loader/cli.py +++ b/swh/loader/cli.py @@ -1,106 +1,106 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control import logging from typing import Any import click import pkg_resources -from swh.core.cli import CONTEXT_SETTINGS +from swh.core.cli import CONTEXT_SETTINGS, swh as swh_cli_group logger = logging.getLogger(__name__) LOADERS = { entry_point.name.split(".", 1)[1]: entry_point for entry_point in pkg_resources.iter_entry_points("swh.workers") if entry_point.name.split(".", 1)[0] == "loader" } SUPPORTED_LOADERS = sorted(list(LOADERS)) def get_loader(name: str, **kwargs) -> Any: """Given a loader name, instantiate it. Args: name: Loader's name kwargs: Configuration dict (url...) 
Returns: An instantiated loader """ if name not in LOADERS: raise ValueError( "Invalid loader %s: only supported loaders are %s" % (name, SUPPORTED_LOADERS) ) registry_entry = LOADERS[name].load()() logger.debug(f"registry: {registry_entry}") loader_cls = registry_entry["loader"] logger.debug(f"loader class: {loader_cls}") return loader_cls(**kwargs) -@click.group(name="loader", context_settings=CONTEXT_SETTINGS) +@swh_cli_group.group(name="loader", context_settings=CONTEXT_SETTINGS) @click.pass_context def loader(ctx): """Loader cli tools """ pass @loader.command(name="run", context_settings=CONTEXT_SETTINGS) @click.argument("type", type=click.Choice(SUPPORTED_LOADERS)) @click.argument("url") @click.argument("options", nargs=-1) @click.pass_context def run(ctx, type, url, options): """Ingest with loader <type> the origin located at <url>""" import iso8601 from swh.scheduler.cli.utils import parse_options (_, kw) = parse_options(options) logger.debug(f"kw: {kw}") visit_date = kw.get("visit_date") if visit_date and isinstance(visit_date, str): visit_date = iso8601.parse_date(visit_date) kw["visit_date"] = visit_date loader = get_loader(type, url=url, **kw) result = loader.load() click.echo(result) @loader.command(name="list", context_settings=CONTEXT_SETTINGS) @click.argument("type", default="all", type=click.Choice(["all"] + SUPPORTED_LOADERS)) @click.pass_context def list(ctx, type): """List supported loaders and optionally their arguments""" import inspect if type == "all": loaders = ", ".join(SUPPORTED_LOADERS) click.echo(f"Supported loaders: {loaders}") else: registry_entry = LOADERS[type].load()() loader_cls = registry_entry["loader"] doc = inspect.getdoc(loader_cls).strip() # Hack to get the signature of the class even though it subclasses # Generic, which reimplements __new__. # See <https://bugs.python.org/issue40897> signature = inspect.signature(loader_cls.__init__) signature_str = str(signature).replace("self, ", "") click.echo(f"Loader: {doc}\nsignature: {signature_str}")