Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/cli.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import click | import click | ||||
from swh.core import config | from swh.core import config | ||||
from swh.core.cli import CONTEXT_SETTINGS | from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup | ||||
from swh.scheduler import get_scheduler | from swh.scheduler import get_scheduler | ||||
from swh.scheduler.cli_utils import schedule_origin_batches | from swh.scheduler.cli_utils import schedule_origin_batches | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.indexer import metadata_dictionary | from swh.indexer import metadata_dictionary | ||||
from swh.indexer.storage import get_indexer_storage | from swh.indexer.storage import get_indexer_storage | ||||
from swh.indexer.storage.api.server import load_and_check_config, app | from swh.indexer.storage.api.server import load_and_check_config, app | ||||
@click.group(
    name="indexer",
    cls=AliasedGroup,
    context_settings=CONTEXT_SETTINGS,
)
@click.option(
    "--config-file", "-C",
    default=None,
    type=click.Path(exists=True, dir_okay=False),
    help="Configuration file.",
)
@click.pass_context
def cli(ctx, config_file):
    """Software Heritage Indexer tools.

    The Indexer is used to mine the content of the archive and extract derived
    information from archive source code artifacts.
    """
    # Guarantee that ctx.obj is a dict, then stash the parsed configuration
    # under the 'config' key so that subcommands can read it from the context.
    ctx.ensure_object(dict)
    ctx.obj["config"] = config.read(config_file)
def _get_api(getter, config, config_key, url):
    """Build an API client from ``config``, optionally overriding its URL.

    Args:
        getter: factory invoked with the selected configuration entry
            expanded as keyword arguments.
        config: configuration mapping. When ``url`` is truthy, its
            ``config_key`` entry is replaced *in place* by a ``remote``
            entry pointing at ``url``.
        config_key: name of the entry of ``config`` to use.
        url: when truthy, URL of a remote API; it takes precedence over
            whatever ``config`` already holds for ``config_key``.

    Returns:
        Whatever ``getter`` returns.

    Raises:
        click.ClickException: if ``url`` is not given and ``config`` has no
            usable entry for ``config_key`` (key absent, or set to ``None``).
    """
    if url:
        config[config_key] = {
            'cls': 'remote',
            'args': {'url': url}
        }
    elif config_key not in config or config[config_key] is None:
        # A key that is present but empty would otherwise reach
        # ``getter(**None)`` and surface as an unhelpful TypeError.
        raise click.ClickException(
            'Missing configuration for {}'.format(config_key))
    return getter(**config[config_key])
@cli.group(name="mapping")
def mapping():
    """Manage Software Heritage Indexer mappings."""
@mapping.command('list') | @mapping.command('list') | ||||
def mapping_list(): | def mapping_list(): | ||||
"""Prints the list of known mappings.""" | """Prints the list of known mappings.""" | ||||
mapping_names = [mapping.name | mapping_names = [mapping.name | ||||
for mapping in metadata_dictionary.MAPPINGS.values()] | for mapping in metadata_dictionary.MAPPINGS.values()] | ||||
Show All 31 Lines | |||||
@click.option('--storage-url', '-g', default=None, | @click.option('--storage-url', '-g', default=None, | ||||
help="URL of the (graph) storage API") | help="URL of the (graph) storage API") | ||||
@click.option('--dry-run/--no-dry-run', is_flag=True, | @click.option('--dry-run/--no-dry-run', is_flag=True, | ||||
default=False, | default=False, | ||||
help='List only what would be scheduled.') | help='List only what would be scheduled.') | ||||
@click.pass_context | @click.pass_context | ||||
def schedule(ctx, scheduler_url, storage_url, indexer_storage_url, | def schedule(ctx, scheduler_url, storage_url, indexer_storage_url, | ||||
dry_run): | dry_run): | ||||
"""Manipulate indexer tasks via SWH Scheduler's API.""" | """Manipulate Software Heritage Indexer tasks. | ||||
Via SWH Scheduler's API.""" | |||||
ctx.obj['indexer_storage'] = _get_api( | ctx.obj['indexer_storage'] = _get_api( | ||||
get_indexer_storage, | get_indexer_storage, | ||||
ctx.obj['config'], | ctx.obj['config'], | ||||
'indexer_storage', | 'indexer_storage', | ||||
indexer_storage_url | indexer_storage_url | ||||
) | ) | ||||
ctx.obj['storage'] = _get_api( | ctx.obj['storage'] = _get_api( | ||||
get_storage, | get_storage, | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | def schedule_origin_metadata_reindex( | ||||
origins = list_origins_by_producer(idx_storage, mappings, tool_ids) | origins = list_origins_by_producer(idx_storage, mappings, tool_ids) | ||||
kwargs = {"policy_update": "update-dups", "parse_ids": False} | kwargs = {"policy_update": "update-dups", "parse_ids": False} | ||||
schedule_origin_batches( | schedule_origin_batches( | ||||
scheduler, task_type, origins, origin_batch_size, kwargs) | scheduler, task_type, origins, origin_batch_size, kwargs) | ||||
@cli.command('rpc-serve')
@click.argument('config-path', required=True)
@click.option('--host', default='0.0.0.0', help="Host to run the server")
@click.option('--port', default=5007, type=click.INT,
              help="Binding port of the server")
@click.option('--debug/--nodebug', default=True,
              help="Indicates if the server should run in debug mode")
def rpc_server(config_path, host, port, debug):
    """Starts a Software Heritage Indexer RPC HTTP server."""
    # NOTE(review): debug mode is on by default while the default host binds
    # every interface; if `app` is a Flask application (presumably -- confirm)
    # this exposes the interactive debugger. Consider `--nodebug` as default.
    api_cfg = load_and_check_config(config_path, type='any')
    app.config.update(api_cfg)
    # click already hands over `port` as an int (type=click.INT) and `debug`
    # as a bool (boolean flag), so no further conversion is needed.
    app.run(host, port=port, debug=debug)


# Additional names under which the 'rpc-serve' command can be invoked.
cli.add_alias(rpc_server, 'api-server')
cli.add_alias(rpc_server, 'serve')
def main():
    """Entry point: invoke the ``cli`` group and return its result.

    The group is called with the ``SWH_INDEXER`` automatic environment
    variable prefix.
    """
    envvar_prefix = 'SWH_INDEXER'
    return cli(auto_envvar_prefix=envvar_prefix)


if __name__ == '__main__':
    main()