diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -4,12 +4,13 @@
 # See top-level LICENSE file for more information
 
 import click
+import os
 import asyncio
 import glob
 import re
 import fnmatch
 from pathlib import Path
-from typing import Tuple
+from typing import Tuple, Dict, Any
 
 from .scanner import run
 from .model import Tree
@@ -17,14 +18,41 @@
 from .dashboard.dashboard import run_app
 from .exceptions import InvalidDirectoryPath
 
+from swh.core import config
 from swh.core.cli import CONTEXT_SETTINGS
 
 
+# All generic config code should reside in swh.core.config
+DEFAULT_CONFIG_PATH = os.environ.get(
+    "SWH_CONFIG_FILE", os.path.join(click.get_app_dir("swh"), "global.yml")
+)
+
+DEFAULT_CONFIG: Dict[str, Any] = {
+    "web-api": {
+        "url": "https://archive.softwareheritage.org/api/1/",
+        "auth-token": None,
+    }
+}
+
+
 @click.group(name="scanner", context_settings=CONTEXT_SETTINGS)
+@click.option(
+    "-C",
+    "--config-file",
+    default=DEFAULT_CONFIG_PATH,
+    type=click.Path(exists=True, dir_okay=False, path_type=str),
+    help="YAML configuration file",
+)
 @click.pass_context
-def scanner(ctx):
+def scanner(ctx, config_file: str):
     """Software Heritage Scanner tools."""
-    pass
+
+    # recursive merge not done by config.read
+    conf = config.read_raw_config(config.config_basepath(config_file))
+    conf = config.merge_configs(DEFAULT_CONFIG, conf)
+
+    ctx.ensure_object(dict)
+    ctx.obj["config"] = conf
 
 
 def parse_url(url):
@@ -62,7 +90,7 @@
 @click.option(
     "-u",
     "--api-url",
-    default="https://archive.softwareheritage.org/api/1",
+    default=None,
     metavar="API_URL",
     show_default=True,
     help="url for the api request",
@@ -77,7 +105,8 @@
 )
 @click.option(
     "-f",
-    "--format",
+    "--output-format",
+    "out_fmt",
     type=click.Choice(["text", "json", "ndjson", "sunburst"], case_sensitive=False),
     default="text",
     help="select the output format",
@@ -86,19 +115,22 @@
     "-i", "--interactive", is_flag=True, help="show the result in a dashboard"
 )
 @click.pass_context
-def scan(ctx, root_path, api_url, patterns, format, interactive):
+def scan(ctx, root_path, api_url, patterns, out_fmt, interactive):
     """Scan a source code project to discover files and directories already
     present in the archive"""
+    config = ctx.obj["config"]
+    if api_url:
+        config["web-api"]["url"] = parse_url(api_url)
+
     sre_patterns = set()
     if patterns:
         sre_patterns = {
             reg_obj for reg_obj in extract_regex_objs(Path(root_path), patterns)
         }
 
-    api_url = parse_url(api_url)
     source_tree = Tree(Path(root_path))
     loop = asyncio.get_event_loop()
-    loop.run_until_complete(run(root_path, api_url, source_tree, sre_patterns))
+    loop.run_until_complete(run(config, root_path, source_tree, sre_patterns))
 
     if interactive:
         root = Path(root_path)
@@ -106,8 +138,13 @@
         figure = generate_sunburst(directories, root)
         run_app(figure, source_tree)
     else:
-        source_tree.show(format)
+        source_tree.show(out_fmt)
+
+
+def main():
+    """Entry point: run ``scanner`` with the SWH_SCANNER envvar prefix."""
+    return scanner(auto_envvar_prefix="SWH_SCANNER")
 
 
 if __name__ == "__main__":
-    scan()
+    main()
diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py
--- a/swh/scanner/scanner.py
+++ b/swh/scanner/scanner.py
@@ -151,7 +151,7 @@
 
 
 async def run(
-    root: Path, api_url: str, source_tree: Tree, exclude_patterns: Set[Any]
+    config: Dict[str, Any], root: Path, source_tree: Tree, exclude_patterns: Set[Any],
 ) -> None:
     """Start scanning from the given root.
 
@@ -162,6 +162,7 @@
         api_url: url for the API request
 
     """
+    api_url = config["web-api"]["url"]
 
     async def _scan(root, session, api_url, source_tree, exclude_patterns):
         for path, obj_swhid, known in await parse_path(
@@ -176,5 +177,10 @@
             if not known:
                 await _scan(path, session, api_url, source_tree, exclude_patterns)
 
-    async with aiohttp.ClientSession() as session:
+    # Only send credentials when a token is configured; with the default
+    # ``auth-token: None`` the header would otherwise read "Bearer None".
+    auth_token = config["web-api"]["auth-token"]
+    headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else {}
+
+    async with aiohttp.ClientSession(headers=headers) as session:
         await _scan(root, session, api_url, source_tree, exclude_patterns)