Changeset View
Standalone View
swh/scanner/cli.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
# WARNING: do not import unnecessary things here to keep cli startup time under | # WARNING: do not import unnecessary things here to keep cli startup time under | ||||
# control | # control | ||||
import os | |||||
from typing import Any, Dict | |||||
import click | import click | ||||
from pathlib import PosixPath | from pathlib import PosixPath | ||||
from typing import Tuple | from typing import Tuple | ||||
from swh.core import config | |||||
from swh.core.cli import CONTEXT_SETTINGS | from swh.core.cli import CONTEXT_SETTINGS | ||||
@click.group(name="scanner", context_settings=CONTEXT_SETTINGS) | # All generic config code should reside in swh.core.config | ||||
vlorentz: the type should be `Dict[str, Any]`; because config values can be any type; you just happen to… | |||||
Not Done Inline ActionsThe web-client key is not needed here, we already know what we configure here, you should simply write: DEFAULT_CONFIG: Dict[str, Any] = { "api-url": "https://archive.softwareheritage.org/api/1/" } anlambert: The `web-client` key is not needed here, we already know what we configure here, you should… | |||||
Done Inline ActionsChose to fully qualify the config in order to streamline config management. tenma: Chose to fully qualify the config in order to streamline config management. | |||||
@click.pass_context | DEFAULT_CONFIG_PATH = os.environ.get( | ||||
def scanner(ctx): | "SWH_CONFIG_FILE", os.path.join(click.get_app_dir("swh"), "global.yml") | ||||
Done Inline ActionsThis name ("config filename") is inconsistent with the name of the argument ("config file"). Additionally, Click automatically handles environment variables, so SWH_CONFIG_FILE is already handed, you just need to add Path(click.get_app_dir("swh")) / "global" as a default for the click option vlorentz: This name ("config filename") is inconsistent with the name of the argument ("config file"). | |||||
Done Inline Actions
tenma: - Does it really handles the swh config envvar, SWH_CONFIG_FILE or whatever its name? Some swh… | |||||
Done Inline ActionsThere is also the question of how much we let click handle, because there is currently much duplication between our click usage and swh.core.config:
tenma: There is also the question of how much we let click handle, because there is currently much… | |||||
Done Inline ActionsOK, so click as of now does not convert path through its click.Path.path_type param which would be a nice feature I could add one day. That would be neat and typing friendly. tenma: OK, so click as of now does not convert path through its click.Path.path_type param which would… | |||||
Done Inline ActionsImplemented the following config path resolving (descending priority): tenma: Implemented the following config path resolving (descending priority):
CLI switch, CLI auto… | |||||
"""Software Heritage Scanner tools.""" | ) | ||||
pass | |||||
DEFAULT_CONFIG: Dict[str, Any] = { | |||||
"web-api": { | |||||
"url": "https://archive.softwareheritage.org/api/1/", | |||||
"auth-token": None, | |||||
} | |||||
} | |||||
Done Inline ActionsI would rather load configuration this way as:
conf = {} if not config_file: config_file = os.environ.get("SWH_CONFIG_FILENAME") if config_file: if not os.path.exists(config_file): raise ValueError("Config file %s does not exist" % config_file) conf.update(config.load_global_config()) conf.update(config.read(config_file, DEFAULT_CONFIG)) else: conf = config.load_named_config("web/client", DEFAULT_CONFIG) This could be wrapped in a new swh.core.config function IMHO as it could be reused in other CLI tools. anlambert: I would rather load configuration this way as:
- it checks if provided config file exists
- it… | |||||
Done Inline ActionsI made a mistake, the named config should be scanner and not web/client. anlambert: I made a mistake, the named config should be `scanner` and not `web/client`. | |||||
Done Inline ActionsDid as replied in above comments. tenma: Did as replied in above comments. | |||||
def parse_url(url): | def parse_url(url): | ||||
if not url.startswith("https://"): | if not url.startswith("https://"): | ||||
url = "https://" + url | url = "https://" + url | ||||
if not url.endswith("/"): | if not url.endswith("/"): | ||||
url += "/" | url += "/" | ||||
return url | return url | ||||
Show All 14 Lines | for pattern in patterns: | ||||
for path in glob.glob(pattern): | for path in glob.glob(pattern): | ||||
dirpath = PosixPath(path) | dirpath = PosixPath(path) | ||||
if root_path not in dirpath.parents: | if root_path not in dirpath.parents: | ||||
error_msg = ( | error_msg = ( | ||||
f'The path "{dirpath}" is not a subdirectory or relative ' | f'The path "{dirpath}" is not a subdirectory or relative ' | ||||
f'to the root directory path: "{root_path}"' | f'to the root directory path: "{root_path}"' | ||||
) | ) | ||||
raise InvalidDirectoryPath(error_msg) | raise InvalidDirectoryPath(error_msg) | ||||
regex = fnmatch.translate(str(PosixPath(pattern))) | regex = fnmatch.translate(str(PosixPath(pattern))) | ||||
Not Done Inline ActionsI know you did not write this, but does it make sense to convert to Path then convert back to string? vlorentz: I know you did not write this, but does it make sense to convert to Path then convert back to… | |||||
Done Inline ActionsI suspect an implicit validation but I have no clue, thanks to impure function. tenma: I suspect an implicit validation but I have no clue, thanks to impure function.
Not critical I… | |||||
yield re.compile(regex) | yield re.compile(regex) | ||||
@click.group(name="scanner", context_settings=CONTEXT_SETTINGS) | |||||
@click.option( | |||||
"-C", | |||||
"--config-file", | |||||
default=DEFAULT_CONFIG_PATH, | |||||
type=click.Path(exists=True, dir_okay=False, path_type=str), | |||||
help="YAML configuration file", | |||||
) | |||||
@click.pass_context | |||||
def scanner(ctx, config_file: str): | |||||
"""Software Heritage Scanner tools.""" | |||||
# recursive merge not done by config.read | |||||
conf = config.read_raw_config(config.config_basepath(config_file)) | |||||
conf = config.merge_configs(DEFAULT_CONFIG, conf) | |||||
ctx.ensure_object(dict) | |||||
ctx.obj["config"] = conf | |||||
@scanner.command(name="scan") | @scanner.command(name="scan") | ||||
@click.argument("root_path", required=True, type=click.Path(exists=True)) | @click.argument("root_path", required=True, type=click.Path(exists=True)) | ||||
@click.option( | @click.option( | ||||
"-u", | "-u", | ||||
"--api-url", | "--api-url", | ||||
default="https://archive.softwareheritage.org/api/1", | default=None, | ||||
metavar="API_URL", | metavar="API_URL", | ||||
show_default=True, | show_default=True, | ||||
help="URL for the api request", | help="URL for the api request", | ||||
) | ) | ||||
@click.option( | @click.option( | ||||
"--exclude", | "--exclude", | ||||
"-x", | "-x", | ||||
"patterns", | "patterns", | ||||
Show All 13 Lines | |||||
@click.option( | @click.option( | ||||
"-i", "--interactive", is_flag=True, help="Show the result in a dashboard" | "-i", "--interactive", is_flag=True, help="Show the result in a dashboard" | ||||
) | ) | ||||
@click.pass_context | @click.pass_context | ||||
def scan(ctx, root_path, api_url, patterns, format, interactive): | def scan(ctx, root_path, api_url, patterns, format, interactive): | ||||
"""Scan a source code project to discover files and directories already | """Scan a source code project to discover files and directories already | ||||
present in the archive""" | present in the archive""" | ||||
import asyncio | import asyncio | ||||
from .scanner import run | from .scanner import run | ||||
Not Done Inline Actionsconfig["api-url"] = parse_url(api_url) anlambert: ```lang=python
config["api-url"] = parse_url(api_url)
``` | |||||
from .model import Tree | from .model import Tree | ||||
from .plot import generate_sunburst | from .plot import generate_sunburst | ||||
from .dashboard.dashboard import run_app | from .dashboard.dashboard import run_app | ||||
config = ctx.obj["config"] | |||||
if api_url: | |||||
config["web-api"]["url"] = parse_url(api_url) | |||||
sre_patterns = set() | sre_patterns = set() | ||||
if patterns: | if patterns: | ||||
sre_patterns = { | sre_patterns = { | ||||
reg_obj for reg_obj in extract_regex_objs(PosixPath(root_path), patterns) | reg_obj for reg_obj in extract_regex_objs(PosixPath(root_path), patterns) | ||||
} | } | ||||
api_url = parse_url(api_url) | |||||
source_tree = Tree(PosixPath(root_path)) | source_tree = Tree(PosixPath(root_path)) | ||||
loop = asyncio.get_event_loop() | loop = asyncio.get_event_loop() | ||||
loop.run_until_complete(run(root_path, api_url, source_tree, sre_patterns)) | loop.run_until_complete(run(config, root_path, source_tree, sre_patterns)) | ||||
if interactive: | if interactive: | ||||
root = PosixPath(root_path) | root = PosixPath(root_path) | ||||
directories = source_tree.getDirectoriesInfo(root) | directories = source_tree.getDirectoriesInfo(root) | ||||
figure = generate_sunburst(directories, root) | figure = generate_sunburst(directories, root) | ||||
run_app(figure, source_tree) | run_app(figure, source_tree) | ||||
else: | else: | ||||
source_tree.show(format) | source_tree.show(format) | ||||
def main(): | |||||
return scanner(auto_envvar_prefix="SWH_SCANNER") | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
scan() | main() |
the type should be Dict[str, Any]; because config values can be any type; you just happen to have only one.