Changeset View
Changeset View
Standalone View
Standalone View
swh/scanner/cli.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import click | import click | ||||
import asyncio | import asyncio | ||||
import glob | |||||
from pathlib import PosixPath | from pathlib import PosixPath | ||||
from typing import Tuple | |||||
from .scanner import run | from .scanner import run | ||||
from .model import Tree | from .model import Tree | ||||
from .exceptions import InvalidDirectoryPath | |||||
from swh.core.cli import CONTEXT_SETTINGS | from swh.core.cli import CONTEXT_SETTINGS | ||||
@click.group(
    name="scanner",
    context_settings=CONTEXT_SETTINGS,
)
@click.pass_context
def scanner(ctx):
    """Software Heritage Scanner tools."""
    # Nothing to do here: the group only serves as the parent of the
    # sub-commands registered through ``@scanner.command``.
def parse_url(url):
    """Normalize the API base URL given on the command line.

    A URL that carries no ``http://`` or ``https://`` scheme is assumed to be
    HTTPS, and a trailing slash is appended when it is missing.

    Args:
        url: API base URL, with or without scheme and trailing slash

    Returns:
        The URL with an explicit scheme and a trailing ``/``.
    """
    # Keep an explicit http:// scheme untouched (e.g. a local API instance)
    # instead of turning it into "https://http://...".
    if not url.startswith(("http://", "https://")):
        url = "https://" + url
    if not url.endswith("/"):
        url += "/"
    return url
Review comment (vlorentz): suggested annotation `patterns: List[str]`.
def check_patterns(root_path: PosixPath, patterns: Tuple[str, ...]) -> None:
    """Validate the exclusion patterns against the root path.

    Every pattern is expanded with :func:`glob.glob` (recursively) and each
    resulting path must exist and be located below ``root_path``.

    Args:
        root_path: root directory the expanded paths must live under
        patterns: glob patterns to validate, as a tuple of any length

    Raises:
        InvalidDirectoryPath: if an expanded path does not exist, or if
            ``root_path`` is not one of its parents
    """
    for pattern in patterns:
        for path in glob.glob(pattern, recursive=True):
            dirpath = PosixPath(path)
            # exists() follows symlinks, so a dangling link is only accepted
            # thanks to the separate is_symlink() test.
            if not dirpath.exists() and not dirpath.is_symlink():
                raise InvalidDirectoryPath(f"{dirpath} does not exist")
            # ``parents`` is a purely lexical view of the path: no
            # resolution against the filesystem happens here.
            if root_path not in dirpath.parents:
                error_msg = (
                    f'The path "{dirpath}" is not a subdirectory or relative '
                    f'to the root directory path: "{root_path}"'
                )
                raise InvalidDirectoryPath(error_msg)
Review comment (vlorentz, unsubmitted, not done): Could you write test(s) for this function?
@scanner.command(name="scan")
@click.argument("root_path", required=True, type=click.Path(exists=True))
@click.option(
    "-u",
    "--api-url",
    default="https://archive.softwareheritage.org/api/1",
    metavar="API_URL",
    show_default=True,
    help="url for the api request",
)
@click.option(
    "--exclude",
    "-x",
    "patterns",
    metavar="PATTERN",
    multiple=True,
    help="recursively exclude a specific pattern",
)
@click.option(
    "-f",
    "--format",
    type=click.Choice(["text", "json", "sunburst"], case_sensitive=False),
    default="text",
    help="select the output format",
)
@click.pass_context
def scan(ctx, root_path, api_url, patterns, format):
    """Scan a source code project to discover files and directories already
    present in the archive"""
    root = PosixPath(root_path)

    # Reject invalid exclusion patterns before any scanning starts.
    if patterns:
        check_patterns(root, patterns)
    # TODO(review, vlorentz): compile `patterns` into regex objects here so
    # that fnmatch does not need to recompile them on every call to
    # directory_filter; the scanner could then match paths against that set.

    api_url = parse_url(api_url)
    source_tree = Tree(root)

    # Drive the asynchronous scan to completion, then render the result in
    # the requested output format.
    asyncio.get_event_loop().run_until_complete(
        run(root_path, api_url, source_tree, patterns)
    )
    source_tree.show(format)
if __name__ == "__main__": | if __name__ == "__main__": | ||||
scan() | scan() |
patterns: List[str]