diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py --- a/swh/scanner/cli.py +++ b/swh/scanner/cli.py @@ -9,8 +9,6 @@ from typing import Any, Dict import click -from pathlib import PosixPath -from typing import Tuple from swh.core import config from swh.core.cli import CONTEXT_SETTINGS @@ -31,6 +29,7 @@ def parse_url(url): + """CLI-specific helper to 'autocomplete' the provided url.""" if not url.startswith("https://"): url = "https://" + url if not url.endswith("/"): @@ -38,32 +37,6 @@ return url -def extract_regex_objs(root_path: PosixPath, patterns: Tuple[str]) -> object: - """Generates a regex object for each pattern given in input and checks if - the path is a subdirectory or relative to the root path. - - Yields: - an SRE_Pattern object - """ - import glob - import fnmatch - import re - from .exceptions import InvalidDirectoryPath - - for pattern in patterns: - for path in glob.glob(pattern): - dirpath = PosixPath(path) - if root_path not in dirpath.parents: - error_msg = ( - f'The path "{dirpath}" is not a subdirectory or relative ' - f'to the root directory path: "{root_path}"' - ) - raise InvalidDirectoryPath(error_msg) - - regex = fnmatch.translate(str(PosixPath(pattern))) - yield re.compile(regex) - - @click.group(name="scanner", context_settings=CONTEXT_SETTINGS) @click.option( "-C", @@ -105,7 +78,8 @@ ) @click.option( "-f", - "--format", + "--output-format", + "out_fmt", default="text", show_default=True, type=click.Choice(["text", "json", "ndjson", "sunburst"], case_sensitive=False), @@ -115,36 +89,16 @@ "-i", "--interactive", is_flag=True, help="Show the result in a dashboard" ) @click.pass_context -def scan(ctx, root_path, api_url, patterns, format, interactive): +def scan(ctx, root_path, api_url, patterns, out_fmt, interactive): """Scan a source code project to discover files and directories already present in the archive""" - import asyncio - from .scanner import run - from .model import Tree - from .plot import generate_sunburst - from .dashboard.dashboard import run_app + from .scanner import scan config = ctx.obj["config"] if api_url: config["web-api"]["url"] = parse_url(api_url) - sre_patterns = set() - if patterns: - sre_patterns = { - reg_obj for reg_obj in extract_regex_objs(PosixPath(root_path), patterns) - } - - source_tree = Tree(PosixPath(root_path)) - loop = asyncio.get_event_loop() - loop.run_until_complete(run(config, root_path, source_tree, sre_patterns)) - - if interactive: - root = PosixPath(root_path) - directories = source_tree.getDirectoriesInfo(root) - figure = generate_sunburst(directories, root) - run_app(figure, source_tree) - else: - source_tree.show(format) + scan(config, root_path, patterns, out_fmt, interactive) def main(): diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py --- a/swh/scanner/scanner.py +++ b/swh/scanner/scanner.py @@ -3,17 +3,17 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import os -import itertools import asyncio +import fnmatch +import glob +import itertools +import os from pathlib import PosixPath +import re from typing import List, Dict, Tuple, Iterator, Union, Iterable, Pattern, Any import aiohttp -from .exceptions import error_response -from .model import Tree - from swh.model.from_disk import Directory, Content, accept_all_directories from swh.model.identifiers import ( swhid, @@ -22,6 +22,11 @@ CONTENT, ) +from .exceptions import InvalidDirectoryPath, error_response +from .model import Tree +from .plot import generate_sunburst +from .dashboard.dashboard import run_app + async def swhids_discovery( swhids: List[str], session: aiohttp.ClientSession, api_url: str, @@ -192,3 +197,55 @@ async with aiohttp.ClientSession(headers=headers) as session: await _scan(root, session, api_url, source_tree, exclude_patterns) + + +def extract_regex_objs( + root_path: PosixPath, patterns: Iterable[str] +) -> Iterator[Pattern[str]]: + """Generates a regex object for each pattern given in input and checks if + the path is a subdirectory or relative to the root path. + + Yields: + an SRE_Pattern object + """ + for pattern in patterns: + for path in glob.glob(pattern): + dirpath = PosixPath(path) + if root_path not in dirpath.parents: + error_msg = ( + f'The path "{dirpath}" is not a subdirectory or relative ' + f'to the root directory path: "{root_path}"' + ) + raise InvalidDirectoryPath(error_msg) + + regex = fnmatch.translate((pattern)) + yield re.compile(regex) + + +def scan( + config: Dict[str, Any], + root_path: str, + exclude_patterns: Iterable[str], + out_fmt: str, + interactive: bool, +): + """Scan a source code project to discover files and directories already + present in the archive""" + sre_patterns = set() + if exclude_patterns: + sre_patterns = { + reg_obj + for reg_obj in extract_regex_objs(PosixPath(root_path), exclude_patterns) + } + + source_tree = Tree(PosixPath(root_path)) + loop = asyncio.get_event_loop() + loop.run_until_complete(run(config, root_path, source_tree, sre_patterns)) + + if interactive: + root = PosixPath(root_path) + directories = source_tree.getDirectoriesInfo(root) + figure = generate_sunburst(directories, root) + run_app(figure, source_tree) + else: + source_tree.show(out_fmt) diff --git a/swh/scanner/tests/test_cli.py b/swh/scanner/tests/test_cli.py deleted file mode 100644 --- a/swh/scanner/tests/test_cli.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from swh.scanner.cli import extract_regex_objs -from swh.scanner.exceptions import InvalidDirectoryPath - - -def test_extract_regex_objs(temp_folder): - root_path = temp_folder["root"] - - patterns = (str(temp_folder["subdir"]), "/none") - sre_patterns = [reg_obj for reg_obj in extract_regex_objs(root_path, patterns)] - assert len(sre_patterns) == 2 - - patterns = (*patterns, "/tmp") - with pytest.raises(InvalidDirectoryPath): - sre_patterns = [reg_obj for reg_obj in extract_regex_objs(root_path, patterns)] diff --git a/swh/scanner/tests/test_scanner.py b/swh/scanner/tests/test_scanner.py --- a/swh/scanner/tests/test_scanner.py +++ b/swh/scanner/tests/test_scanner.py @@ -8,14 +8,25 @@ from .data import correct_api_response, present_swhids, to_exclude_swhid -from swh.scanner.scanner import swhids_discovery, get_subpaths, run +from swh.scanner.scanner import swhids_discovery, get_subpaths, extract_regex_objs, run from swh.scanner.model import Tree -from swh.scanner.cli import extract_regex_objs -from swh.scanner.exceptions import APIError +from swh.scanner.exceptions import APIError, InvalidDirectoryPath aio_url = "http://example.org/api/known/" +def test_extract_regex_objs(temp_folder): + root_path = temp_folder["root"] + + patterns = (str(temp_folder["subdir"]), "/none") + sre_patterns = [reg_obj for reg_obj in extract_regex_objs(root_path, patterns)] + assert len(sre_patterns) == 2 + + patterns = (*patterns, "/tmp") + with pytest.raises(InvalidDirectoryPath): + sre_patterns = [reg_obj for reg_obj in extract_regex_objs(root_path, patterns)] + + def test_scanner_correct_api_request(mock_aioresponse, event_loop, aiosession): mock_aioresponse.post( aio_url,