diff --git a/MANIFEST.in b/MANIFEST.in --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ include version.txt include README.md recursive-include swh py.typed -recursive-include swh/scanner/tests/data/ *.tgz +recursive-include swh/scanner/tests/data/ * diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py --- a/swh/scanner/cli.py +++ b/swh/scanner/cli.py @@ -18,7 +18,6 @@ # All generic config code should reside in swh.core.config CONFIG_ENVVAR = "SWH_CONFIG_FILE" DEFAULT_CONFIG_PATH = os.path.join(click.get_app_dir("swh"), "global.yml") -DEFAULT_PATH = os.environ.get(CONFIG_ENVVAR, DEFAULT_CONFIG_PATH) DEFAULT_CONFIG: Dict[str, Any] = { "web-api": { @@ -57,15 +56,27 @@ ) @click.pass_context def scanner(ctx, config_file: Optional[str]): - if config_file is None and config.config_exists(DEFAULT_PATH): - config_file = DEFAULT_PATH - if config_file is None: - conf = DEFAULT_CONFIG - else: - # read_raw_config do not fail on ENOENT + env_config_path = os.environ.get(CONFIG_ENVVAR) + + # read_raw_config do not fail if file does not exist, so check it beforehand + # while enforcing loading priority + if config_file: if not config.config_exists(config_file): - raise FileNotFoundError(config_file) + raise click.BadParameter( + f"File '{config_file}' cannot be opened.", param_hint="--config-file" + ) + elif env_config_path: + if not config.config_exists(env_config_path): + raise click.BadParameter( + f"File '{env_config_path}' cannot be opened.", param_hint=CONFIG_ENVVAR + ) + config_file = env_config_path + elif config.config_exists(DEFAULT_CONFIG_PATH): + config_file = DEFAULT_CONFIG_PATH + + conf = DEFAULT_CONFIG + if config_file is not None: conf = config.read_raw_config(config.config_basepath(config_file)) conf = config.merge_configs(DEFAULT_CONFIG, conf) @@ -108,7 +119,7 @@ def scan(ctx, root_path, api_url, patterns, out_fmt, interactive): """Scan a source code project to discover files and directories already present in the archive""" - from .scanner import scan + import swh.scanner.scanner as scanner config = ctx.obj["config"] if api_url: @@ -116,7 +127,7 @@ api_url += "/" config["web-api"]["url"] = api_url - scan(config, root_path, patterns, out_fmt, interactive) + scanner.scan(config, root_path, patterns, out_fmt, interactive) def main(): diff --git a/swh/scanner/tests/conftest.py b/swh/scanner/tests/conftest.py --- a/swh/scanner/tests/conftest.py +++ b/swh/scanner/tests/conftest.py @@ -41,8 +41,8 @@ session.detach() -@pytest.fixture(scope="session") -def temp_folder(tmp_path_factory): +@pytest.fixture(scope="function") +def temp_folder(tmp_path): """Fixture that generates a temporary folder with the following structure: @@ -58,11 +58,13 @@ subfile.txt } """ - root = tmp_path_factory.getbasetemp() - subdir = tmp_path_factory.mktemp("subdir") - subsubdir = subdir.joinpath("subsubdir") + root = tmp_path + subdir = root / "subdir" + subdir.mkdir() + subsubdir = subdir / "subsubdir" subsubdir.mkdir() - subdir2 = tmp_path_factory.mktemp("subdir2") + subdir2 = root / "subdir2" + subdir2.mkdir() subfile = root / "subfile.txt" subfile.touch() filesample = subdir / "filesample.txt" diff --git a/swh/scanner/tests/data/global.yml b/swh/scanner/tests/data/global.yml new file mode 100644 --- /dev/null +++ b/swh/scanner/tests/data/global.yml @@ -0,0 +1,3 @@ +web-api: + url: "https://example.com/api/1" + auth-token: "dummy" diff --git a/swh/scanner/tests/data/global2.yml b/swh/scanner/tests/data/global2.yml new file mode 100644 --- /dev/null +++ b/swh/scanner/tests/data/global2.yml @@ -0,0 +1,3 @@ +web-api: + url: "https://example.com/api/2/" + auth-token: "dummy2" diff --git a/swh/scanner/tests/test_cli.py b/swh/scanner/tests/test_cli.py new file mode 100644 --- /dev/null +++ b/swh/scanner/tests/test_cli.py @@ -0,0 +1,121 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from pathlib import Path +from unittest.mock import Mock, call + +from click.testing import CliRunner +import pytest + +import swh.scanner.cli as cli +import swh.scanner.scanner as scanner + +DATADIR = Path(__file__).absolute().parent / "data" +CONFIG_PATH_GOOD = str(DATADIR / "global.yml") +CONFIG_PATH_GOOD2 = str(DATADIR / "global2.yml") # alternative to global.yml +ROOTPATH_GOOD = str(DATADIR) + + +@pytest.fixture(scope="function") +def m_scanner(mocker): + """Returns a mock swh.scanner.scanner object with all attributes mocked""" + # Customizable mock of scanner module + # Fortunately, noop is the default behavior for all methods + scanner_mock = Mock(scanner) + yield mocker.patch("swh.scanner.scanner", scanner_mock) + + +@pytest.fixture(scope="function") +def spy_configopen(mocker): + """Returns a mock of open builtin scoped to swh.core.config""" + yield mocker.patch("swh.core.config.open", wraps=open) + + +@pytest.fixture(scope="function") +def cli_runner(monkeypatch, tmp_path): + """Return a CliRunner with default environment variable SWH_CONFIG_FILE unset""" + BAD_CONFIG_PATH = str(tmp_path / "missing") + monkeypatch.setattr(cli, "DEFAULT_CONFIG_PATH", BAD_CONFIG_PATH) + return CliRunner(env={"SWH_CONFIG_FILE": None}) + + +# TEST BEGIN + +# For nominal code paths, check that the right config file is loaded +# scanner is mocked to not run actual scan, config loading is mocked to check its usage + + +def test_smoke(cli_runner): + """Break if basic functionality breaks""" + res = cli_runner.invoke(cli.scanner, ["scan", "-h"]) + assert res.exit_code == 0 + + +def test_config_path_option_bad(cli_runner, tmp_path): + """Test bad option no envvar bad default""" + CONFPATH_BAD = str(tmp_path / "missing") + res = cli_runner.invoke(cli.scanner, ["-C", CONFPATH_BAD, "scan", ROOTPATH_GOOD]) + assert res.exit_code != 0 + + +def test_default_config_path(cli_runner, m_scanner, spy_configopen, monkeypatch): + """Test no option no envvar good default""" + monkeypatch.setattr(cli, "DEFAULT_CONFIG_PATH", CONFIG_PATH_GOOD) + res = cli_runner.invoke(cli.scanner, ["scan", ROOTPATH_GOOD]) + assert res.exit_code == 0 + assert spy_configopen.call_args == call(CONFIG_PATH_GOOD) + assert m_scanner.scan.call_count == 1 + + +def test_root_no_config(cli_runner, m_scanner, spy_configopen): + """Test no config = no option no envvar bad default, good root""" + res = cli_runner.invoke(cli.scanner, ["scan", ROOTPATH_GOOD]) + assert res.exit_code == 0 + assert spy_configopen.call_count == 0 + assert m_scanner.scan.call_count == 1 + + +def test_root_bad(cli_runner, tmp_path): + """Test no option no envvar bad default bad root""" + ROOTPATH_BAD = str(tmp_path / "missing") + res = cli_runner.invoke(cli.scanner, ["scan", ROOTPATH_BAD]) + assert res.exit_code != 0 + + +def test_config_path_envvar_good(cli_runner, m_scanner, spy_configopen): + """Test no option good envvar bad default good root""" + cli_runner.env["SWH_CONFIG_FILE"] = CONFIG_PATH_GOOD + res = cli_runner.invoke(cli.scanner, ["scan", ROOTPATH_GOOD]) + assert res.exit_code == 0 + assert spy_configopen.call_args == call(CONFIG_PATH_GOOD) + assert m_scanner.scan.call_count == 1 + + +def test_config_path_envvar_bad(cli_runner, tmp_path): + """Test no option bad envvar bad default good root""" + CONFPATH_BAD = str(tmp_path / "missing") + cli_runner.env["SWH_CONFIG_FILE"] = CONFPATH_BAD + res = cli_runner.invoke(cli.scanner, ["scan", ROOTPATH_GOOD]) + assert res.exit_code != 0 + + +def test_config_path_option_envvar(cli_runner, m_scanner, spy_configopen): + """Test good option good envvar bad default good root + Check that option has precedence over envvar""" + cli_runner.env["SWH_CONFIG_FILE"] = CONFIG_PATH_GOOD2 + res = cli_runner.invoke( + cli.scanner, ["-C", CONFIG_PATH_GOOD, "scan", ROOTPATH_GOOD] + ) + assert res.exit_code == 0 + assert spy_configopen.call_args == call(CONFIG_PATH_GOOD) + assert m_scanner.scan.call_count == 1 + + +def test_api_url_option(cli_runner, m_scanner): + """Test no config good root good url""" + API_URL = "https://example.com/api/1" # without trailing "/" + res = cli_runner.invoke(cli.scanner, ["scan", ROOTPATH_GOOD, "-u", API_URL]) + assert res.exit_code == 0 + assert m_scanner.scan.call_count == 1 diff --git a/swh/scanner/tests/test_model.py b/swh/scanner/tests/test_model.py --- a/swh/scanner/tests/test_model.py +++ b/swh/scanner/tests/test_model.py @@ -57,7 +57,7 @@ assert len(result) == 6 for path, node_attr in result.items(): - if path == str(root) + "/subdir0/filesample.txt": + if path == str(root) + "/subdir/filesample.txt": assert node_attr["known"] is True else: assert node_attr["known"] is False diff --git a/swh/scanner/tests/test_plot.py b/swh/scanner/tests/test_plot.py --- a/swh/scanner/tests/test_plot.py +++ b/swh/scanner/tests/test_plot.py @@ -34,8 +34,8 @@ assert actual_df["known"][1] == 2 # assert subsubdir has correct level information - assert actual_df["lev0"][2] == "subdir0" - assert actual_df["lev1"][2] == "subdir0/subsubdir" + assert actual_df["lev0"][2] == "subdir" + assert actual_df["lev1"][2] == "subdir/subsubdir" def test_build_hierarchical_df(temp_folder, example_dirs): @@ -51,7 +51,7 @@ actual_df, levels_columns, metrics_columns, root ) - assert actual_result["parent"][1] == "subdir0" + assert actual_result["parent"][1] == "subdir" assert actual_result["contents"][1] == 2 assert actual_result["id"][5] == root assert actual_result["known"][5] == 75