diff --git a/benchmark.py b/benchmark.py --- a/benchmark.py +++ b/benchmark.py @@ -18,7 +18,9 @@ SEED_OPTIONS = ["-s 10"] -def get_scenario_cmd(algo, kb_url, kb_label, origin_info, extracted_repo_path): +def get_scenario_cmd( + algo, kb_url, kb_label, origin_info, extracted_repo_path, log_file +): return [ "swh", "scanner", @@ -33,6 +35,8 @@ origin_info["origin"], "--commit", origin_info["commit"], + "--log-file", + log_file, "--exclude", str(extracted_repo_path) + "/.git", str(extracted_repo_path), @@ -40,7 +44,7 @@ def run_experiments( - repo_path: str, temp_path: str, kb_state_file: str, algos: Set[str] + repo_path: str, temp_path: str, kb_state_file: str, algos: Set[str], log_file: str ): """This function create a process for each experiment; one experiment is composed by: the repository we want to scan, the algorithms we need to test and different @@ -70,13 +74,23 @@ if algo == "random": for seed_opt in SEED_OPTIONS: random_cmd = get_scenario_cmd( - algo, kb_url, kb_label, origin_info, str(extracted_repo_path) + algo, + kb_url, + kb_label, + origin_info, + str(extracted_repo_path), + log_file, ) scenario_cmds.append(random_cmd + [seed_opt]) else: scenario_cmds.append( get_scenario_cmd( - algo, kb_url, kb_label, origin_info, str(extracted_repo_path) + algo, + kb_url, + kb_label, + origin_info, + str(extracted_repo_path), + log_file, ) ) @@ -97,6 +111,9 @@ @click.argument("repo_path", type=click.Path(exists=True), required=True) @click.argument("temp_path", type=click.Path(exists=True), required=True) @click.argument("kb_state", type=click.Path(exists=True), required=True) +@click.option( + "--log-file", "-l", metavar="FILENAME", required=True, help="Log custom path." +) @click.option( "-a", "--algo", @@ -110,9 +127,9 @@ metavar="ALGORITHM_NAME", help="The algorithm name for the benchmark.", ) -def main(repo_path, temp_path, kb_state, algos): +def main(repo_path, temp_path, kb_state, log_file, algos): logging.basicConfig( - filename="experiments.log", + filename=log_file, format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p", ) @@ -125,7 +142,7 @@ stdout=subprocess.DEVNULL, stderr=sys.stderr, ) - run_experiments(repo_path, temp_path, kb_state, set(algos)) + run_experiments(repo_path, temp_path, kb_state, set(algos), log_file) except Exception as e: logging.exception(e) except IOError as ioerror: diff --git a/run_benchmark.sh b/run_benchmark.sh --- a/run_benchmark.sh +++ b/run_benchmark.sh @@ -13,6 +13,7 @@ temp_dir=$1 kb_state=$2 +log_file=$3 if [ ! -d "$temp_dir" ]; then echo "You should provide a valid temporary directory path" @@ -24,7 +25,14 @@ exit 1 fi -for i in "${@:3}"; do +if [ "$log_file" == '' ]; then + echo "You should provide the file where the logs will be saved" + exit 1 +else + log_file="-l ${log_file}" +fi + +for i in "${@:4}"; do algos="$algos -a $i" done @@ -33,5 +41,5 @@ while IFS= read -r repo; do - ./benchmark.py $repo $temp_dir $kb_state $algos + ./benchmark.py $repo $temp_dir $kb_state $algos $log_file done diff --git a/swh/scanner/benchmark_algos.py b/swh/scanner/benchmark_algos.py --- a/swh/scanner/benchmark_algos.py +++ b/swh/scanner/benchmark_algos.py @@ -6,6 +6,7 @@ import collections import itertools import json +import logging import os from pathlib import Path import random @@ -353,6 +354,10 @@ counter["api_calls"] = 0 counter["queries"] = 0 source_tree = load_source(Path(root), sre_patterns) + logging.info( + f'started processing repo "{repo_id}" with algorithm ' + f'"{algo}" and knowledge base "{backend_name}"' + ) if algo == "random": if seed: @@ -393,4 +398,9 @@ counter["queries"], ) + logging.info( + f'finished processing repo "{repo_id}" with algorithm ' + f'"{algo}" and knowledge base "{backend_name}"' + ) + print(*result, sep=",") diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py --- a/swh/scanner/cli.py +++ b/swh/scanner/cli.py @@ -281,9 +281,19 @@ @click.option( "--seed", "-s", metavar="SEED", type=int, help="Seed for the random algorithm" ) +@click.option("--log-file", "-l", metavar="FILENAME", help="Log custom path.") @click.pass_context def benchmark( - ctx, root_path, api_url, backend_name, origin_url, commit, patterns, algo, seed + ctx, + root_path, + api_url, + backend_name, + origin_url, + commit, + patterns, + algo, + seed, + log_file, ): from importlib import reload import logging @@ -293,9 +303,10 @@ # reload logging module avoid conflict with benchmark.py logging reload(logging) logging.basicConfig( - filename="experiments.log", + filename=log_file if log_file else "experiments.log", format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p", + level=logging.INFO, ) try: