Page Menu | Home | Software Heritage

D5032.diff
No One | Temporary

D5032.diff

diff --git a/benchmark.py b/benchmark.py
--- a/benchmark.py
+++ b/benchmark.py
@@ -18,7 +18,9 @@
SEED_OPTIONS = ["-s 10"]
-def get_scenario_cmd(algo, kb_url, kb_label, origin_info, extracted_repo_path):
+def get_scenario_cmd(
+ algo, kb_url, kb_label, origin_info, extracted_repo_path, log_file
+):
return [
"swh",
"scanner",
@@ -33,6 +35,8 @@
origin_info["origin"],
"--commit",
origin_info["commit"],
+ "--log-file",
+ log_file,
"--exclude",
str(extracted_repo_path) + "/.git",
str(extracted_repo_path),
@@ -40,7 +44,7 @@
def run_experiments(
- repo_path: str, temp_path: str, kb_state_file: str, algos: Set[str]
+ repo_path: str, temp_path: str, kb_state_file: str, algos: Set[str], log_file: str
):
"""This function create a process for each experiment; one experiment is composed
by: the repository we want to scan, the algorithms we need to test and different
@@ -70,13 +74,23 @@
if algo == "random":
for seed_opt in SEED_OPTIONS:
random_cmd = get_scenario_cmd(
- algo, kb_url, kb_label, origin_info, str(extracted_repo_path)
+ algo,
+ kb_url,
+ kb_label,
+ origin_info,
+ str(extracted_repo_path),
+ log_file,
)
scenario_cmds.append(random_cmd + [seed_opt])
else:
scenario_cmds.append(
get_scenario_cmd(
- algo, kb_url, kb_label, origin_info, str(extracted_repo_path)
+ algo,
+ kb_url,
+ kb_label,
+ origin_info,
+ str(extracted_repo_path),
+ log_file,
)
)
@@ -97,6 +111,9 @@
@click.argument("repo_path", type=click.Path(exists=True), required=True)
@click.argument("temp_path", type=click.Path(exists=True), required=True)
@click.argument("kb_state", type=click.Path(exists=True), required=True)
+@click.option(
+ "--log-file", "-l", metavar="FILENAME", required=True, help="Log custom path."
+)
@click.option(
"-a",
"--algo",
@@ -110,9 +127,9 @@
metavar="ALGORITHM_NAME",
help="The algorithm name for the benchmark.",
)
-def main(repo_path, temp_path, kb_state, algos):
+def main(repo_path, temp_path, kb_state, log_file, algos):
logging.basicConfig(
- filename="experiments.log",
+ filename=log_file,
format="%(asctime)s %(message)s",
datefmt="%m/%d/%Y %I:%M:%S %p",
)
@@ -125,7 +142,7 @@
stdout=subprocess.DEVNULL,
stderr=sys.stderr,
)
- run_experiments(repo_path, temp_path, kb_state, set(algos))
+ run_experiments(repo_path, temp_path, kb_state, set(algos), log_file)
except Exception as e:
logging.exception(e)
except IOError as ioerror:
diff --git a/run_benchmark.sh b/run_benchmark.sh
--- a/run_benchmark.sh
+++ b/run_benchmark.sh
@@ -13,6 +13,7 @@
temp_dir=$1
kb_state=$2
+log_file=$3
if [ ! -d "$temp_dir" ]; then
echo "You should provide a valid temporary directory path"
@@ -24,7 +25,14 @@
exit 1
fi
-for i in "${@:3}"; do
+if [ "$log_file" == '' ]; then
+ echo "You should provide the file where the logs will be saved"
+ exit 1
+else
+ log_file="-l ${log_file}"
+fi
+
+for i in "${@:4}"; do
algos="$algos -a $i"
done
@@ -33,5 +41,5 @@
while IFS= read -r repo;
do
- ./benchmark.py $repo $temp_dir $kb_state $algos
+ ./benchmark.py $repo $temp_dir $kb_state $algos $log_file
done
diff --git a/swh/scanner/benchmark_algos.py b/swh/scanner/benchmark_algos.py
--- a/swh/scanner/benchmark_algos.py
+++ b/swh/scanner/benchmark_algos.py
@@ -6,6 +6,7 @@
import collections
import itertools
import json
+import logging
import os
from pathlib import Path
import random
@@ -353,6 +354,10 @@
counter["api_calls"] = 0
counter["queries"] = 0
source_tree = load_source(Path(root), sre_patterns)
+ logging.info(
+ f'started processing repo "{repo_id}" with algorithm '
+ f'"{algo}" and knowledge base "{backend_name}"'
+ )
if algo == "random":
if seed:
@@ -393,4 +398,9 @@
counter["queries"],
)
+ logging.info(
+ f'finished processing repo "{repo_id}" with algorithm '
+ f'"{algo}" and knowledge base "{backend_name}"'
+ )
+
print(*result, sep=",")
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -281,9 +281,19 @@
@click.option(
"--seed", "-s", metavar="SEED", type=int, help="Seed for the random algorithm"
)
+@click.option("--log-file", "-l", metavar="FILENAME", help="Log custom path.")
@click.pass_context
def benchmark(
- ctx, root_path, api_url, backend_name, origin_url, commit, patterns, algo, seed
+ ctx,
+ root_path,
+ api_url,
+ backend_name,
+ origin_url,
+ commit,
+ patterns,
+ algo,
+ seed,
+ log_file,
):
from importlib import reload
import logging
@@ -293,9 +303,10 @@
# reload logging module avoid conflict with benchmark.py logging
reload(logging)
logging.basicConfig(
- filename="experiments.log",
+ filename=log_file if log_file else "experiments.log",
format="%(asctime)s %(message)s",
datefmt="%m/%d/%Y %I:%M:%S %p",
+ level=logging.INFO,
)
try:

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 10:34 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228467

Event Timeline