D5032.diff
View Options

	diff --git a/benchmark.py b/benchmark.py
	--- a/benchmark.py
	+++ b/benchmark.py
	@@ -18,7 +18,9 @@
	SEED_OPTIONS = ["-s 10"]


	-def get_scenario_cmd(algo, kb_url, kb_label, origin_info, extracted_repo_path):
	+def get_scenario_cmd(
	+ algo, kb_url, kb_label, origin_info, extracted_repo_path, log_file
	+):
	return [
	"swh",
	"scanner",
	@@ -33,6 +35,8 @@
	origin_info["origin"],
	"--commit",
	origin_info["commit"],
	+ "--log-file",
	+ log_file,
	"--exclude",
	str(extracted_repo_path) + "/.git",
	str(extracted_repo_path),
	@@ -40,7 +44,7 @@


	def run_experiments(
	- repo_path: str, temp_path: str, kb_state_file: str, algos: Set[str]
	+ repo_path: str, temp_path: str, kb_state_file: str, algos: Set[str], log_file: str
	):
	"""This function create a process for each experiment; one experiment is composed
	by: the repository we want to scan, the algorithms we need to test and different
	@@ -70,13 +74,23 @@
	if algo == "random":
	for seed_opt in SEED_OPTIONS:
	random_cmd = get_scenario_cmd(
	- algo, kb_url, kb_label, origin_info, str(extracted_repo_path)
	+ algo,
	+ kb_url,
	+ kb_label,
	+ origin_info,
	+ str(extracted_repo_path),
	+ log_file,
	)
	scenario_cmds.append(random_cmd + [seed_opt])
	else:
	scenario_cmds.append(
	get_scenario_cmd(
	- algo, kb_url, kb_label, origin_info, str(extracted_repo_path)
	+ algo,
	+ kb_url,
	+ kb_label,
	+ origin_info,
	+ str(extracted_repo_path),
	+ log_file,
	)
	)

	@@ -97,6 +111,9 @@
	@click.argument("repo_path", type=click.Path(exists=True), required=True)
	@click.argument("temp_path", type=click.Path(exists=True), required=True)
	@click.argument("kb_state", type=click.Path(exists=True), required=True)
	+@click.option(
	+ "--log-file", "-l", metavar="FILENAME", required=True, help="Log custom path."
	+)
	@click.option(
	"-a",
	"--algo",
	@@ -110,9 +127,9 @@
	metavar="ALGORITHM_NAME",
	help="The algorithm name for the benchmark.",
	)
	-def main(repo_path, temp_path, kb_state, algos):
	+def main(repo_path, temp_path, kb_state, log_file, algos):
	logging.basicConfig(
	- filename="experiments.log",
	+ filename=log_file,
	format="%(asctime)s %(message)s",
	datefmt="%m/%d/%Y %I:%M:%S %p",
	)
	@@ -125,7 +142,7 @@
	stdout=subprocess.DEVNULL,
	stderr=sys.stderr,
	)
	- run_experiments(repo_path, temp_path, kb_state, set(algos))
	+ run_experiments(repo_path, temp_path, kb_state, set(algos), log_file)
	except Exception as e:
	logging.exception(e)
	except IOError as ioerror:
	diff --git a/run_benchmark.sh b/run_benchmark.sh
	--- a/run_benchmark.sh
	+++ b/run_benchmark.sh
	@@ -13,6 +13,7 @@

	temp_dir=$1
	kb_state=$2
	+log_file=$3

	if [ ! -d "$temp_dir" ]; then
	echo "You should provide a valid temporary directory path"
	@@ -24,7 +25,14 @@
	exit 1
	fi

	-for i in "${@:3}"; do
	+if [ "$log_file" == '' ]; then
	+ echo "You should provide the file where the logs will be saved"
	+ exit 1
	+else
	+ log_file="-l ${log_file}"
	+fi
	+
	+for i in "${@:4}"; do
	algos="$algos -a $i"
	done

	@@ -33,5 +41,5 @@

	while IFS= read -r repo;
	do
	- ./benchmark.py $repo $temp_dir $kb_state $algos
	+ ./benchmark.py $repo $temp_dir $kb_state $algos $log_file
	done
	diff --git a/swh/scanner/benchmark_algos.py b/swh/scanner/benchmark_algos.py
	--- a/swh/scanner/benchmark_algos.py
	+++ b/swh/scanner/benchmark_algos.py
	@@ -6,6 +6,7 @@
	import collections
	import itertools
	import json
	+import logging
	import os
	from pathlib import Path
	import random
	@@ -353,6 +354,10 @@
	counter["api_calls"] = 0
	counter["queries"] = 0
	source_tree = load_source(Path(root), sre_patterns)
	+ logging.info(
	+ f'started processing repo "{repo_id}" with algorithm '
	+ f'"{algo}" and knowledge base "{backend_name}"'
	+ )

	if algo == "random":
	if seed:
	@@ -393,4 +398,9 @@
	counter["queries"],
	)

	+ logging.info(
	+ f'finished processing repo "{repo_id}" with algorithm '
	+ f'"{algo}" and knowledge base "{backend_name}"'
	+ )
	+
	print(*result, sep=",")
	diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
	--- a/swh/scanner/cli.py
	+++ b/swh/scanner/cli.py
	@@ -281,9 +281,19 @@
	@click.option(
	"--seed", "-s", metavar="SEED", type=int, help="Seed for the random algorithm"
	)
	+@click.option("--log-file", "-l", metavar="FILENAME", help="Log custom path.")
	@click.pass_context
	def benchmark(
	- ctx, root_path, api_url, backend_name, origin_url, commit, patterns, algo, seed
	+ ctx,
	+ root_path,
	+ api_url,
	+ backend_name,
	+ origin_url,
	+ commit,
	+ patterns,
	+ algo,
	+ seed,
	+ log_file,
	):
	from importlib import reload
	import logging
	@@ -293,9 +303,10 @@
	# reload logging module avoid conflict with benchmark.py logging
	reload(logging)
	logging.basicConfig(
	- filename="experiments.log",
	+ filename=log_file if log_file else "experiments.log",
	format="%(asctime)s %(message)s",
	datefmt="%m/%d/%Y %I:%M:%S %p",
	+ level=logging.INFO,
	)

	try:

File Metadata

Mime Type: text/plain
Expires: Dec 21 2024, 10:34 PM (27 w, 6 d ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3228467

D5032.diff
No One | Temporary
Actions

D5032.diff
View Options

File Metadata

Event Timeline

D5032.diff | No One | Temporary | Actions

D5032.diff | View Options

File Metadata

Event Timeline

D5032.diff
No One | Temporary
Actions

D5032.diff
View Options