diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
index a98260b..795ff5f 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
@@ -1,157 +1,166 @@
package org.softwareheritage.graph.benchmark;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.StringJoiner;
import java.util.function.Function;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.benchmark.utils.Random;
import org.softwareheritage.graph.benchmark.utils.Statistics;
/**
* Benchmark common utility functions.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Benchmark {
/**
* Input arguments.
*/
public class Args {
/** Basename of the compressed graph */
public String graphPath;
/** Number of random nodes to use for the benchmark */
public int nbNodes;
/** File name for CSV format benchmark log */
public String logFile;
/** Random generator */
public Random random;
}
/** Command line arguments */
public Args args;
+ /** CSV separator for log file */
+ final String CSV_SEPARATOR = ";";
/**
* Constructor.
*/
public Benchmark() {
this.args = new Args();
}
/**
* Parses benchmark command line arguments.
*
* @param args command line arguments
*/
public void parseCommandLineArgs(String[] args) throws JSAPException {
SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(),
"Benchmark tool for Software Heritage use-cases scenarios.",
new Parameter[] {
new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the compressed graph."),
new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
"nb-nodes", "Number of random nodes used to do the benchmark."),
new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l',
"log-file", "File name to output CSV format benchmark log."),
new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's',
"seed", "Random generator seed."),
});
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
this.args.graphPath = config.getString("graphPath");
this.args.nbNodes = config.getInt("nbNodes");
this.args.logFile = config.getString("logFile");
this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random();
}
+ /**
+ * Creates CSV file for log output.
+ */
+ public void createCSVLogFile() throws IOException {
+ try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) {
+ StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR);
+ csvHeader.add("use case name")
+ .add("SWH PID")
+ .add("number of edges accessed")
+ .add("traversal timing")
+ .add("pid2node timing")
+ .add("node2pid timing");
+ csvLog.write(csvHeader.toString() + "\n");
+ }
+ }
+
/**
* Times a specific endpoint and outputs individual datapoints along with aggregated statistics.
*
* @param useCaseName benchmark use-case name
* @param graph compressed graph used in the benchmark
* @param nodeIds node ids to use as starting point for the endpoint traversal
* @param operation endpoint function to benchmark
* @param dstFmt destination formatted string as described in the API
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
Function operation, String dstFmt, String algorithm)
throws IOException {
ArrayList timings = new ArrayList<>();
ArrayList timingsNormalized = new ArrayList<>();
- final String CSV_SEPARATOR = ";";
- try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) {
- StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR);
- csvHeader.add("use case name")
- .add("SWH PID")
- .add("number of edges accessed")
- .add("traversal timing")
- .add("pid2node timing")
- .add("node2pid timing");
- csvLog.write(csvHeader.toString() + "\n");
-
+ final boolean append = true;
+ try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) {
for (long nodeId : nodeIds) {
SwhPID swhPID = graph.getSwhPID(nodeId);
Endpoint.Output output = (dstFmt == null)
? operation.apply(new Endpoint.Input(swhPID))
: operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm));
StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR);
csvLine.add(useCaseName)
.add(swhPID.toString())
.add(Long.toString(output.meta.nbEdgesAccessed))
.add(Double.toString(output.meta.timings.traversal))
.add(Double.toString(output.meta.timings.pid2node))
.add(Double.toString(output.meta.timings.node2pid));
csvLog.write(csvLine.toString() + "\n");
timings.add(output.meta.timings.traversal);
if (output.meta.nbEdgesAccessed != 0) {
timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
}
}
}
System.out.println("\n" + useCaseName + " use-case:");
System.out.println("timings:");
Statistics stats = new Statistics(timings);
stats.printAll();
System.out.println("timings normalized:");
Statistics statsNormalized = new Statistics(timingsNormalized);
statsNormalized.printAll();
}
/**
* Same as {@link timeEndpoint} but without destination or algorithm specified to endpoint call.
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
Function operation) throws IOException {
timeEndpoint(useCaseName, graph, nodeIds, operation, null, null);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
index b45b8ec..92dd9f6 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
@@ -1,47 +1,48 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.benchmark.Benchmark;
/**
* Benchmark Software Heritage browsing
* use-cases scenarios.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Browsing {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] dirNodeIds =
bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
long[] revNodeIds =
bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir");
Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
+ bench.createCSVLogFile();
bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors);
bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths);
bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
index 66a7d52..c2a3bf6 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
@@ -1,54 +1,55 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.benchmark.Benchmark;
/**
* Benchmark Software Heritage provenance
* use-cases scenarios.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Provenance {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev");
Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
+ bench.createCSVLogFile();
bench.timeEndpoint(
"commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
bench.timeEndpoint(
"commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
bench.timeEndpoint(
"complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
bench.timeEndpoint(
"origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
bench.timeEndpoint(
"origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
bench.timeEndpoint(
"complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
index 2773a3b..5d024c5 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
@@ -1,40 +1,41 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.benchmark.Benchmark;
/**
* Benchmark Software Heritage vault
* use-case scenario.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Vault {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint endpoint = new Endpoint(graph, "forward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
+ bench.createCSVLogFile();
bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes);
}
}