diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
@@ -7,7 +7,7 @@
 import it.unimi.dsi.big.webgraph.LazyLongIterator;
 
 import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.benchmark.Common;
+import org.softwareheritage.graph.benchmark.Benchmark;
 import org.softwareheritage.graph.benchmark.utils.Statistics;
 import org.softwareheritage.graph.benchmark.utils.Timing;
 
@@ -26,11 +26,12 @@
    * @param args command line arguments
    */
   public static void main(String[] args) throws IOException, JSAPException {
-    Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args);
+    Benchmark bench = new Benchmark();
+    bench.parseCommandLineArgs(args);
 
-    Graph graph = new Graph(benchArgs.graphPath);
+    Graph graph = new Graph(bench.args.graphPath);
 
-    long[] nodeIds = benchArgs.random.generateNodeIds(graph, benchArgs.nbNodes);
+    long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
 
     ArrayList<Double> timings = new ArrayList<>();
     for (long nodeId : nodeIds) {
@@ -41,7 +42,7 @@
       timings.add(duration);
     }
 
-    System.out.println("Used " + benchArgs.nbNodes + " random edges (results are in seconds):");
+    System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):");
     Statistics stats = new Statistics(timings);
     stats.printAll();
   }
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
rename from java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java
rename to java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
@@ -1,6 +1,11 @@
 package org.softwareheritage.graph.benchmark;
 
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
 import java.util.ArrayList;
+import java.util.StringJoiner;
 import java.util.function.Function;
 
 import com.martiansoftware.jsap.FlaggedOption;
@@ -25,33 +30,49 @@
  * @since 0.0.1
  */
 
-public class Common {
+public class Benchmark {
   /**
-   * Benchmark input arguments.
+   * Input arguments.
    */
-  public static class BenchArgs {
+  public static class Args {
     /** Basename of the compressed graph */
     public String graphPath;
     /** Number of random nodes to use for the benchmark */
     public int nbNodes;
+    /** File name for CSV format benchmark log */
+    public String logFile;
     /** Random generator */
     public Random random;
   }
 
+  /** Command line arguments */
+  public Args args;
+
+  /** Whether this instance has already created the CSV log file and written its header */
+  private boolean csvLogCreated = false;
+
+  /**
+   * Constructor.
+   */
+  public Benchmark() {
+    this.args = new Args();
+  }
+
   /**
    * Parses benchmark command line arguments.
    *
    * @param args command line arguments
-   * @return parsed arguments as a {@link BenchArgs}
    */
-  public static BenchArgs parseCommandLineArgs(String[] args) throws JSAPException {
-    SimpleJSAP jsap = new SimpleJSAP(Common.class.getName(),
+  public void parseCommandLineArgs(String[] args) throws JSAPException {
+    SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(),
         "Benchmark tool for Software Heritage use-cases scenarios.",
         new Parameter[] {
           new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
               JSAP.NOT_GREEDY, "The basename of the compressed graph."),
           new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
               "nb-nodes", "Number of random nodes used to do the benchmark."),
+          new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l',
+              "log-file", "File name to output CSV format benchmark log."),
           new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's',
               "seed", "Random generator seed."),
         });
@@ -61,17 +82,16 @@
       System.exit(1);
     }
 
-    BenchArgs benchArgs = new BenchArgs();
-    benchArgs.graphPath = config.getString("graphPath");
-    benchArgs.nbNodes = config.getInt("nbNodes");
-    benchArgs.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random();
-
-    return benchArgs;
+    this.args.graphPath = config.getString("graphPath");
+    this.args.nbNodes = config.getInt("nbNodes");
+    this.args.logFile = config.getString("logFile");
+    this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random();
   }
 
   /**
-   * Times a specific endpoint and prints aggregated statistics.
+   * Times a specific endpoint and outputs individual datapoints along with aggregated statistics.
    *
+   * @param useCaseName benchmark use-case name
    * @param graph compressed graph used in the benchmark
    * @param nodeIds node ids to use as starting point for the endpoint traversal
    * @param operation endpoint function to benchmark
@@ -79,24 +99,52 @@
    * href="https://docs.softwareheritage.org/devel/swh-graph/api.html#walk">API</a>
    * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
    */
-  public static void timeEndpoint(Graph graph, long[] nodeIds,
-      Function<Endpoint.Input, Endpoint.Output> operation, String dstFmt, String algorithm) {
+  public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
+      Function<Endpoint.Input, Endpoint.Output> operation, String dstFmt, String algorithm)
+      throws IOException {
     ArrayList<Double> timings = new ArrayList<>();
     ArrayList<Double> timingsNormalized = new ArrayList<>();
 
-    for (long nodeId : nodeIds) {
-      SwhPID swhPID = graph.getSwhPID(nodeId);
-
-      Endpoint.Output output = (dstFmt == null)
-          ? operation.apply(new Endpoint.Input(swhPID))
-          : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm));
-
-      timings.add(output.meta.timings.traversal);
-      if (output.meta.nbEdgesAccessed != 0) {
-        timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
+    final String CSV_SEPARATOR = ";";
+    // Truncate the log on the first call only; later use-cases of the same run append to it.
+    try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, csvLogCreated))) {
+      if (!csvLogCreated) {
+        StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR);
+        csvHeader.add("use case name")
+            .add("SWH PID")
+            .add("number of edges accessed")
+            .add("traversal timing")
+            .add("pid2node timing")
+            .add("node2pid timing");
+        csvLog.write(csvHeader.toString() + "\n");
+        csvLogCreated = true;
+      }
+
+      for (long nodeId : nodeIds) {
+        SwhPID swhPID = graph.getSwhPID(nodeId);
+
+        Endpoint.Output output = (dstFmt == null)
+            ? operation.apply(new Endpoint.Input(swhPID))
+            : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm));
+
+        StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR);
+        csvLine.add(useCaseName)
+            .add(swhPID.toString())
+            .add(Long.toString(output.meta.nbEdgesAccessed))
+            .add(Double.toString(output.meta.timings.traversal))
+            .add(Double.toString(output.meta.timings.pid2node))
+            .add(Double.toString(output.meta.timings.node2pid));
+        csvLog.write(csvLine.toString() + "\n");
+
+        timings.add(output.meta.timings.traversal);
+        if (output.meta.nbEdgesAccessed != 0) {
+          timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
+        }
       }
     }
 
+    System.out.println("\n" + useCaseName + " use-case:");
+
     System.out.println("timings:");
     Statistics stats = new Statistics(timings);
     stats.printAll();
@@ -109,8 +157,8 @@
   /**
    * Same as {@link timeEndpoint} but without destination or algorithm specified to endpoint call.
    */
-  public static void timeEndpoint(
-      Graph graph, long[] nodeIds, Function<Endpoint.Input, Endpoint.Output> operation) {
-    timeEndpoint(graph, nodeIds, operation, null, null);
+  public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
+      Function<Endpoint.Input, Endpoint.Output> operation) throws IOException {
+    timeEndpoint(useCaseName, graph, nodeIds, operation, null, null);
   }
 }
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
@@ -7,7 +7,7 @@
 import org.softwareheritage.graph.Endpoint;
 import org.softwareheritage.graph.Graph;
 import org.softwareheritage.graph.Node;
-import org.softwareheritage.graph.benchmark.Common;
+import org.softwareheritage.graph.benchmark.Benchmark;
 
 /**
  * Benchmark Software Heritage <a
@@ -26,24 +26,22 @@
    * @param args command line arguments
    */
   public static void main(String[] args) throws IOException, JSAPException {
-    Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args);
+    Benchmark bench = new Benchmark();
+    bench.parseCommandLineArgs(args);
 
-    Graph graph = new Graph(benchArgs.graphPath);
+    Graph graph = new Graph(bench.args.graphPath);
 
     long[] dirNodeIds =
-        benchArgs.random.generateNodeIdsOfType(graph, benchArgs.nbNodes, Node.Type.DIR);
+        bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
     long[] revNodeIds =
-        benchArgs.random.generateNodeIdsOfType(graph, benchArgs.nbNodes, Node.Type.REV);
+        bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
 
     Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir");
     Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev");
 
-    System.out.println("Used " + benchArgs.nbNodes + " random nodes (results are in seconds):");
-    System.out.println("\n'ls' use-case");
-    Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::neighbors);
-    System.out.println("\n'ls -R' use-case");
-    Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::visitPaths);
-    System.out.println("\n'git log' use-case");
-    Common.timeEndpoint(graph, revNodeIds, revEndpoint::visitNodes);
+    System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
+    bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors);
+    bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths);
+    bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes);
   }
 }
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
@@ -6,7 +6,7 @@
 
 import org.softwareheritage.graph.Endpoint;
 import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.benchmark.Common;
+import org.softwareheritage.graph.benchmark.Benchmark;
 
 /**
  * Benchmark Software Heritage <a
@@ -25,29 +25,30 @@
    * @param args command line arguments
    */
   public static void main(String[] args) throws IOException, JSAPException {
-    Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args);
+    Benchmark bench = new Benchmark();
+    bench.parseCommandLineArgs(args);
 
-    Graph graph = new Graph(benchArgs.graphPath);
+    Graph graph = new Graph(bench.args.graphPath);
 
-    long[] nodeIds = benchArgs.random.generateNodeIds(graph, benchArgs.nbNodes);
+    long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
 
     Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev");
     Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*");
 
-    System.out.println("Used " + benchArgs.nbNodes + " random nodes (results are in seconds):");
-
-    System.out.println("\n'commit provenance' use-case (using dfs)");
-    Common.timeEndpoint(graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
-    System.out.println("\n'commit provenance' use-case (using bfs)");
-    Common.timeEndpoint(graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
-    System.out.println("\n'complete commit provenance' use-case");
-    Common.timeEndpoint(graph, nodeIds, commitProvenanceEndpoint::leaves);
-
-    System.out.println("\n'origin provenance' use-case (using dfs)");
-    Common.timeEndpoint(graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
-    System.out.println("\n'origin provenance' use-case (using bfs)");
-    Common.timeEndpoint(graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
-    System.out.println("\n'complete origin provenance' use-case");
-    Common.timeEndpoint(graph, nodeIds, originProvenanceEndpoint::leaves);
+    System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
+
+    bench.timeEndpoint(
+        "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
+    bench.timeEndpoint(
+        "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
+    bench.timeEndpoint(
+        "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
+
+    bench.timeEndpoint(
+        "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
+    bench.timeEndpoint(
+        "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
+    bench.timeEndpoint(
+        "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
   }
 }
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
@@ -6,7 +6,7 @@
 
 import org.softwareheritage.graph.Endpoint;
 import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.benchmark.Common;
+import org.softwareheritage.graph.benchmark.Benchmark;
 
 /**
  * Benchmark Software Heritage <a
@@ -25,16 +25,16 @@
    * @param args command line arguments
    */
   public static void main(String[] args) throws IOException, JSAPException {
-    Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args);
+    Benchmark bench = new Benchmark();
+    bench.parseCommandLineArgs(args);
 
-    Graph graph = new Graph(benchArgs.graphPath);
+    Graph graph = new Graph(bench.args.graphPath);
 
-    long[] nodeIds = benchArgs.random.generateNodeIds(graph, benchArgs.nbNodes);
+    long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
 
     Endpoint endpoint = new Endpoint(graph, "forward", "*");
 
-    System.out.println("Used " + benchArgs.nbNodes + " random nodes (results are in seconds):");
-    System.out.println("\n'git bundle' use-case");
-    Common.timeEndpoint(graph, nodeIds, endpoint::visitNodes);
+    System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
+    bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes);
   }
 }