diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index 95a0a33..7781ba7 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,49 +1,50 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import java.util.ArrayList; +import com.martiansoftware.jsap.JSAPException; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.benchmark.Common; import org.softwareheritage.graph.benchmark.utils.Random; import org.softwareheritage.graph.benchmark.utils.Statistics; import org.softwareheritage.graph.benchmark.utils.Timing; /** * Benchmark to time edge access time. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class AccessEdge { /** * Main entrypoint. * * @param args command line arguments */ - public static void main(String[] args) throws IOException { - String path = args[0]; - Graph graph = new Graph(path); + public static void main(String[] args) throws IOException, JSAPException { + Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args); - final long seed = 42; - final int nbNodes = 1_000_000; - Random random = new Random(seed); - long[] nodeIds = random.generateNodeIds(graph, nbNodes); + Graph graph = new Graph(benchArgs.graphPath); + Random random = (benchArgs.seed == null) ? 
new Random() : new Random(benchArgs.seed); + + long[] nodeIds = random.generateNodeIds(graph, benchArgs.nbNodes); ArrayList timings = new ArrayList<>(); for (long nodeId : nodeIds) { long startTime = Timing.start(); LazyLongIterator neighbors = graph.successors(nodeId); long firstNeighbor = neighbors.nextLong(); double duration = Timing.stop(startTime); timings.add(duration); } - System.out.println("Used " + nbNodes + " random edges (results are in seconds):"); + System.out.println("Used " + benchArgs.nbNodes + " random edges (results are in seconds):"); Statistics stats = new Statistics(timings); stats.printAll(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 4d284bc..ca59fe9 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,48 +1,49 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; +import com.martiansoftware.jsap.JSAPException; + import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.benchmark.Common; import org.softwareheritage.graph.benchmark.utils.Random; /** * Benchmark Software Heritage browsing * use-cases scenarios. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Browsing { /** * Main entrypoint. * * @param args command line arguments */ - public static void main(String[] args) throws IOException { - String path = args[0]; - Graph graph = new Graph(path); + public static void main(String[] args) throws IOException, JSAPException { + Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args); + + Graph graph = new Graph(benchArgs.graphPath); + Random random = (benchArgs.seed == null) ? 
new Random() : new Random(benchArgs.seed); - final long seed = 42; - final int nbNodes = 100_000; - Random random = new Random(seed); - long[] dirNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.DIR); - long[] revNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.REV); + long[] dirNodeIds = random.generateNodeIdsOfType(graph, benchArgs.nbNodes, Node.Type.DIR); + long[] revNodeIds = random.generateNodeIdsOfType(graph, benchArgs.nbNodes, Node.Type.REV); Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); - System.out.println("Used " + nbNodes + " random nodes (results are in seconds):"); + System.out.println("Used " + benchArgs.nbNodes + " random nodes (results are in seconds):"); System.out.println("\n'ls' use-case"); Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::neighbors); System.out.println("\n'ls -R' use-case"); Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::visitPaths); System.out.println("\n'git log' use-case"); Common.timeEndpoint(graph, revNodeIds, revEndpoint::visitNodes); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java index 01574b3..1584a2d 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java @@ -1,64 +1,115 @@ package org.softwareheritage.graph.benchmark; import java.util.ArrayList; import java.util.function.Function; +import com.martiansoftware.jsap.FlaggedOption; +import com.martiansoftware.jsap.JSAP; +import com.martiansoftware.jsap.JSAPException; +import com.martiansoftware.jsap.JSAPResult; +import com.martiansoftware.jsap.Parameter; +import com.martiansoftware.jsap.SimpleJSAP; +import com.martiansoftware.jsap.UnflaggedOption; + import org.softwareheritage.graph.Endpoint; import 
org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.benchmark.utils.Statistics; /** * Benchmark common utility functions. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Common { + /** + * Benchmark input arguments. + */ + public static class BenchArgs { + /** Basename of the compressed graph */ + public String graphPath; + /** Number of random nodes to use for the benchmark */ + public int nbNodes; + /** Random generator seed, or {@code null} when no seed was given on the command line */ + public Long seed; + } + + /** + * Parses benchmark command line arguments. + * + * @param args command line arguments + * @return parsed arguments as a {@link BenchArgs} + */ + public static BenchArgs parseCommandLineArgs(String[] args) throws JSAPException { + SimpleJSAP jsap = new SimpleJSAP(Common.class.getName(), + "Benchmark tool for Software Heritage use-cases scenarios.", + new Parameter[] { + new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + JSAP.NOT_GREEDY, "The basename of the compressed graph."), + new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', + "nb-nodes", "Number of random nodes used to do the benchmark."), + new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', + "seed", "Random generator seed."), + }); + + JSAPResult config = jsap.parse(args); + if (jsap.messagePrinted()) { + System.exit(1); + } + + BenchArgs benchArgs = new BenchArgs(); + benchArgs.graphPath = config.getString("graphPath"); + benchArgs.nbNodes = config.getInt("nbNodes"); + benchArgs.seed = config.contains("seed") ? Long.valueOf(config.getLong("seed")) : null; + + return benchArgs; + } + /** * Times a specific endpoint and prints aggregated statistics. 
* * @param graph compressed graph used in the benchmark * @param nodeIds node ids to use as starting point for the endpoint traversal * @param operation endpoint function to benchmark * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public static void timeEndpoint(Graph graph, long[] nodeIds, Function operation, String dstFmt, String algorithm) { ArrayList timings = new ArrayList<>(); ArrayList timingsNormalized = new ArrayList<>(); for (long nodeId : nodeIds) { SwhPID swhPID = graph.getSwhPID(nodeId); Endpoint.Output output = (dstFmt == null) ? operation.apply(new Endpoint.Input(swhPID)) : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm)); timings.add(output.meta.timings.traversal); if (output.meta.nbEdgesAccessed != 0) { timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); } } System.out.println("timings:"); Statistics stats = new Statistics(timings); stats.printAll(); System.out.println("timings normalized:"); Statistics statsNormalized = new Statistics(timingsNormalized); statsNormalized.printAll(); } /** * Same as {@link timeEndpoint} but without destination or algorithm specified to endpoint call. 
*/ public static void timeEndpoint( Graph graph, long[] nodeIds, Function operation) { timeEndpoint(graph, nodeIds, operation, null, null); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java index b3404ea..2594cde 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java @@ -1,54 +1,55 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; +import com.martiansoftware.jsap.JSAPException; + import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Common; import org.softwareheritage.graph.benchmark.utils.Random; /** * Benchmark Software Heritage provenance * use-cases scenarios. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Provenance { /** * Main entrypoint. * * @param args command line arguments */ - public static void main(String[] args) throws IOException { - String path = args[0]; - Graph graph = new Graph(path); + public static void main(String[] args) throws IOException, JSAPException { + Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args); + + Graph graph = new Graph(benchArgs.graphPath); + Random random = (benchArgs.seed == null) ? 
new Random() : new Random(benchArgs.seed); - final long seed = 42; - final int nbNodes = 100_000; - Random random = new Random(seed); - long[] nodeIds = random.generateNodeIds(graph, nbNodes); + long[] nodeIds = random.generateNodeIds(graph, benchArgs.nbNodes); Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); - System.out.println("Used " + nbNodes + " random nodes (results are in seconds):"); + System.out.println("Used " + benchArgs.nbNodes + " random nodes (results are in seconds):"); System.out.println("\n'commit provenance' use-case (using dfs)"); Common.timeEndpoint(graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); System.out.println("\n'commit provenance' use-case (using bfs)"); Common.timeEndpoint(graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); System.out.println("\n'complete commit provenance' use-case"); Common.timeEndpoint(graph, nodeIds, commitProvenanceEndpoint::leaves); System.out.println("\n'origin provenance' use-case (using dfs)"); Common.timeEndpoint(graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); System.out.println("\n'origin provenance' use-case (using bfs)"); Common.timeEndpoint(graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); System.out.println("\n'complete origin provenance' use-case"); Common.timeEndpoint(graph, nodeIds, originProvenanceEndpoint::leaves); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java index fd9fbf6..4ff1594 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java @@ -1,41 +1,42 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; +import com.martiansoftware.jsap.JSAPException; + import 
org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Common; import org.softwareheritage.graph.benchmark.utils.Random; /** * Benchmark Software Heritage vault * use-case scenario. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Vault { /** * Main entrypoint. * * @param args command line arguments */ - public static void main(String[] args) throws IOException { - String path = args[0]; - Graph graph = new Graph(path); + public static void main(String[] args) throws IOException, JSAPException { + Common.BenchArgs benchArgs = Common.parseCommandLineArgs(args); + + Graph graph = new Graph(benchArgs.graphPath); + Random random = (benchArgs.seed == null) ? new Random() : new Random(benchArgs.seed); - final long seed = 42; - final int nbNodes = 100_000; - Random random = new Random(seed); - long[] nodeIds = random.generateNodeIds(graph, nbNodes); + long[] nodeIds = random.generateNodeIds(graph, benchArgs.nbNodes); Endpoint endpoint = new Endpoint(graph, "forward", "*"); - System.out.println("Used " + nbNodes + " random nodes (results are in seconds):"); + System.out.println("Used " + benchArgs.nbNodes + " random nodes (results are in seconds):"); System.out.println("\n'git bundle' use-case"); Common.timeEndpoint(graph, nodeIds, endpoint::visitNodes); } }