diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java index fcbce3a..ceb0a43 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -1,291 +1,291 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; -import org.softwareheritage.graph.utils.Timing; +import org.softwareheritage.graph.benchmark.utils.Timing; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.algo.Traversal */ public class Endpoint { /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** * Endpoint result metadata. */ public class Meta { /** Operations timings */ public Timings timings; /** Number of edges accessed during traversal */ public long nbEdgesAccessed; public Meta() { this.timings = new Timings(); this.nbEdgesAccessed = 0; } /** * Wrapper class for JSON output format. */ public class Timings { /** Time in seconds to do the traversal */ public double traversal; /** Time in seconds to convert input SWH PID to node id */ public double pid2node; /** Time in seconds to convert output node ids to SWH PIDs */ public double node2pid; } } } /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWH PIDs */ private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { ArrayList swhPIDs = new ArrayList<>(); for (long nodeId : nodeIds) { swhPIDs.add(graph.getSwhPID(nodeId)); } return swhPIDs; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSwhPID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#leaves(long) */ public Output leaves(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) */ public Output neighbors(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#walk */ public Output walk(SwhPID src, String dstFmt, String algorithm) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWH PID or a node type try { SwhPID dstSwhPID = new SwhPID(dstFmt); long dstNodeId = graph.getNodeId(dstSwhPID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored2) { } } output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) */ public Output visitNodes(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) */ public Output visitPaths(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getnbEdgesAccessed(); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); output.meta.timings.node2pid = Timing.stop(startTime); return output; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index 6e4aef9..95a0a33 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,49 +1,49 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import java.util.ArrayList; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.utils.Random; -import org.softwareheritage.graph.utils.Statistics; -import org.softwareheritage.graph.utils.Timing; +import org.softwareheritage.graph.benchmark.utils.Random; +import org.softwareheritage.graph.benchmark.utils.Statistics; +import org.softwareheritage.graph.benchmark.utils.Timing; /** * Benchmark to time edge access time. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class AccessEdge { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); final long seed = 42; final int nbNodes = 1_000_000; Random random = new Random(seed); long[] nodeIds = random.generateNodeIds(graph, nbNodes); ArrayList timings = new ArrayList<>(); for (long nodeId : nodeIds) { long startTime = Timing.start(); LazyLongIterator neighbors = graph.successors(nodeId); long firstNeighbor = neighbors.nextLong(); double duration = Timing.stop(startTime); timings.add(duration); } System.out.println("Used " + nbNodes + " random edges (results are in seconds):"); Statistics stats = new Statistics(timings); stats.printAll(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 205ba52..1487175 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,47 +1,47 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.benchmark.Common; -import org.softwareheritage.graph.utils.Random; +import org.softwareheritage.graph.benchmark.utils.Random; /** * Benchmark Software Heritage browsing use-cases scenarios: use cases. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Browsing { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); final long seed = 42; final int nbNodes = 100_000; Random random = new Random(seed); long[] dirNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.DIR); long[] revNodeIds = random.generateNodeIdsOfType(graph, nbNodes, Node.Type.REV); Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); System.out.println("Used " + nbNodes + " random nodes (results are in seconds):"); System.out.println("\n'ls' use-case"); Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::neighbors); System.out.println("\n'ls -R' use-case"); Common.timeEndpoint(graph, dirNodeIds, dirEndpoint::visitPaths); System.out.println("\n'git log' use-case"); Common.timeEndpoint(graph, revNodeIds, revEndpoint::visitNodes); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java index f6b1c0b..462cbe4 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Common.java @@ -1,51 +1,51 @@ package org.softwareheritage.graph.benchmark; import java.util.ArrayList; import java.util.function.Function; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.utils.Statistics; +import org.softwareheritage.graph.benchmark.utils.Statistics; /** * Benchmark common utility functions. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Common { /** * Times a specific endpoint and prints aggregated statistics. * * @param graph compressed graph used in the benchmark * @param nodeIds node ids to use as starting point for the endpoint traversal * @param operation endpoint function to benchmark */ public static void timeEndpoint( Graph graph, long[] nodeIds, Function operation) { ArrayList timings = new ArrayList<>(); ArrayList timingsNormalized = new ArrayList<>(); for (long nodeId : nodeIds) { SwhPID swhPID = graph.getSwhPID(nodeId); Endpoint.Output output = operation.apply(swhPID); timings.add(output.meta.timings.traversal); if (output.meta.nbEdgesAccessed != 0) { timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); } } System.out.println("timings:"); Statistics stats = new Statistics(timings); stats.printAll(); System.out.println("timings normalized:"); Statistics statsNormalized = new Statistics(timingsNormalized); statsNormalized.printAll(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java similarity index 96% rename from java/server/src/main/java/org/softwareheritage/graph/utils/Random.java rename to java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java index 7eba612..2a6e17e 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/utils/Random.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java @@ -1,69 +1,69 @@ -package org.softwareheritage.graph.utils; +package org.softwareheritage.graph.benchmark.utils; import java.util.PrimitiveIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Random related utility class. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Random { /** Internal pseudorandom generator */ java.util.Random random; /** * Constructor. */ public Random() { this.random = new java.util.Random(); } /** * Constructor. * * @param seed random generator seed */ public Random(long seed) { this.random = new java.util.Random(seed); } /** * Generates random node ids. * * @param graph graph used to pick node ids * @param nbNodes number of node ids to generate * @return an array of random node ids */ public long[] generateNodeIds(Graph graph, int nbNodes) { return random.longs(nbNodes, 0, graph.getNbNodes()).toArray(); } /** * Generates random node ids with a specific type. * * @param graph graph used to pick node ids * @param nbNodes number of node ids to generate * @param expectedType specific node type to pick * @return an array of random node ids */ public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) { PrimitiveIterator.OfLong nodes = random.longs(0, graph.getNbNodes()).iterator(); long[] nodeIds = new long[nbNodes]; long nextId; for (int i = 0; i < nbNodes; i++) { do { nextId = nodes.nextLong(); } while (graph.getNodeType(nextId) != expectedType); nodeIds[i] = nextId; } return nodeIds; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java similarity index 97% rename from java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java rename to java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java index ff9db25..426a415 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/utils/Statistics.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java @@ -1,89 +1,89 @@ -package org.softwareheritage.graph.utils; +package org.softwareheritage.graph.benchmark.utils; import java.util.ArrayList; /** * Compute various statistics on a list of values. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Statistics { /** Input values */ ArrayList values; /** * Constructor. * * @param values input values */ public Statistics(ArrayList values) { this.values = values; } /** * Returns the minimum value. * * @return minimum value */ public double getMin() { double min = Double.POSITIVE_INFINITY; for (double v : values) { min = Math.min(min, v); } return min; } /** * Returns the maximum value. * * @return maximum value */ public double getMax() { double max = Double.NEGATIVE_INFINITY; for (double v : values) { max = Math.max(max, v); } return max; } /** * Computes the average. * * @return average value */ public double getAverage() { double sum = 0; for (double v : values) { sum += v; } return sum / (double) values.size(); } /** * Computes the standard deviation. * * @return standard deviation value */ public double getStandardDeviation() { double average = getAverage(); double variance = 0; for (double v : values) { variance += (v - average) * (v - average); } variance /= (double) values.size(); return Math.sqrt(variance); } /** * Computes and prints all statistical values. */ public void printAll() { System.out.println("min value: " + getMin()); System.out.println("max value: " + getMax()); System.out.println("average: " + getAverage()); System.out.println("standard deviation: " + getStandardDeviation()); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java similarity index 92% rename from java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java rename to java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java index 6deada3..fea64a2 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java @@ -1,32 +1,32 @@ -package org.softwareheritage.graph.utils; +package org.softwareheritage.graph.benchmark.utils; /** * Time measurement utility class. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Timing { /** * Returns measurement starting timestamp. * * @return timestamp used for time measurement */ public static long start() { return System.nanoTime(); } /** * Ends timing measurement and returns total duration in seconds. * * @param startTime measurement starting timestamp * @return time in seconds elapsed since starting point */ public static double stop(long startTime) { long endTime = System.nanoTime(); double duration = (double) (endTime - startTime) / 1_000_000_000; return duration; } }