diff --git a/java/server/README.md b/java/server/README.md index 5e2a41c..baeb2a7 100644 --- a/java/server/README.md +++ b/java/server/README.md @@ -1,50 +1,50 @@ Graph service - Server side =========================== Server side Java REST API. Build ----- ```bash $ mvn compile assembly:single ``` Start REST API -------------- ```bash -$ java -cp target/swh-graph-0.0.1-jar-with-dependencies.jar \ +$ java -cp target/swh-graph-0.0.2-jar-with-dependencies.jar \ org.softwareheritage.graph.App \ ``` Default port is 5009 (use the `--port` option to change port number). If you need timings metadata send back to the client in addition to the result, use the `--timings` flag. Tests ----- Unit tests rely on test data that are already available in the Git repository (under `src/test/dataset/`). You generally only need to run them using Maven: ```bash $ mvn test ``` In case you want to regenerate the test data: ```bash # Graph compression $ cd src/test/dataset $ ./generate_graph.sh $ cd ../../../ $ mvn compile assembly:single # Dump mapping files -$ java -cp target/swh-graph-0.0.1-jar-with-dependencies.jar \ +$ java -cp target/swh-graph-0.0.2-jar-with-dependencies.jar \ org.softwareheritage.graph.backend.Setup \ src/test/dataset/example.nodes.csv.gz \ src/test/dataset/output/example ``` diff --git a/java/server/pom.xml b/java/server/pom.xml index bd06957..7da3b7c 100644 --- a/java/server/pom.xml +++ b/java/server/pom.xml @@ -1,151 +1,151 @@ 4.0.0 org.softwareheritage.graph swh-graph - 0.0.1 + 0.0.2 swh-graph https://www.softwareheritage.org/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 junit junit 4.11 test org.hamcrest hamcrest 2.1 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big 3.5.1 it.unimi.dsi fastutil 8.2.2 com.martiansoftware jsap 2.1 maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 -verbose -Xlint:all maven-surefire-plugin 2.22.1 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin org.softwareheritage.graph.App jar-with-dependencies make-assembly package single org.apache.maven.plugins maven-javadoc-plugin 3.1.1 diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java index f21e372..b6be1c2 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java +++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java @@ -1,93 +1,91 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Edge restriction based on node types, used when visiting the graph. *

* Software Heritage * graph contains multiple node types (contents, directories, revisions, ...) and restricting * the traversal to specific node types is necessary for many querying operations: use cases. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class AllowedEdges { /** Graph on which edge restriction is performed */ Graph graph; /** * 2D boolean matrix storing access rights for all combination of src/dst node types (first * dimension is source, second dimension is destination), when edge restriction is not enforced * this array is set to null for early bypass. */ public boolean[][] restrictedTo; /** * Constructor. * * @param graph the graph on which to perform edge restriction * @param edgesFmt a formatted string describing allowed edges */ public AllowedEdges(Graph graph, String edgesFmt) { this.graph = graph; int nbNodeTypes = Node.Type.values().length; this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes]; // Special values (null, empty, "*") if (edgesFmt == null || edgesFmt.isEmpty()) { return; } if (edgesFmt.equals("*")) { // Allows for quick bypass (with simple null check) when no edge restriction restrictedTo = null; return; } // Format: "src1:dst1,src2:dst2,[...]" String[] edgeTypes = edgesFmt.split(","); for (String edgeType : edgeTypes) { String[] nodeTypes = edgeType.split(":"); if (nodeTypes.length != 2) { throw new IllegalArgumentException("Cannot parse edge type: " + edgeType); } ArrayList srcTypes = Node.Type.parse(nodeTypes[0]); ArrayList dstTypes = Node.Type.parse(nodeTypes[1]); for (Node.Type srcType : srcTypes) { for (Node.Type dstType : dstTypes) { restrictedTo[srcType.ordinal()][dstType.ordinal()] = true; } } } } /** * Checks if a given edge can be followed during graph traversal. * * @param srcNodeId edge source node * @param dstNodeId edge destination node * @return true if allowed and false otherwise */ public boolean isAllowed(long srcNodeId, long dstNodeId) { Node.Type srcType = graph.getNodeType(srcNodeId); Node.Type dstType = graph.getNodeType(dstNodeId); return restrictedTo[srcType.ordinal()][dstType.ordinal()]; } /** * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of * node ids. * * @see AllowedEdges#isAllowed(long, long) */ public boolean isAllowed(Node.Type srcType, Node.Type dstType) { return restrictedTo[srcType.ordinal()][dstType.ordinal()]; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java index cc38758..c22fb30 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/App.java +++ b/java/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,197 +1,195 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.Switch; import com.martiansoftware.jsap.UnflaggedOption; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.algo.Stats; /** * Web framework of the swh-graph server REST API. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class App { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), "Server to load and query a compressed graph representation of Software Heritage archive.", new Parameter[] { new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", "Binding port of the server."), new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new Switch("timings", 't', "timings", "Show timings in API result metadata."), }); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } String graphPath = config.getString("graphPath"); int port = config.getInt("port"); boolean showTimings = config.getBoolean("timings"); startServer(graphPath, port, showTimings); } /** * Loads compressed graph and starts the web server to query it. * * @param graphPath basename of the compressed graph * @param port binding port of the server * @param showTimings true if timings should be in results metadata, false otherwise */ private static void startServer(String graphPath, int port, boolean showTimings) throws IOException { Graph graph = new Graph(graphPath); Stats stats = new Stats(graphPath); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(port); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/neighbors/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/nodes/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/paths/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/walk/:src/:dst", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } /** * Checks query strings names provided to the REST API. * * @param ctx Javalin HTTP request context * @param allowedFmt a regular expression describing allowed query strings names * @throws IllegalArgumentException unknown query string provided */ private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } /** * Formats endpoint result into final JSON for the REST API. *

* Removes unwanted information if necessary, such as timings (to prevent use of side channels * attacks). * * @param output endpoint operation output which needs formatting * @param showTimings true if timings should be in results metadata, false otherwise * @return final Object with desired JSON format */ private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { if (showTimings) { return output; } else { Map metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed); Map outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings); return outputNoTimings; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java index 2323c3f..078fc76 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -1,313 +1,311 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; import org.softwareheritage.graph.benchmark.utils.Timing; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 * @see org.softwareheritage.graph.algo.Traversal */ public class Endpoint { /** * Wrapper class to unify traversal methods input signatures. */ public static class Input { /** Source node of endpoint call specified as a {@link SwhPID} */ public SwhPID src; /** * Destination formatted string as described in the API */ public String dstFmt; /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public String algorithm; public Input(SwhPID src) { this.src = src; } public Input(SwhPID src, String dstFmt, String algorithm) { this.src = src; this.dstFmt = dstFmt; this.algorithm = algorithm; } } /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public static class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** * Endpoint result metadata. */ public class Meta { /** Operations timings */ public Timings timings; /** Number of edges accessed during traversal */ public long nbEdgesAccessed; public Meta() { this.timings = new Timings(); this.nbEdgesAccessed = 0; } /** * Wrapper class for JSON output format. */ public class Timings { /** Time in seconds to do the traversal */ public double traversal; /** Time in seconds to convert input SWH PID to node id */ public double pid2node; /** Time in seconds to convert output node ids to SWH PIDs */ public double node2pid; } } } /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWH PIDs */ private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { ArrayList swhPIDs = new ArrayList<>(); for (long nodeId : nodeIds) { swhPIDs.add(graph.getSwhPID(nodeId)); } return swhPIDs; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSwhPID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#leaves(long) */ public Output leaves(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) */ public Output neighbors(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#walk */ public Output walk(Input input) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWH PID or a node type try { SwhPID dstSwhPID = new SwhPID(input.dstFmt); long dstNodeId = graph.getNodeId(dstSwhPID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(input.dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored2) { } } output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) */ public Output visitNodes(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) */ public Output visitPaths(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); output.meta.timings.node2pid = Timing.stop(startTime); return output; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java index 0186839..e40558b 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,196 +1,194 @@ package org.softwareheritage.graph; import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.NodeIdMap; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to * PID for users results). However, since graph traversal can be restricted depending on the node * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full * PID lookup. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 * @see org.softwareheritage.graph.AllowedEdges * @see org.softwareheritage.graph.NodeIdMap; * @see org.softwareheritage.graph.NodeTypesMap; */ public class Graph { /** File extension for the SWH PID to long node id map */ public static final String PID_TO_NODE = ".pid2node.csv"; /** File extension for the long node id to SWH PID map */ public static final String NODE_TO_PID = ".node2pid.csv"; /** File extension for the long node id to node typ map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ BVGraph graph; /** Transposed compressed graph (used for backward traversals) */ BVGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWH PIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. * * @param path path and basename of the compressed graph to load */ public Graph(String path) throws IOException { this.graph = BVGraph.load(path); this.graphTransposed = BVGraph.load(path + "-transposed"); this.path = path; this.nodeIdMap = new NodeIdMap(path, getNbNodes()); this.nodeTypesMap = new NodeTypesMap(path); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } /** * Returns the graph full path. * * @return graph full path */ public String getPath() { return path; } /** * Converts {@link SwhPID} node to long. * * @param swhPID node specified as a {@link SwhPID} * @return internal long node id * @see org.softwareheritage.graph.SwhPID */ public long getNodeId(SwhPID swhPID) { return nodeIdMap.getNodeId(swhPID); } /** * Converts long id node to {@link SwhPID}. * * @param nodeId node specified as a long id * @return external SWH PID * @see org.softwareheritage.graph.SwhPID */ public SwhPID getSwhPID(long nodeId) { return nodeIdMap.getSwhPID(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see org.softwareheritage.graph.Node.Type */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ public long getNbNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ public long getNbEdges() { return graph.numArcs(); } /** * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator */ public LazyLongIterator successors(long nodeId) { return graph.successors(nodeId); } /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ public long outdegree(long nodeId) { return graph.outdegree(nodeId); } /** * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator */ public LazyLongIterator predecessors(long nodeId) { return graphTransposed.successors(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return graphTransposed.outdegree(nodeId); } /** * Returns the degree of a node, depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph * @return degree of a node */ public long degree(long nodeId, boolean useTransposed) { return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); } /** * Returns the neighbors of a node (as a lazy iterator), depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph * @return lazy iterator of neighbors of the node, specified as a WebGraph LazyLongIterator */ public LazyLongIterator neighbors(long nodeId, boolean useTransposed) { return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java index c3805bc..4cdf690 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -1,88 +1,84 @@ package org.softwareheritage.graph; import java.util.Iterator; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; /** * Iterator class to go over a node neighbors in the graph. *

* Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 * @see org.softwareheritage.graph.AllowedEdges */ public class Neighbors implements Iterable { /** Graph used to explore neighbors */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** Source node from which neighbors will be listed */ long srcNodeId; /** * Constructor. * * @param graph graph used to explore neighbors * @param useTransposed boolean value to use transposed graph * @param edges edges allowed to be used in the graph * @param srcNodeId source node from where to list neighbors */ public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) { this.graph = graph; this.useTransposed = useTransposed; this.edges = edges; this.srcNodeId = srcNodeId; } @Override public Iterator iterator() { return new NeighborsIterator(); } /** * Inner class for {@link Neighbors} iterator. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class NeighborsIterator implements Iterator { LazyLongIterator neighbors; long nextNeighborId; public NeighborsIterator() { this.neighbors = graph.neighbors(srcNodeId, useTransposed); this.nextNeighborId = -1; } public boolean hasNext() { // Case 1: no edge restriction, bypass type checks and skip to next neighbor if (edges.restrictedTo == null) { nextNeighborId = neighbors.nextLong(); return (nextNeighborId != -1); } // Case 2: edge restriction, look ahead for next neighbor while ((nextNeighborId = neighbors.nextLong()) != -1) { if (edges.isAllowed(srcNodeId, nextNeighborId)) { return true; } } return false; } public Long next() { return nextNeighborId; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Node.java b/java/server/src/main/java/org/softwareheritage/graph/Node.java index 9194b97..ecd263f 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Node.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Node.java @@ -1,95 +1,93 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * A node in the Software Heritage graph. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Node { /** * Software Heritage graph node types, as described in the * data model. */ public enum Type { /** Content node */ CNT, /** Directory node */ DIR, /** Origin node */ ORI, /** Release node */ REL, /** Revision node */ REV, /** Snapshot node */ SNP; /** * Converts integer to corresponding SWH node type. * * @param intType node type represented as an integer * @return the corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public static Node.Type fromInt(int intType) { switch (intType) { case 0: return CNT; case 1: return DIR; case 2: return ORI; case 3: return REL; case 4: return REV; case 5: return SNP; } return null; } /** * Converts string to corresponding SWH node type. * * @param strType node type represented as a string * @return the corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public static Node.Type fromStr(String strType) { if (!strType.matches("cnt|dir|ori|rel|rev|snp")) { throw new IllegalArgumentException("Unknown node type: " + strType); } return Node.Type.valueOf(strType.toUpperCase()); } /** * Parses SWH node type possible values from formatted string (see the API * syntax). * * @param strFmtType node types represented as a formatted string * @return a list containing the {@link Node.Type} values * @see org.softwareheritage.graph.Node.Type */ public static ArrayList parse(String strFmtType) { ArrayList types = new ArrayList<>(); if (strFmtType.equals("*")) { List nodeTypes = Arrays.asList(Node.Type.values()); types.addAll(nodeTypes); } else { types.add(Node.Type.fromStr(strFmtType)); } return types; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java index 39c6604..01586e7 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java @@ -1,100 +1,98 @@ package org.softwareheritage.graph; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.Node; /** * A Software Heritage PID, see persistent * identifier documentation. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class SwhPID { /** Fixed hash length of the PID */ public static final int HASH_LENGTH = 40; /** Full PID as a string */ String swhPID; /** PID node type */ Node.Type type; /** PID hex-encoded SHA1 hash */ String hash; /** * Constructor. * * @param swhPID full PID as a string */ public SwhPID(String swhPID) { this.swhPID = swhPID; // PID format: 'swh:1:type:hash' String[] parts = swhPID.split(":"); if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) { throw new IllegalArgumentException( "Expected SWH PID format to be 'swh:1:type:hash', got: " + swhPID); } this.type = Node.Type.fromStr(parts[2]); this.hash = parts[3]; if (!hash.matches("[0-9a-f]{" + HASH_LENGTH + "}")) { throw new IllegalArgumentException("Wrong SWH PID hash format in: " + swhPID); } } @Override public boolean equals(Object otherObj) { if (otherObj == this) return true; if (!(otherObj instanceof SwhPID)) return false; SwhPID other = (SwhPID) otherObj; return swhPID.equals(other.getSwhPID()); } @Override public int hashCode() { return swhPID.hashCode(); } @Override public String toString() { return swhPID; } /** * Returns full PID as a string. * * @return full PID string */ @JsonValue public String getSwhPID() { return swhPID; } /** * Returns PID node type. * * @return PID corresponding {@link Node.Type} * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType() { return type; } /** * Returns PID hex-encoded SHA1 hash. * * @return PID string hash */ public String getHash() { return hash; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java index 0ad7518..718d446 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java @@ -1,126 +1,124 @@ package org.softwareheritage.graph; import java.util.ArrayList; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.SwhPID; /** * Wrapper class to store a list of {@link SwhPID}. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 * @see org.softwareheritage.graph.SwhPID */ public class SwhPath { /** Internal list of {@link SwhPID} */ ArrayList path; /** * Constructor. */ public SwhPath() { this.path = new ArrayList(); } /** * Constructor. * * @param swhPIDs variable number of string PIDs to initialize this path with */ public SwhPath(String... swhPIDs) { this(); for (String swhPID : swhPIDs) { add(new SwhPID(swhPID)); } } /** * Constructor. * * @param swhPIDs variable number of {@link SwhPID} to initialize this path with * @see org.softwareheritage.graph.SwhPID */ public SwhPath(SwhPID... swhPIDs) { this(); for (SwhPID swhPID : swhPIDs) { add(swhPID); } } /** * Returns this path as a list of {@link SwhPID}. * * @return list of {@link SwhPID} constituting the path * @see org.softwareheritage.graph.SwhPID */ @JsonValue public ArrayList getPath() { return path; } /** * Adds a {@link SwhPID} to this path. * * @param {@link SwhPID} to add to this path * @see org.softwareheritage.graph.SwhPID */ public void add(SwhPID swhPID) { path.add(swhPID); } /** * Returns the {@link SwhPID} at the specified position in this path. * * @param index position of the {@link SwhPID} to return * @return {@link SwhPID} at the specified position * @see org.softwareheritage.graph.SwhPID */ public SwhPID get(int index) { return path.get(index); } /** * Returns the number of elements in this path. * * @return number of elements in this path */ public int size() { return path.size(); } @Override public boolean equals(Object otherObj) { if (otherObj == this) return true; if (!(otherObj instanceof SwhPath)) return false; SwhPath other = (SwhPath) otherObj; if (size() != other.size()) { return false; } for (int i = 0; i < size(); i++) { SwhPID thisSwhPID = get(i); SwhPID otherSwhPID = other.get(i); if (!thisSwhPID.equals(otherSwhPID)) { return false; } } return true; } @Override public String toString() { String str = new String(); for (SwhPID swhPID : path) { str += swhPID + "/"; } return str; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java index a26bbad..a94250a 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java @@ -1,70 +1,68 @@ package org.softwareheritage.graph.algo; import java.io.FileInputStream; import java.io.IOException; import java.util.Properties; /** * Statistics on the compressed graph. *

* These statistics are not computed but directly read from WebGraph generated .stats and .properties files. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Stats { public class Counts { public long nodes; public long edges; } public class Ratios { public double compression; public double bitsPerNode; public double bitsPerEdge; public double avgLocality; } public class Degree { public long min; public long max; public double avg; } public Counts counts; public Ratios ratios; public Degree indegree; public Degree outdegree; /** * Constructor. * * @param graphPath path and basename of compressed graph */ public Stats(String graphPath) throws IOException { Properties properties = new Properties(); properties.load(new FileInputStream(graphPath + ".properties")); properties.load(new FileInputStream(graphPath + ".stats")); this.counts = new Counts(); this.ratios = new Ratios(); this.indegree = new Degree(); this.outdegree = new Degree(); this.counts.nodes = Long.parseLong(properties.getProperty("nodes")); this.counts.edges = Long.parseLong(properties.getProperty("arcs")); this.ratios.compression = Double.parseDouble(properties.getProperty("compratio")); this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode")); this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink")); this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality")); this.indegree.min = Long.parseLong(properties.getProperty("minindegree")); this.indegree.max = Long.parseLong(properties.getProperty("maxindegree")); this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree")); this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree")); this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree")); this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree")); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java index 4ac1cdb..ee06f7c 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -1,330 +1,328 @@ package org.softwareheritage.graph.algo; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import java.util.Queue; import java.util.Stack; import it.unimi.dsi.bits.LongArrayBitVector; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; /** * Traversal algorithms on the compressed graph. *

* Internal implementation of the traversal API endpoints. These methods only input/output internal * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage * PID. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 * @see org.softwareheritage.graph.Endpoint */ public class Traversal { /** Graph used in the traversal */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** Bit array storing if we have visited a node */ LongArrayBitVector visited; /** Hash map storing parent node id for each nodes during a traversal */ Map parentNode; /** Number of edges accessed during traversal */ long nbEdgesAccessed; /** * Constructor. * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } this.graph = graph; this.useTransposed = (direction.equals("backward")); this.edges = new AllowedEdges(graph, edgesFmt); long nbNodes = graph.getNbNodes(); this.visited = LongArrayBitVector.ofLength(nbNodes); this.parentNode = new HashMap<>(); this.nbEdgesAccessed = 0; } /** * Returns number of accessed edges during traversal. * * @return number of edges accessed in last traversal */ public long getNbEdgesAccessed() { return nbEdgesAccessed; } /** * Returns the leaves of a subgraph rooted at the specified source node. * * @param srcNodeId source node * @return list of node ids corresponding to the leaves */ public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); long neighborsCnt = 0; nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { neighborsCnt++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } if (neighborsCnt == 0) { nodeIds.add(currentNodeId); } } return nodeIds; } /** * Returns node direct neighbors (linked with exactly one edge). * * @param srcNodeId source node * @return list of node ids corresponding to the neighbors */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList(); this.nbEdgesAccessed = graph.degree(srcNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { nodeIds.add(neighborNodeId); } return nodeIds; } /** * Performs a graph traversal and returns explored nodes. * * @param srcNodeId source node * @return list of explored node ids */ public ArrayList visitNodes(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); nodeIds.add(currentNodeId); nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } } return nodeIds; } /** * Performs a graph traversal and returns explored paths. * * @param srcNodeId source node * @return list of explored paths (represented as a list of node ids) */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); Stack currentPath = new Stack(); this.nbEdgesAccessed = 0; visitPathsInternal(srcNodeId, paths, currentPath); return paths; } /** * Internal recursive function of {@link #visitPaths}. * * @param currentNodeId current node * @param paths list of currently stored paths * @param currentPath current path as node ids */ private void visitPathsInternal( long currentNodeId, ArrayList> paths, Stack currentPath) { currentPath.push(currentNodeId); long visitedNeighbors = 0; nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { visitPathsInternal(neighborNodeId, paths, currentPath); visitedNeighbors++; } if (visitedNeighbors == 0) { ArrayList path = new ArrayList(); for (long nodeId : currentPath) { path.add(nodeId); } paths.add(path); } currentPath.pop(); } /** * Performs a graph traversal and returns the first found path from source to destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids */ public ArrayList walk(long srcNodeId, T dst, String algorithm) { long dstNodeId = -1; if (algorithm.equals("dfs")) { dstNodeId = walkInternalDfs(srcNodeId, dst); } else if (algorithm.equals("bfs")) { dstNodeId = walkInternalBfs(srcNodeId, dst); } else { throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); } if (dstNodeId == -1) { throw new IllegalArgumentException("Unable to find destination point: " + dst); } ArrayList nodeIds = backtracking(srcNodeId, dstNodeId); return nodeIds; } /** * Internal DFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalDfs(long srcNodeId, T dst) { Stack stack = new Stack(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal BFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalBfs(long srcNodeId, T dst) { Queue queue = new LinkedList(); this.nbEdgesAccessed = 0; queue.add(srcNodeId); visited.set(srcNodeId); while (!queue.isEmpty()) { long currentNodeId = queue.poll(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { queue.add(neighborNodeId); visited.set(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal function of {@link #walk} to check if a node corresponds to the destination. * * @param nodeId current node * @param dst destination (either a node or a node type) * @return true if the node is a destination, or false otherwise */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; return nodeId == dstNodeId; } else if (dst instanceof Node.Type) { Node.Type dstType = (Node.Type) dst; return graph.getNodeType(nodeId) == dstType; } else { return false; } } /** * Internal backtracking function of {@link #walk}. * * @param srcNodeId source node * @param dstNodeId destination node * @return the found path, as a list of node ids */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList(); long currentNodeId = dstNodeId; while (currentNodeId != srcNodeId) { path.add(currentNodeId); currentNodeId = parentNode.get(currentNodeId); } path.add(srcNodeId); Collections.reverse(path); return path; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java index 0644049..3fe03e3 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java @@ -1,65 +1,63 @@ package org.softwareheritage.graph.backend; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import it.unimi.dsi.io.ByteBufferInputStream; /** * Wrapper class around very big mmap()-ed file. *

* Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this * Java limit. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class MapFile { /** Memory-mapped file buffer */ ByteBufferInputStream bufferMap; /** Fixed line length of the mmap()-ed file */ int lineLength; /** * Constructor. * * @param path file path to mmap() * @param lineLength fixed length of a line in the file */ public MapFile(String path, int lineLength) throws IOException { this.bufferMap = null; this.lineLength = lineLength; try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { FileChannel fileChannel = mapFile.getChannel(); bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); } } /** * Returns a specific line in the file. * * @param lineIndex line number in the file * @return the line at the specified position */ public String readAtLine(long lineIndex) { byte[] buffer = new byte[lineLength]; long position = lineIndex * (long) lineLength; bufferMap.position(position); bufferMap.read(buffer, 0, lineLength); String line = new String(buffer); return line.trim(); } /** * Closes the mmap()-ed file. */ public void close() throws IOException { bufferMap.close(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java index 1e8f2ac..72a2efb 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java @@ -1,117 +1,115 @@ package org.softwareheritage.graph.backend; import java.io.IOException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.MapFile; import org.softwareheritage.graph.backend.Setup; /** * Mapping between internal long node id and external SWH PID. *

* Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then * they are loaded here using mmap(). * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 * @see org.softwareheritage.graph.backend.Setup */ public class NodeIdMap { /** Fixed length of full SWH PID */ public static final int SWH_ID_LENGTH = 50; /** Fixed length of long node id */ public static final int NODE_ID_LENGTH = 20; /** Graph path and basename */ String graphPath; /** Number of ids to map */ long nbIds; /** mmap()-ed PID_TO_NODE file */ MapFile swhToNodeMap; /** mmap()-ed NODE_TO_PID file */ MapFile nodeToSwhMap; /** * Constructor. * * @param graphPath full graph path * @param nbNodes number of nodes in the graph */ public NodeIdMap(String graphPath, long nbNodes) throws IOException { this.graphPath = graphPath; this.nbIds = nbNodes; // +1 are for spaces and end of lines int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1; int nodeToSwhLineLength = SWH_ID_LENGTH + 1; this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength); this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength); } /** * Converts SWH PID to corresponding long node id. * * @param swhPID node represented as a {@link SwhPID} * @return corresponding node as a long id * @see org.softwareheritage.graph.SwhPID */ public long getNodeId(SwhPID swhPID) { // Each line in PID_TO_NODE is formatted as: swhPID nodeId // The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding // nodeId long start = 0; long end = nbIds - 1; while (start <= end) { long lineNumber = (start + end) / 2L; String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" "); if (parts.length != 2) { break; } String currentSwhPID = parts[0]; long currentNodeId = Long.parseLong(parts[1]); int cmp = currentSwhPID.compareTo(swhPID.toString()); if (cmp == 0) { return currentNodeId; } else if (cmp < 0) { start = lineNumber + 1; } else { end = lineNumber - 1; } } throw new IllegalArgumentException("Unknown SWH PID: " + swhPID); } /** * Converts a node long id to corresponding SWH PID. * * @param nodeId node as a long id * @return corresponding node as a {@link SwhPID} * @see org.softwareheritage.graph.SwhPID */ public SwhPID getSwhPID(long nodeId) { // Each line in NODE_TO_PID is formatted as: swhPID // The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the // nodeId-th line to get corresponding swhPID if (nodeId < 0 || nodeId >= nbIds) { throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); } String swhPID = nodeToSwhMap.readAtLine(nodeId); return new SwhPID(swhPID); } /** * Closes the mapping files. */ public void close() throws IOException { swhToNodeMap.close(); nodeToSwhMap.close(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java index 1b95bd0..42669f8 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java @@ -1,53 +1,51 @@ package org.softwareheritage.graph.backend; import java.io.IOException; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Mapping between long node id and SWH node type as described in the data model. *

* The type mapping is pre-computed and dumped on disk in the {@link Setup} class, then it is loaded * in-memory here using fastutil LongBigList. To be * space-efficient, the mapping is stored as a bitmap using minimum number of bits per {@link * Node.Type}. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class NodeTypesMap { /** Array storing for each node its type */ LongBigList nodeTypesMap; /** * Constructor. * * @param graphPath path and basename of the compressed graph */ public NodeTypesMap(String graphPath) throws IOException { try { nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Unknown class object: " + e); } } /** * Returns node type from a node long id. * * @param nodeId node as a long id * @return corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType(long nodeId) { long type = nodeTypesMap.getLong(nodeId); return Node.Type.fromInt((int) type); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java index 77df754..c1d44ec 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java @@ -1,125 +1,123 @@ package org.softwareheritage.graph.backend; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Writer; import java.util.zip.GZIPInputStream; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.fastutil.objects.ObjectBigArrays; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Pre-processing steps (such as dumping mapping files on disk) before running the graph service. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Setup { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { if (args.length != 2) { System.err.println("Expected parameters: "); System.exit(1); } String nodesPath = args[0]; String graphPath = args[1]; System.out.println("Pre-computing node id maps..."); long startTime = System.nanoTime(); precomputeNodeIdMap(nodesPath, graphPath); long endTime = System.nanoTime(); double duration = (endTime - startTime) / 1_000_000_000; System.out.println("Done in: " + duration + " seconds"); } /** * Computes and dumps on disk mapping files. * * @param nodesPath path of the compressed csv nodes file * @param graphPath path of the compressed graph */ // Suppress warning for Object2LongFunction cast @SuppressWarnings("unchecked") static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException { // First internal mapping: SWH PID (string) -> WebGraph MPH (long) Object2LongFunction mphMap = null; try { mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); } long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); // Second internal mapping: WebGraph MPH (long) -> BFS ordering (long) long[][] bfsMap = LongBigArrays.newBigArray(nbIds); long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); if (loaded != nbIds) { throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded); } // Dump complete mapping for all nodes: SWH PID (string) <=> WebGraph node id (long) InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath)); FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8")); LineIterator swhPIDIterator = new LineIterator(buffer); try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE)); Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) { // nodeToSwhMap needs to write SWH PID in order of node id, so use a temporary array Object[][] nodeToSwhPID = ObjectBigArrays.newBigArray(nbIds); // To effectively run edge restriction during graph traversals, we store node id (long) -> SWH // type map. This is represented as a bitmap using minimum number of bits per Node.Type. final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2)); final int nbBitsPerNodeType = log2NbTypes; LongArrayBitVector nodeTypesBitVector = LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) { String strSwhPID = swhPIDIterator.next().toString(); long mphId = mphMap.getLong(strSwhPID); long nodeId = LongBigArrays.get(bfsMap, mphId); String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId); String line = strSwhPID + " " + paddedNodeId + "\n"; swhToNodeMap.write(line); ObjectBigArrays.set(nodeToSwhPID, nodeId, strSwhPID); SwhPID swhPID = new SwhPID(strSwhPID); nodeTypesMap.set(nodeId, swhPID.getType().ordinal()); } BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); for (long iNode = 0; iNode < nbIds; iNode++) { String line = ObjectBigArrays.get(nodeToSwhPID, iNode).toString() + "\n"; nodeToSwhMap.write(line); } } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index e0b281b..84cc498 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,49 +1,47 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import java.util.ArrayList; import com.martiansoftware.jsap.JSAPException; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Benchmark; import org.softwareheritage.graph.benchmark.utils.Statistics; import org.softwareheritage.graph.benchmark.utils.Timing; /** * Benchmark to time edge access time. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class AccessEdge { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); ArrayList timings = new ArrayList<>(); for (long nodeId : nodeIds) { long startTime = Timing.start(); LazyLongIterator neighbors = graph.successors(nodeId); long firstNeighbor = neighbors.nextLong(); double duration = Timing.stop(startTime); timings.add(duration); } System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):"); Statistics stats = new Statistics(timings); stats.printAll(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java index cc1c51b..bacf160 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java @@ -1,172 +1,170 @@ package org.softwareheritage.graph.benchmark; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.StringJoiner; import java.util.function.Function; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.benchmark.utils.Random; import org.softwareheritage.graph.benchmark.utils.Statistics; /** * Benchmark common utility functions. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Benchmark { /** * Input arguments. */ public class Args { /** Basename of the compressed graph */ public String graphPath; /** Number of random nodes to use for the benchmark */ public int nbNodes; /** File name for CSV format benchmark log */ public String logFile; /** Random generator */ public Random random; } /** Command line arguments */ public Args args; /** CSV separator for log file */ final String CSV_SEPARATOR = ";"; /** * Constructor. */ public Benchmark() { this.args = new Args(); } /** * Parses benchmark command line arguments. * * @param args command line arguments */ public void parseCommandLineArgs(String[] args) throws JSAPException { SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(), "Benchmark tool for Software Heritage use-cases scenarios.", new Parameter[] { new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', "nb-nodes", "Number of random nodes used to do the benchmark."), new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', "log-file", "File name to output CSV format benchmark log."), new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "seed", "Random generator seed."), }); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } this.args.graphPath = config.getString("graphPath"); this.args.nbNodes = config.getInt("nbNodes"); this.args.logFile = config.getString("logFile"); this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random(); } /** * Creates CSV file for log output. */ public void createCSVLogFile() throws IOException { try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) { StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR); csvHeader.add("use case name") .add("SWH PID") .add("number of edges accessed") .add("traversal timing") .add("pid2node timing") .add("node2pid timing"); csvLog.write(csvHeader.toString() + "\n"); } } /** * Times a specific endpoint and outputs individual datapoints along with aggregated statistics. * * @param useCaseName benchmark use-case name * @param graph compressed graph used in the benchmark * @param nodeIds node ids to use as starting point for the endpoint traversal * @param operation endpoint function to benchmark * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, Function operation, String dstFmt, String algorithm) throws IOException { ArrayList timings = new ArrayList<>(); ArrayList timingsNormalized = new ArrayList<>(); ArrayList nbEdgesAccessed = new ArrayList<>(); final boolean append = true; try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) { for (long nodeId : nodeIds) { SwhPID swhPID = graph.getSwhPID(nodeId); Endpoint.Output output = (dstFmt == null) ? operation.apply(new Endpoint.Input(swhPID)) : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm)); StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR); csvLine.add(useCaseName) .add(swhPID.toString()) .add(Long.toString(output.meta.nbEdgesAccessed)) .add(Double.toString(output.meta.timings.traversal)) .add(Double.toString(output.meta.timings.pid2node)) .add(Double.toString(output.meta.timings.node2pid)); csvLog.write(csvLine.toString() + "\n"); timings.add(output.meta.timings.traversal); nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed); if (output.meta.nbEdgesAccessed != 0) { timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); } } } System.out.println("\n" + useCaseName + " use-case:"); System.out.println("timings:"); Statistics stats = new Statistics(timings); stats.printAll(); System.out.println("timings normalized:"); Statistics statsNormalized = new Statistics(timingsNormalized); statsNormalized.printAll(); System.out.println("nb edges accessed:"); Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed); statsNbEdgesAccessed.printAll(); } /** * Same as {@link timeEndpoint} but without destination or algorithm specified to endpoint call. */ public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, Function operation) throws IOException { timeEndpoint(useCaseName, graph, nodeIds, operation, null, null); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 92dd9f6..45f699e 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,48 +1,46 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage browsing * use-cases scenarios. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Browsing { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] dirNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); long[] revNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors); bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths); bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java index c2a3bf6..e53b4bf 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java @@ -1,55 +1,53 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage provenance * use-cases scenarios. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Provenance { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint( "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); bench.timeEndpoint( "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); bench.timeEndpoint( "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); bench.timeEndpoint( "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); bench.timeEndpoint( "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); bench.timeEndpoint( "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java index 5d024c5..cd08b57 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java @@ -1,41 +1,39 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage vault * use-case scenario. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Vault { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint endpoint = new Endpoint(graph, "forward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java index 2a6e17e..633a8ee 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java @@ -1,69 +1,67 @@ package org.softwareheritage.graph.benchmark.utils; import java.util.PrimitiveIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Random related utility class. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Random { /** Internal pseudorandom generator */ java.util.Random random; /** * Constructor. */ public Random() { this.random = new java.util.Random(); } /** * Constructor. * * @param seed random generator seed */ public Random(long seed) { this.random = new java.util.Random(seed); } /** * Generates random node ids. * * @param graph graph used to pick node ids * @param nbNodes number of node ids to generate * @return an array of random node ids */ public long[] generateNodeIds(Graph graph, int nbNodes) { return random.longs(nbNodes, 0, graph.getNbNodes()).toArray(); } /** * Generates random node ids with a specific type. * * @param graph graph used to pick node ids * @param nbNodes number of node ids to generate * @param expectedType specific node type to pick * @return an array of random node ids */ public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) { PrimitiveIterator.OfLong nodes = random.longs(0, graph.getNbNodes()).iterator(); long[] nodeIds = new long[nbNodes]; long nextId; for (int i = 0; i < nbNodes; i++) { do { nextId = nodes.nextLong(); } while (graph.getNodeType(nextId) != expectedType); nodeIds[i] = nextId; } return nodeIds; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java index 61f12fb..33e341e 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java @@ -1,106 +1,104 @@ package org.softwareheritage.graph.benchmark.utils; import java.util.ArrayList; import java.util.Collections; /** * Compute various statistics on a list of values. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Statistics { /** Input values */ ArrayList values; /** * Constructor. * * @param values input values */ public Statistics(ArrayList values) { this.values = values; } /** * Returns the minimum value. * * @return minimum value */ public double getMin() { double min = Double.POSITIVE_INFINITY; for (double v : values) { min = Math.min(min, v); } return min; } /** * Returns the maximum value. * * @return maximum value */ public double getMax() { double max = Double.NEGATIVE_INFINITY; for (double v : values) { max = Math.max(max, v); } return max; } /** * Computes the average. * * @return average value */ public double getAverage() { double sum = 0; for (double v : values) { sum += v; } return sum / (double) values.size(); } /** * Returns the median value. * * @return median value */ public double getMedian() { Collections.sort(values); int length = values.size(); if (length % 2 == 0) { return (values.get(length / 2) + values.get(length / 2 - 1)) / 2; } else { return values.get(length / 2); } } /** * Computes the standard deviation. * * @return standard deviation value */ public double getStandardDeviation() { double average = getAverage(); double variance = 0; for (double v : values) { variance += (v - average) * (v - average); } variance /= (double) values.size(); return Math.sqrt(variance); } /** * Computes and prints all statistical values. */ public void printAll() { System.out.println("min value: " + getMin()); System.out.println("max value: " + getMax()); System.out.println("average: " + getAverage()); System.out.println("median: " + getMedian()); System.out.println("standard deviation: " + getStandardDeviation()); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java index fea64a2..f07b9f0 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java @@ -1,32 +1,30 @@ package org.softwareheritage.graph.benchmark.utils; /** * Time measurement utility class. * * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 */ public class Timing { /** * Returns measurement starting timestamp. * * @return timestamp used for time measurement */ public static long start() { return System.nanoTime(); } /** * Ends timing measurement and returns total duration in seconds. * * @param startTime measurement starting timestamp * @return time in seconds elapsed since starting point */ public static double stop(long startTime) { long endTime = System.nanoTime(); double duration = (double) (endTime - startTime) / 1_000_000_000; return duration; } } diff --git a/swh/graph/tests/__init__.py b/swh/graph/tests/__init__.py index 8419e91..44e0c37 100644 --- a/swh/graph/tests/__init__.py +++ b/swh/graph/tests/__init__.py @@ -1 +1 @@ -SWH_GRAPH_VERSION = '0.0.1' +SWH_GRAPH_VERSION = '0.0.2'