diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java index b6be1c2..7d71619 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java +++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java @@ -1,91 +1,91 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Edge restriction based on node types, used when visiting the graph. *

* Software Heritage * graph contains multiple node types (contents, directories, revisions, ...) and restricting * the traversal to specific node types is necessary for many querying operations: use cases. * * @author Thibault Allançon */ public class AllowedEdges { - /** Graph on which edge restriction is performed */ - Graph graph; - /** - * 2D boolean matrix storing access rights for all combination of src/dst node types (first - * dimension is source, second dimension is destination), when edge restriction is not enforced - * this array is set to null for early bypass. - */ - public boolean[][] restrictedTo; + /** Graph on which edge restriction is performed */ + Graph graph; + /** + * 2D boolean matrix storing access rights for all combination of src/dst node types (first + * dimension is source, second dimension is destination), when edge restriction is not enforced + * this array is set to null for early bypass. + */ + public boolean[][] restrictedTo; - /** - * Constructor. - * - * @param graph the graph on which to perform edge restriction - * @param edgesFmt a formatted string describing allowed edges - */ - public AllowedEdges(Graph graph, String edgesFmt) { - this.graph = graph; + /** + * Constructor. + * + * @param graph the graph on which to perform edge restriction + * @param edgesFmt a formatted string describing allowed edges + */ + public AllowedEdges(Graph graph, String edgesFmt) { + this.graph = graph; - int nbNodeTypes = Node.Type.values().length; - this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes]; - // Special values (null, empty, "*") - if (edgesFmt == null || edgesFmt.isEmpty()) { - return; - } - if (edgesFmt.equals("*")) { - // Allows for quick bypass (with simple null check) when no edge restriction - restrictedTo = null; - return; - } + int nbNodeTypes = Node.Type.values().length; + this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes]; + // Special values (null, empty, "*") + if (edgesFmt == null || edgesFmt.isEmpty()) { + return; + } + if (edgesFmt.equals("*")) { + // Allows for quick bypass (with simple null check) when no edge restriction + restrictedTo = null; + return; + } - // Format: "src1:dst1,src2:dst2,[...]" - String[] edgeTypes = edgesFmt.split(","); - for (String edgeType : edgeTypes) { - String[] nodeTypes = edgeType.split(":"); - if (nodeTypes.length != 2) { - throw new IllegalArgumentException("Cannot parse edge type: " + edgeType); - } + // Format: "src1:dst1,src2:dst2,[...]" + String[] edgeTypes = edgesFmt.split(","); + for (String edgeType : edgeTypes) { + String[] nodeTypes = edgeType.split(":"); + if (nodeTypes.length != 2) { + throw new IllegalArgumentException("Cannot parse edge type: " + edgeType); + } - ArrayList srcTypes = Node.Type.parse(nodeTypes[0]); - ArrayList dstTypes = Node.Type.parse(nodeTypes[1]); - for (Node.Type srcType : srcTypes) { - for (Node.Type dstType : dstTypes) { - restrictedTo[srcType.ordinal()][dstType.ordinal()] = true; + ArrayList srcTypes = Node.Type.parse(nodeTypes[0]); + ArrayList dstTypes = Node.Type.parse(nodeTypes[1]); + for (Node.Type srcType : srcTypes) { + for (Node.Type dstType : dstTypes) { + restrictedTo[srcType.ordinal()][dstType.ordinal()] = true; + } + } } - } } - } - /** - * Checks if a given edge can be followed during graph traversal. - * - * @param srcNodeId edge source node - * @param dstNodeId edge destination node - * @return true if allowed and false otherwise - */ - public boolean isAllowed(long srcNodeId, long dstNodeId) { - Node.Type srcType = graph.getNodeType(srcNodeId); - Node.Type dstType = graph.getNodeType(dstNodeId); - return restrictedTo[srcType.ordinal()][dstType.ordinal()]; - } + /** + * Checks if a given edge can be followed during graph traversal. + * + * @param srcNodeId edge source node + * @param dstNodeId edge destination node + * @return true if allowed and false otherwise + */ + public boolean isAllowed(long srcNodeId, long dstNodeId) { + Node.Type srcType = graph.getNodeType(srcNodeId); + Node.Type dstType = graph.getNodeType(dstNodeId); + return restrictedTo[srcType.ordinal()][dstType.ordinal()]; + } - /** - * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of - * node ids. - * - * @see AllowedEdges#isAllowed(long, long) - */ - public boolean isAllowed(Node.Type srcType, Node.Type dstType) { - return restrictedTo[srcType.ordinal()][dstType.ordinal()]; - } + /** + * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of + * node ids. + * + * @see AllowedEdges#isAllowed(long, long) + */ + public boolean isAllowed(Node.Type srcType, Node.Type dstType) { + return restrictedTo[srcType.ordinal()][dstType.ordinal()]; + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java index c22fb30..c0821b7 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/App.java +++ b/java/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,195 +1,195 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.Switch; import com.martiansoftware.jsap.UnflaggedOption; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.algo.Stats; /** * Web framework of the swh-graph server REST API. * * @author Thibault Allançon */ public class App { - /** - * Main entrypoint. - * - * @param args command line arguments - */ - public static void main(String[] args) throws IOException, JSAPException { - SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), - "Server to load and query a compressed graph representation of Software Heritage archive.", - new Parameter[] { - new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", - "Binding port of the server."), - new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - JSAP.NOT_GREEDY, "The basename of the compressed graph."), - new Switch("timings", 't', "timings", "Show timings in API result metadata."), + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException, JSAPException { + SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), + "Server to load and query a compressed graph representation of Software Heritage archive.", + new Parameter[] { + new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", + "Binding port of the server."), + new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + JSAP.NOT_GREEDY, "The basename of the compressed graph."), + new Switch("timings", 't', "timings", "Show timings in API result metadata."), + }); + + JSAPResult config = jsap.parse(args); + if (jsap.messagePrinted()) { + System.exit(1); + } + + String graphPath = config.getString("graphPath"); + int port = config.getInt("port"); + boolean showTimings = config.getBoolean("timings"); + + startServer(graphPath, port, showTimings); + } + + /** + * Loads compressed graph and starts the web server to query it. + * + * @param graphPath basename of the compressed graph + * @param port binding port of the server + * @param showTimings true if timings should be in results metadata, false otherwise + */ + private static void startServer(String graphPath, int port, boolean showTimings) + throws IOException { + Graph graph = new Graph(graphPath); + Stats stats = new Stats(graphPath); + + // Clean up on exit + Runtime.getRuntime().addShutdownHook(new Thread() { + public void run() { + try { + graph.cleanUp(); + } catch (IOException e) { + System.out.println("Could not clean up graph on exit: " + e); + } + } + }); + + // Configure Jackson JSON to use snake case naming style + ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); + objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); + JavalinJackson.configure(objectMapper); + + Javalin app = Javalin.create().start(port); + + app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); + app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); + app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); + app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); + app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); + + app.get("/stats/", ctx -> { ctx.json(stats); }); + + // Graph traversal endpoints + // By default the traversal is a forward DFS using all edges + + app.get("/leaves/:src", ctx -> { + SwhPID src = new SwhPID(ctx.pathParam("src")); + String direction = ctx.queryParam("direction", "forward"); + String edgesFmt = ctx.queryParam("edges", "*"); + + Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); + Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src)); + ctx.json(formatEndpointOutput(output, showTimings)); + }); + + app.get("/neighbors/:src", ctx -> { + SwhPID src = new SwhPID(ctx.pathParam("src")); + String direction = ctx.queryParam("direction", "forward"); + String edgesFmt = ctx.queryParam("edges", "*"); + + Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); + Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src)); + ctx.json(formatEndpointOutput(output, showTimings)); + }); + + app.get("/visit/nodes/:src", ctx -> { + SwhPID src = new SwhPID(ctx.pathParam("src")); + String direction = ctx.queryParam("direction", "forward"); + String edgesFmt = ctx.queryParam("edges", "*"); + + Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); + Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src)); + ctx.json(formatEndpointOutput(output, showTimings)); + }); + + app.get("/visit/paths/:src", ctx -> { + SwhPID src = new SwhPID(ctx.pathParam("src")); + String direction = ctx.queryParam("direction", "forward"); + String edgesFmt = ctx.queryParam("edges", "*"); + + Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); + Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src)); + ctx.json(formatEndpointOutput(output, showTimings)); + }); + + app.get("/walk/:src/:dst", ctx -> { + SwhPID src = new SwhPID(ctx.pathParam("src")); + String dstFmt = ctx.pathParam("dst"); + String direction = ctx.queryParam("direction", "forward"); + String edgesFmt = ctx.queryParam("edges", "*"); + String algorithm = ctx.queryParam("traversal", "dfs"); + + Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); + Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm)); + ctx.json(formatEndpointOutput(output, showTimings)); }); - JSAPResult config = jsap.parse(args); - if (jsap.messagePrinted()) { - System.exit(1); + app.exception(IllegalArgumentException.class, (e, ctx) -> { + ctx.status(400); + ctx.result(e.getMessage()); + }); } - String graphPath = config.getString("graphPath"); - int port = config.getInt("port"); - boolean showTimings = config.getBoolean("timings"); - - startServer(graphPath, port, showTimings); - } - - /** - * Loads compressed graph and starts the web server to query it. - * - * @param graphPath basename of the compressed graph - * @param port binding port of the server - * @param showTimings true if timings should be in results metadata, false otherwise - */ - private static void startServer(String graphPath, int port, boolean showTimings) - throws IOException { - Graph graph = new Graph(graphPath); - Stats stats = new Stats(graphPath); - - // Clean up on exit - Runtime.getRuntime().addShutdownHook(new Thread() { - public void run() { - try { - graph.cleanUp(); - } catch (IOException e) { - System.out.println("Could not clean up graph on exit: " + e); + /** + * Checks query strings names provided to the REST API. + * + * @param ctx Javalin HTTP request context + * @param allowedFmt a regular expression describing allowed query strings names + * @throws IllegalArgumentException unknown query string provided + */ + private static void checkQueryStrings(Context ctx, String allowedFmt) { + Map> queryParamMap = ctx.queryParamMap(); + for (String key : queryParamMap.keySet()) { + if (!key.matches(allowedFmt)) { + throw new IllegalArgumentException("Unknown query string: " + key); + } } - } - }); - - // Configure Jackson JSON to use snake case naming style - ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); - objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); - JavalinJackson.configure(objectMapper); - - Javalin app = Javalin.create().start(port); - - app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); - app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); - app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); - app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); - app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); - - app.get("/stats/", ctx -> { ctx.json(stats); }); - - // Graph traversal endpoints - // By default the traversal is a forward DFS using all edges - - app.get("/leaves/:src", ctx -> { - SwhPID src = new SwhPID(ctx.pathParam("src")); - String direction = ctx.queryParam("direction", "forward"); - String edgesFmt = ctx.queryParam("edges", "*"); - - Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); - Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src)); - ctx.json(formatEndpointOutput(output, showTimings)); - }); - - app.get("/neighbors/:src", ctx -> { - SwhPID src = new SwhPID(ctx.pathParam("src")); - String direction = ctx.queryParam("direction", "forward"); - String edgesFmt = ctx.queryParam("edges", "*"); - - Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); - Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src)); - ctx.json(formatEndpointOutput(output, showTimings)); - }); - - app.get("/visit/nodes/:src", ctx -> { - SwhPID src = new SwhPID(ctx.pathParam("src")); - String direction = ctx.queryParam("direction", "forward"); - String edgesFmt = ctx.queryParam("edges", "*"); - - Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); - Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src)); - ctx.json(formatEndpointOutput(output, showTimings)); - }); - - app.get("/visit/paths/:src", ctx -> { - SwhPID src = new SwhPID(ctx.pathParam("src")); - String direction = ctx.queryParam("direction", "forward"); - String edgesFmt = ctx.queryParam("edges", "*"); - - Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); - Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src)); - ctx.json(formatEndpointOutput(output, showTimings)); - }); - - app.get("/walk/:src/:dst", ctx -> { - SwhPID src = new SwhPID(ctx.pathParam("src")); - String dstFmt = ctx.pathParam("dst"); - String direction = ctx.queryParam("direction", "forward"); - String edgesFmt = ctx.queryParam("edges", "*"); - String algorithm = ctx.queryParam("traversal", "dfs"); - - Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); - Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm)); - ctx.json(formatEndpointOutput(output, showTimings)); - }); - - app.exception(IllegalArgumentException.class, (e, ctx) -> { - ctx.status(400); - ctx.result(e.getMessage()); - }); - } - - /** - * Checks query strings names provided to the REST API. - * - * @param ctx Javalin HTTP request context - * @param allowedFmt a regular expression describing allowed query strings names - * @throws IllegalArgumentException unknown query string provided - */ - private static void checkQueryStrings(Context ctx, String allowedFmt) { - Map> queryParamMap = ctx.queryParamMap(); - for (String key : queryParamMap.keySet()) { - if (!key.matches(allowedFmt)) { - throw new IllegalArgumentException("Unknown query string: " + key); - } } - } - - /** - * Formats endpoint result into final JSON for the REST API. - *

- * Removes unwanted information if necessary, such as timings (to prevent use of side channels - * attacks). - * - * @param output endpoint operation output which needs formatting - * @param showTimings true if timings should be in results metadata, false otherwise - * @return final Object with desired JSON format - */ - private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { - if (showTimings) { - return output; - } else { - Map metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed); - Map outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings); - return outputNoTimings; + + /** + * Formats endpoint result into final JSON for the REST API. + *

+ * Removes unwanted information if necessary, such as timings (to prevent use of side channels + * attacks). + * + * @param output endpoint operation output which needs formatting + * @param showTimings true if timings should be in results metadata, false otherwise + * @return final Object with desired JSON format + */ + private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { + if (showTimings) { + return output; + } else { + Map metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed); + Map outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings); + return outputNoTimings; + } } - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java index 078fc76..1a83e80 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -1,311 +1,311 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; import org.softwareheritage.graph.benchmark.utils.Timing; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author Thibault Allançon * @see org.softwareheritage.graph.algo.Traversal */ public class Endpoint { - /** - * Wrapper class to unify traversal methods input signatures. - */ - public static class Input { - /** Source node of endpoint call specified as a {@link SwhPID} */ - public SwhPID src; /** - * Destination formatted string as described in the API + * Wrapper class to unify traversal methods input signatures. */ - public String dstFmt; - /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */ - public String algorithm; + public static class Input { + /** Source node of endpoint call specified as a {@link SwhPID} */ + public SwhPID src; + /** + * Destination formatted string as described in the API + */ + public String dstFmt; + /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */ + public String algorithm; + + public Input(SwhPID src) { + this.src = src; + } + + public Input(SwhPID src, String dstFmt, String algorithm) { + this.src = src; + this.dstFmt = dstFmt; + this.algorithm = algorithm; + } + } - public Input(SwhPID src) { - this.src = src; + /** + * Wrapper class to return both the endpoint result and metadata (such as timings). + */ + public static class Output { + /** The result content itself */ + public T result; + /** Various metadata about the result */ + public Meta meta; + + public Output() { + this.result = null; + this.meta = new Meta(); + } + + /** + * Endpoint result metadata. + */ + public class Meta { + /** Operations timings */ + public Timings timings; + /** Number of edges accessed during traversal */ + public long nbEdgesAccessed; + + public Meta() { + this.timings = new Timings(); + this.nbEdgesAccessed = 0; + } + + /** + * Wrapper class for JSON output format. + */ + public class Timings { + /** Time in seconds to do the traversal */ + public double traversal; + /** Time in seconds to convert input SWH PID to node id */ + public double pid2node; + /** Time in seconds to convert output node ids to SWH PIDs */ + public double node2pid; + } + } } - public Input(SwhPID src, String dstFmt, String algorithm) { - this.src = src; - this.dstFmt = dstFmt; - this.algorithm = algorithm; + /** Graph where traversal endpoint is performed */ + Graph graph; + /** Internal traversal API */ + Traversal traversal; + + /** + * Constructor. + * + * @param graph the graph used for traversal endpoint + * @param direction a string (either "forward" or "backward") specifying edge orientation + * @param edgesFmt a formatted string describing allowed edges + */ + public Endpoint(Graph graph, String direction, String edgesFmt) { + this.graph = graph; + this.traversal = new Traversal(graph, direction, edgesFmt); } - } - - /** - * Wrapper class to return both the endpoint result and metadata (such as timings). - */ - public static class Output { - /** The result content itself */ - public T result; - /** Various metadata about the result */ - public Meta meta; - - public Output() { - this.result = null; - this.meta = new Meta(); + + /** + * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. + * + * @param nodeIds the list of long node ids + * @return a list of corresponding SWH PIDs + */ + private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { + ArrayList swhPIDs = new ArrayList<>(); + for (long nodeId : nodeIds) { + swhPIDs.add(graph.getSwhPID(nodeId)); + } + return swhPIDs; } /** - * Endpoint result metadata. + * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. + * + * @param nodeIds the list of long node ids + * @return the corresponding {@link SwhPath} + * @see org.softwareheritage.graph.SwhPath */ - public class Meta { - /** Operations timings */ - public Timings timings; - /** Number of edges accessed during traversal */ - public long nbEdgesAccessed; - - public Meta() { - this.timings = new Timings(); - this.nbEdgesAccessed = 0; - } - - /** - * Wrapper class for JSON output format. - */ - public class Timings { - /** Time in seconds to do the traversal */ - public double traversal; - /** Time in seconds to convert input SWH PID to node id */ - public double pid2node; - /** Time in seconds to convert output node ids to SWH PIDs */ - public double node2pid; - } + private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { + SwhPath path = new SwhPath(); + for (long nodeId : nodeIds) { + path.add(graph.getSwhPID(nodeId)); + } + return path; } - } - - /** Graph where traversal endpoint is performed */ - Graph graph; - /** Internal traversal API */ - Traversal traversal; - - /** - * Constructor. - * - * @param graph the graph used for traversal endpoint - * @param direction a string (either "forward" or "backward") specifying edge orientation - * @param edgesFmt a formatted string describing allowed edges - */ - public Endpoint(Graph graph, String direction, String edgesFmt) { - this.graph = graph; - this.traversal = new Traversal(graph, direction, edgesFmt); - } - - /** - * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. - * - * @param nodeIds the list of long node ids - * @return a list of corresponding SWH PIDs - */ - private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { - ArrayList swhPIDs = new ArrayList<>(); - for (long nodeId : nodeIds) { - swhPIDs.add(graph.getSwhPID(nodeId)); + + /** + * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. + * + * @param pathsNodeId the list of paths with long node ids + * @return a list of corresponding {@link SwhPath} + * @see org.softwareheritage.graph.SwhPath + */ + private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { + ArrayList paths = new ArrayList<>(); + for (ArrayList path : pathsNodeId) { + paths.add(convertNodesToSwhPath(path)); + } + return paths; } - return swhPIDs; - } - - /** - * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. - * - * @param nodeIds the list of long node ids - * @return the corresponding {@link SwhPath} - * @see org.softwareheritage.graph.SwhPath - */ - private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { - SwhPath path = new SwhPath(); - for (long nodeId : nodeIds) { - path.add(graph.getSwhPID(nodeId)); + + /** + * Leaves endpoint wrapper. + * + * @param input input parameters for the underlying endpoint call + * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata + * @see org.softwareheritage.graph.SwhPID + * @see org.softwareheritage.graph.algo.Traversal#leaves(long) + */ + public Output leaves(Input input) { + Output> output = new Output<>(); + long startTime; + + startTime = Timing.start(); + long srcNodeId = graph.getNodeId(input.src); + output.meta.timings.pid2node = Timing.stop(startTime); + + startTime = Timing.start(); + ArrayList nodeIds = traversal.leaves(srcNodeId); + output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); + + startTime = Timing.start(); + output.result = convertNodesToSwhPIDs(nodeIds); + output.meta.timings.node2pid = Timing.stop(startTime); + + return output; } - return path; - } - - /** - * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. - * - * @param pathsNodeId the list of paths with long node ids - * @return a list of corresponding {@link SwhPath} - * @see org.softwareheritage.graph.SwhPath - */ - private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { - ArrayList paths = new ArrayList<>(); - for (ArrayList path : pathsNodeId) { - paths.add(convertNodesToSwhPath(path)); + + /** + * Neighbors endpoint wrapper. + * + * @param input input parameters for the underlying endpoint call + * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata + * @see org.softwareheritage.graph.SwhPID + * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) + */ + public Output neighbors(Input input) { + Output> output = new Output<>(); + long startTime; + + startTime = Timing.start(); + long srcNodeId = graph.getNodeId(input.src); + output.meta.timings.pid2node = Timing.stop(startTime); + + startTime = Timing.start(); + ArrayList nodeIds = traversal.neighbors(srcNodeId); + output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); + + startTime = Timing.start(); + output.result = convertNodesToSwhPIDs(nodeIds); + output.meta.timings.node2pid = Timing.stop(startTime); + + return output; } - return paths; - } - - /** - * Leaves endpoint wrapper. - * - * @param input input parameters for the underlying endpoint call - * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata - * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.algo.Traversal#leaves(long) - */ - public Output leaves(Input input) { - Output> output = new Output<>(); - long startTime; - - startTime = Timing.start(); - long srcNodeId = graph.getNodeId(input.src); - output.meta.timings.pid2node = Timing.stop(startTime); - - startTime = Timing.start(); - ArrayList nodeIds = traversal.leaves(srcNodeId); - output.meta.timings.traversal = Timing.stop(startTime); - output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); - - startTime = Timing.start(); - output.result = convertNodesToSwhPIDs(nodeIds); - output.meta.timings.node2pid = Timing.stop(startTime); - - return output; - } - - /** - * Neighbors endpoint wrapper. - * - * @param input input parameters for the underlying endpoint call - * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata - * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) - */ - public Output neighbors(Input input) { - Output> output = new Output<>(); - long startTime; - - startTime = Timing.start(); - long srcNodeId = graph.getNodeId(input.src); - output.meta.timings.pid2node = Timing.stop(startTime); - - startTime = Timing.start(); - ArrayList nodeIds = traversal.neighbors(srcNodeId); - output.meta.timings.traversal = Timing.stop(startTime); - output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); - - startTime = Timing.start(); - output.result = convertNodesToSwhPIDs(nodeIds); - output.meta.timings.node2pid = Timing.stop(startTime); - - return output; - } - - /** - * Walk endpoint wrapper. - * - * @param input input parameters for the underlying endpoint call - * @return the resulting {@link SwhPath} from endpoint call and operation metadata - * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.SwhPath - * @see org.softwareheritage.graph.algo.Traversal#walk - */ - public Output walk(Input input) { - Output output = new Output<>(); - long startTime; - - startTime = Timing.start(); - long srcNodeId = graph.getNodeId(input.src); - output.meta.timings.pid2node = Timing.stop(startTime); - - ArrayList nodeIds = new ArrayList(); - - // Destination is either a SWH PID or a node type - try { - SwhPID dstSwhPID = new SwhPID(input.dstFmt); - long dstNodeId = graph.getNodeId(dstSwhPID); - - startTime = Timing.start(); - nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm); - output.meta.timings.traversal = Timing.stop(startTime); - } catch (IllegalArgumentException ignored1) { - try { - Node.Type dstType = Node.Type.fromStr(input.dstFmt); + + /** + * Walk endpoint wrapper. + * + * @param input input parameters for the underlying endpoint call + * @return the resulting {@link SwhPath} from endpoint call and operation metadata + * @see org.softwareheritage.graph.SwhPID + * @see org.softwareheritage.graph.SwhPath + * @see org.softwareheritage.graph.algo.Traversal#walk + */ + public Output walk(Input input) { + Output output = new Output<>(); + long startTime; + + startTime = Timing.start(); + long srcNodeId = graph.getNodeId(input.src); + output.meta.timings.pid2node = Timing.stop(startTime); + + ArrayList nodeIds = new ArrayList(); + + // Destination is either a SWH PID or a node type + try { + SwhPID dstSwhPID = new SwhPID(input.dstFmt); + long dstNodeId = graph.getNodeId(dstSwhPID); + + startTime = Timing.start(); + nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm); + output.meta.timings.traversal = Timing.stop(startTime); + } catch (IllegalArgumentException ignored1) { + try { + Node.Type dstType = Node.Type.fromStr(input.dstFmt); + + startTime = Timing.start(); + nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm); + output.meta.timings.traversal = Timing.stop(startTime); + } catch (IllegalArgumentException ignored2) { + } + } + + output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); + + startTime = Timing.start(); + output.result = convertNodesToSwhPath(nodeIds); + output.meta.timings.node2pid = Timing.stop(startTime); + + return output; + } + + /** + * VisitNodes endpoint wrapper. + * + * @param input input parameters for the underlying endpoint call + * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata + * @see org.softwareheritage.graph.SwhPID + * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) + */ + public Output visitNodes(Input input) { + Output> output = new Output<>(); + long startTime; startTime = Timing.start(); - nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm); + long srcNodeId = graph.getNodeId(input.src); + output.meta.timings.pid2node = Timing.stop(startTime); + + startTime = Timing.start(); + ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); - } catch (IllegalArgumentException ignored2) { - } + output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); + + startTime = Timing.start(); + output.result = convertNodesToSwhPIDs(nodeIds); + output.meta.timings.node2pid = Timing.stop(startTime); + + return output; } - output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); - - startTime = Timing.start(); - output.result = convertNodesToSwhPath(nodeIds); - output.meta.timings.node2pid = Timing.stop(startTime); - - return output; - } - - /** - * VisitNodes endpoint wrapper. - * - * @param input input parameters for the underlying endpoint call - * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata - * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) - */ - public Output visitNodes(Input input) { - Output> output = new Output<>(); - long startTime; - - startTime = Timing.start(); - long srcNodeId = graph.getNodeId(input.src); - output.meta.timings.pid2node = Timing.stop(startTime); - - startTime = Timing.start(); - ArrayList nodeIds = traversal.visitNodes(srcNodeId); - output.meta.timings.traversal = Timing.stop(startTime); - output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); - - startTime = Timing.start(); - output.result = convertNodesToSwhPIDs(nodeIds); - output.meta.timings.node2pid = Timing.stop(startTime); - - return output; - } - - /** - * VisitPaths endpoint wrapper. - * - * @param input input parameters for the underlying endpoint call - * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata - * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.SwhPath - * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) - */ - public Output visitPaths(Input input) { - Output> output = new Output<>(); - long startTime; - - startTime = Timing.start(); - long srcNodeId = graph.getNodeId(input.src); - output.meta.timings.pid2node = Timing.stop(startTime); - - startTime = Timing.start(); - ArrayList> paths = traversal.visitPaths(srcNodeId); - output.meta.timings.traversal = Timing.stop(startTime); - output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); - - startTime = Timing.start(); - output.result = convertPathsToSwhPIDs(paths); - output.meta.timings.node2pid = Timing.stop(startTime); - - return output; - } + /** + * VisitPaths endpoint wrapper. + * + * @param input input parameters for the underlying endpoint call + * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata + * @see org.softwareheritage.graph.SwhPID + * @see org.softwareheritage.graph.SwhPath + * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) + */ + public Output visitPaths(Input input) { + Output> output = new Output<>(); + long startTime; + + startTime = Timing.start(); + long srcNodeId = graph.getNodeId(input.src); + output.meta.timings.pid2node = Timing.stop(startTime); + + startTime = Timing.start(); + ArrayList> paths = traversal.visitPaths(srcNodeId); + output.meta.timings.traversal = Timing.stop(startTime); + output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); + + startTime = Timing.start(); + output.result = convertPathsToSwhPIDs(paths); + output.meta.timings.node2pid = Timing.stop(startTime); + + return output; + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java index e40558b..7b6187e 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,194 +1,194 @@ package org.softwareheritage.graph; import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.NodeIdMap; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to * PID for users results). However, since graph traversal can be restricted depending on the node * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full * PID lookup. * * @author Thibault Allançon * @see org.softwareheritage.graph.AllowedEdges * @see org.softwareheritage.graph.NodeIdMap; * @see org.softwareheritage.graph.NodeTypesMap; */ public class Graph { - /** File extension for the SWH PID to long node id map */ - public static final String PID_TO_NODE = ".pid2node.csv"; - /** File extension for the long node id to SWH PID map */ - public static final String NODE_TO_PID = ".node2pid.csv"; - /** File extension for the long node id to node typ map */ - public static final String NODE_TO_TYPE = ".node2type.map"; - - /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ - BVGraph graph; - /** Transposed compressed graph (used for backward traversals) */ - BVGraph graphTransposed; - /** Path and basename of the compressed graph */ - String path; - /** Mapping long id ↔ SWH PIDs */ - NodeIdMap nodeIdMap; - /** Mapping long id → node types */ - NodeTypesMap nodeTypesMap; - - /** - * Constructor. - * - * @param path path and basename of the compressed graph to load - */ - public Graph(String path) throws IOException { - this.graph = BVGraph.load(path); - this.graphTransposed = BVGraph.load(path + "-transposed"); - this.path = path; - this.nodeIdMap = new NodeIdMap(path, getNbNodes()); - this.nodeTypesMap = new NodeTypesMap(path); - } - - /** - * Cleans up graph resources after use. - */ - public void cleanUp() throws IOException { - nodeIdMap.close(); - } - - /** - * Returns the graph full path. - * - * @return graph full path - */ - public String getPath() { - return path; - } - - /** - * Converts {@link SwhPID} node to long. - * - * @param swhPID node specified as a {@link SwhPID} - * @return internal long node id - * @see org.softwareheritage.graph.SwhPID - */ - public long getNodeId(SwhPID swhPID) { - return nodeIdMap.getNodeId(swhPID); - } - - /** - * Converts long id node to {@link SwhPID}. - * - * @param nodeId node specified as a long id - * @return external SWH PID - * @see org.softwareheritage.graph.SwhPID - */ - public SwhPID getSwhPID(long nodeId) { - return nodeIdMap.getSwhPID(nodeId); - } - - /** - * Returns node type. - * - * @param nodeId node specified as a long id - * @return corresponding node type - * @see org.softwareheritage.graph.Node.Type - */ - public Node.Type getNodeType(long nodeId) { - return nodeTypesMap.getType(nodeId); - } - - /** - * Returns number of nodes in the graph. - * - * @return number of nodes in the graph - */ - public long getNbNodes() { - return graph.numNodes(); - } - - /** - * Returns number of edges in the graph. - * - * @return number of edges in the graph - */ - public long getNbEdges() { - return graph.numArcs(); - } - - /** - * Returns lazy iterator of successors of a node. - * - * @param nodeId node specified as a long id - * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator - */ - public LazyLongIterator successors(long nodeId) { - return graph.successors(nodeId); - } - - /** - * Returns the outdegree of a node. - * - * @param nodeId node specified as a long id - * @return outdegree of a node - */ - public long outdegree(long nodeId) { - return graph.outdegree(nodeId); - } - - /** - * Returns lazy iterator of predecessors of a node. - * - * @param nodeId node specified as a long id - * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator - */ - public LazyLongIterator predecessors(long nodeId) { - return graphTransposed.successors(nodeId); - } - - /** - * Returns the indegree of a node. - * - * @param nodeId node specified as a long id - * @return indegree of a node - */ - public long indegree(long nodeId) { - return graphTransposed.outdegree(nodeId); - } - - /** - * Returns the degree of a node, depending on graph orientation. - * - * @param nodeId node specified as a long id - * @param useTransposed boolean value to use transposed graph - * @return degree of a node - */ - public long degree(long nodeId, boolean useTransposed) { - return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); - } - - /** - * Returns the neighbors of a node (as a lazy iterator), depending on graph orientation. - * - * @param nodeId node specified as a long id - * @param useTransposed boolean value to use transposed graph - * @return lazy iterator of neighbors of the node, specified as a WebGraph LazyLongIterator - */ - public LazyLongIterator neighbors(long nodeId, boolean useTransposed) { - return (useTransposed) ? predecessors(nodeId) : successors(nodeId); - } + /** File extension for the SWH PID to long node id map */ + public static final String PID_TO_NODE = ".pid2node.csv"; + /** File extension for the long node id to SWH PID map */ + public static final String NODE_TO_PID = ".node2pid.csv"; + /** File extension for the long node id to node typ map */ + public static final String NODE_TO_TYPE = ".node2type.map"; + + /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ + BVGraph graph; + /** Transposed compressed graph (used for backward traversals) */ + BVGraph graphTransposed; + /** Path and basename of the compressed graph */ + String path; + /** Mapping long id ↔ SWH PIDs */ + NodeIdMap nodeIdMap; + /** Mapping long id → node types */ + NodeTypesMap nodeTypesMap; + + /** + * Constructor. + * + * @param path path and basename of the compressed graph to load + */ + public Graph(String path) throws IOException { + this.graph = BVGraph.load(path); + this.graphTransposed = BVGraph.load(path + "-transposed"); + this.path = path; + this.nodeIdMap = new NodeIdMap(path, getNbNodes()); + this.nodeTypesMap = new NodeTypesMap(path); + } + + /** + * Cleans up graph resources after use. + */ + public void cleanUp() throws IOException { + nodeIdMap.close(); + } + + /** + * Returns the graph full path. + * + * @return graph full path + */ + public String getPath() { + return path; + } + + /** + * Converts {@link SwhPID} node to long. + * + * @param swhPID node specified as a {@link SwhPID} + * @return internal long node id + * @see org.softwareheritage.graph.SwhPID + */ + public long getNodeId(SwhPID swhPID) { + return nodeIdMap.getNodeId(swhPID); + } + + /** + * Converts long id node to {@link SwhPID}. + * + * @param nodeId node specified as a long id + * @return external SWH PID + * @see org.softwareheritage.graph.SwhPID + */ + public SwhPID getSwhPID(long nodeId) { + return nodeIdMap.getSwhPID(nodeId); + } + + /** + * Returns node type. + * + * @param nodeId node specified as a long id + * @return corresponding node type + * @see org.softwareheritage.graph.Node.Type + */ + public Node.Type getNodeType(long nodeId) { + return nodeTypesMap.getType(nodeId); + } + + /** + * Returns number of nodes in the graph. + * + * @return number of nodes in the graph + */ + public long getNbNodes() { + return graph.numNodes(); + } + + /** + * Returns number of edges in the graph. + * + * @return number of edges in the graph + */ + public long getNbEdges() { + return graph.numArcs(); + } + + /** + * Returns lazy iterator of successors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator successors(long nodeId) { + return graph.successors(nodeId); + } + + /** + * Returns the outdegree of a node. + * + * @param nodeId node specified as a long id + * @return outdegree of a node + */ + public long outdegree(long nodeId) { + return graph.outdegree(nodeId); + } + + /** + * Returns lazy iterator of predecessors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator predecessors(long nodeId) { + return graphTransposed.successors(nodeId); + } + + /** + * Returns the indegree of a node. + * + * @param nodeId node specified as a long id + * @return indegree of a node + */ + public long indegree(long nodeId) { + return graphTransposed.outdegree(nodeId); + } + + /** + * Returns the degree of a node, depending on graph orientation. + * + * @param nodeId node specified as a long id + * @param useTransposed boolean value to use transposed graph + * @return degree of a node + */ + public long degree(long nodeId, boolean useTransposed) { + return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); + } + + /** + * Returns the neighbors of a node (as a lazy iterator), depending on graph orientation. + * + * @param nodeId node specified as a long id + * @param useTransposed boolean value to use transposed graph + * @return lazy iterator of neighbors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator neighbors(long nodeId, boolean useTransposed) { + return (useTransposed) ? predecessors(nodeId) : successors(nodeId); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java index 4cdf690..bdae373 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -1,84 +1,84 @@ package org.softwareheritage.graph; import java.util.Iterator; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; /** * Iterator class to go over a node neighbors in the graph. *

* Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals. * * @author Thibault Allançon * @see org.softwareheritage.graph.AllowedEdges */ public class Neighbors implements Iterable { - /** Graph used to explore neighbors */ - Graph graph; - /** Boolean to specify the use of the transposed graph */ - boolean useTransposed; - /** Graph edge restriction */ - AllowedEdges edges; - /** Source node from which neighbors will be listed */ - long srcNodeId; + /** Graph used to explore neighbors */ + Graph graph; + /** Boolean to specify the use of the transposed graph */ + boolean useTransposed; + /** Graph edge restriction */ + AllowedEdges edges; + /** Source node from which neighbors will be listed */ + long srcNodeId; - /** - * Constructor. - * - * @param graph graph used to explore neighbors - * @param useTransposed boolean value to use transposed graph - * @param edges edges allowed to be used in the graph - * @param srcNodeId source node from where to list neighbors - */ - public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) { - this.graph = graph; - this.useTransposed = useTransposed; - this.edges = edges; - this.srcNodeId = srcNodeId; - } + /** + * Constructor. + * + * @param graph graph used to explore neighbors + * @param useTransposed boolean value to use transposed graph + * @param edges edges allowed to be used in the graph + * @param srcNodeId source node from where to list neighbors + */ + public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) { + this.graph = graph; + this.useTransposed = useTransposed; + this.edges = edges; + this.srcNodeId = srcNodeId; + } - @Override - public Iterator iterator() { - return new NeighborsIterator(); - } + @Override + public Iterator iterator() { + return new NeighborsIterator(); + } - /** - * Inner class for {@link Neighbors} iterator. - * - * @author Thibault Allançon - */ + /** + * Inner class for {@link Neighbors} iterator. + * + * @author Thibault Allançon + */ - public class NeighborsIterator implements Iterator { - LazyLongIterator neighbors; - long nextNeighborId; + public class NeighborsIterator implements Iterator { + LazyLongIterator neighbors; + long nextNeighborId; - public NeighborsIterator() { - this.neighbors = graph.neighbors(srcNodeId, useTransposed); - this.nextNeighborId = -1; - } + public NeighborsIterator() { + this.neighbors = graph.neighbors(srcNodeId, useTransposed); + this.nextNeighborId = -1; + } - public boolean hasNext() { - // Case 1: no edge restriction, bypass type checks and skip to next neighbor - if (edges.restrictedTo == null) { - nextNeighborId = neighbors.nextLong(); - return (nextNeighborId != -1); - } + public boolean hasNext() { + // Case 1: no edge restriction, bypass type checks and skip to next neighbor + if (edges.restrictedTo == null) { + nextNeighborId = neighbors.nextLong(); + return (nextNeighborId != -1); + } - // Case 2: edge restriction, look ahead for next neighbor - while ((nextNeighborId = neighbors.nextLong()) != -1) { - if (edges.isAllowed(srcNodeId, nextNeighborId)) { - return true; + // Case 2: edge restriction, look ahead for next neighbor + while ((nextNeighborId = neighbors.nextLong()) != -1) { + if (edges.isAllowed(srcNodeId, nextNeighborId)) { + return true; + } + } + return false; } - } - return false; - } - public Long next() { - return nextNeighborId; + public Long next() { + return nextNeighborId; + } } - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Node.java b/java/server/src/main/java/org/softwareheritage/graph/Node.java index ecd263f..7acc2bf 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Node.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Node.java @@ -1,93 +1,93 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * A node in the Software Heritage graph. * * @author Thibault Allançon */ public class Node { - /** - * Software Heritage graph node types, as described in the - * data model. - */ - public enum Type { - /** Content node */ - CNT, - /** Directory node */ - DIR, - /** Origin node */ - ORI, - /** Release node */ - REL, - /** Revision node */ - REV, - /** Snapshot node */ - SNP; - /** - * Converts integer to corresponding SWH node type. - * - * @param intType node type represented as an integer - * @return the corresponding {@link Node.Type} value - * @see org.softwareheritage.graph.Node.Type + * Software Heritage graph node types, as described in the + * data model. */ - public static Node.Type fromInt(int intType) { - switch (intType) { - case 0: - return CNT; - case 1: - return DIR; - case 2: - return ORI; - case 3: - return REL; - case 4: - return REV; - case 5: - return SNP; - } - return null; - } + public enum Type { + /** Content node */ + CNT, + /** Directory node */ + DIR, + /** Origin node */ + ORI, + /** Release node */ + REL, + /** Revision node */ + REV, + /** Snapshot node */ + SNP; - /** - * Converts string to corresponding SWH node type. - * - * @param strType node type represented as a string - * @return the corresponding {@link Node.Type} value - * @see org.softwareheritage.graph.Node.Type - */ - public static Node.Type fromStr(String strType) { - if (!strType.matches("cnt|dir|ori|rel|rev|snp")) { - throw new IllegalArgumentException("Unknown node type: " + strType); - } - return Node.Type.valueOf(strType.toUpperCase()); - } + /** + * Converts integer to corresponding SWH node type. + * + * @param intType node type represented as an integer + * @return the corresponding {@link Node.Type} value + * @see org.softwareheritage.graph.Node.Type + */ + public static Node.Type fromInt(int intType) { + switch (intType) { + case 0: + return CNT; + case 1: + return DIR; + case 2: + return ORI; + case 3: + return REL; + case 4: + return REV; + case 5: + return SNP; + } + return null; + } - /** - * Parses SWH node type possible values from formatted string (see the API - * syntax). - * - * @param strFmtType node types represented as a formatted string - * @return a list containing the {@link Node.Type} values - * @see org.softwareheritage.graph.Node.Type - */ - public static ArrayList parse(String strFmtType) { - ArrayList types = new ArrayList<>(); + /** + * Converts string to corresponding SWH node type. + * + * @param strType node type represented as a string + * @return the corresponding {@link Node.Type} value + * @see org.softwareheritage.graph.Node.Type + */ + public static Node.Type fromStr(String strType) { + if (!strType.matches("cnt|dir|ori|rel|rev|snp")) { + throw new IllegalArgumentException("Unknown node type: " + strType); + } + return Node.Type.valueOf(strType.toUpperCase()); + } + + /** + * Parses SWH node type possible values from formatted string (see the API + * syntax). + * + * @param strFmtType node types represented as a formatted string + * @return a list containing the {@link Node.Type} values + * @see org.softwareheritage.graph.Node.Type + */ + public static ArrayList parse(String strFmtType) { + ArrayList types = new ArrayList<>(); - if (strFmtType.equals("*")) { - List nodeTypes = Arrays.asList(Node.Type.values()); - types.addAll(nodeTypes); - } else { - types.add(Node.Type.fromStr(strFmtType)); - } + if (strFmtType.equals("*")) { + List nodeTypes = Arrays.asList(Node.Type.values()); + types.addAll(nodeTypes); + } else { + types.add(Node.Type.fromStr(strFmtType)); + } - return types; + return types; + } } - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java index 01586e7..8790246 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java @@ -1,98 +1,98 @@ package org.softwareheritage.graph; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.Node; /** * A Software Heritage PID, see persistent * identifier documentation. * * @author Thibault Allançon */ public class SwhPID { - /** Fixed hash length of the PID */ - public static final int HASH_LENGTH = 40; - - /** Full PID as a string */ - String swhPID; - /** PID node type */ - Node.Type type; - /** PID hex-encoded SHA1 hash */ - String hash; - - /** - * Constructor. - * - * @param swhPID full PID as a string - */ - public SwhPID(String swhPID) { - this.swhPID = swhPID; - - // PID format: 'swh:1:type:hash' - String[] parts = swhPID.split(":"); - if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) { - throw new IllegalArgumentException( - "Expected SWH PID format to be 'swh:1:type:hash', got: " + swhPID); + /** Fixed hash length of the PID */ + public static final int HASH_LENGTH = 40; + + /** Full PID as a string */ + String swhPID; + /** PID node type */ + Node.Type type; + /** PID hex-encoded SHA1 hash */ + String hash; + + /** + * Constructor. + * + * @param swhPID full PID as a string + */ + public SwhPID(String swhPID) { + this.swhPID = swhPID; + + // PID format: 'swh:1:type:hash' + String[] parts = swhPID.split(":"); + if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) { + throw new IllegalArgumentException( + "Expected SWH PID format to be 'swh:1:type:hash', got: " + swhPID); + } + + this.type = Node.Type.fromStr(parts[2]); + + this.hash = parts[3]; + if (!hash.matches("[0-9a-f]{" + HASH_LENGTH + "}")) { + throw new IllegalArgumentException("Wrong SWH PID hash format in: " + swhPID); + } } - this.type = Node.Type.fromStr(parts[2]); + @Override + public boolean equals(Object otherObj) { + if (otherObj == this) + return true; + if (!(otherObj instanceof SwhPID)) + return false; - this.hash = parts[3]; - if (!hash.matches("[0-9a-f]{" + HASH_LENGTH + "}")) { - throw new IllegalArgumentException("Wrong SWH PID hash format in: " + swhPID); + SwhPID other = (SwhPID) otherObj; + return swhPID.equals(other.getSwhPID()); + } + + @Override + public int hashCode() { + return swhPID.hashCode(); + } + + @Override + public String toString() { + return swhPID; + } + + /** + * Returns full PID as a string. + * + * @return full PID string + */ + @JsonValue + public String getSwhPID() { + return swhPID; + } + + /** + * Returns PID node type. + * + * @return PID corresponding {@link Node.Type} + * @see org.softwareheritage.graph.Node.Type + */ + public Node.Type getType() { + return type; + } + + /** + * Returns PID hex-encoded SHA1 hash. + * + * @return PID string hash + */ + public String getHash() { + return hash; } - } - - @Override - public boolean equals(Object otherObj) { - if (otherObj == this) - return true; - if (!(otherObj instanceof SwhPID)) - return false; - - SwhPID other = (SwhPID) otherObj; - return swhPID.equals(other.getSwhPID()); - } - - @Override - public int hashCode() { - return swhPID.hashCode(); - } - - @Override - public String toString() { - return swhPID; - } - - /** - * Returns full PID as a string. - * - * @return full PID string - */ - @JsonValue - public String getSwhPID() { - return swhPID; - } - - /** - * Returns PID node type. - * - * @return PID corresponding {@link Node.Type} - * @see org.softwareheritage.graph.Node.Type - */ - public Node.Type getType() { - return type; - } - - /** - * Returns PID hex-encoded SHA1 hash. - * - * @return PID string hash - */ - public String getHash() { - return hash; - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java index 718d446..c2df815 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java @@ -1,124 +1,124 @@ package org.softwareheritage.graph; import java.util.ArrayList; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.SwhPID; /** * Wrapper class to store a list of {@link SwhPID}. * * @author Thibault Allançon * @see org.softwareheritage.graph.SwhPID */ public class SwhPath { - /** Internal list of {@link SwhPID} */ - ArrayList path; - - /** - * Constructor. - */ - public SwhPath() { - this.path = new ArrayList(); - } - - /** - * Constructor. - * - * @param swhPIDs variable number of string PIDs to initialize this path with - */ - public SwhPath(String... swhPIDs) { - this(); - for (String swhPID : swhPIDs) { - add(new SwhPID(swhPID)); + /** Internal list of {@link SwhPID} */ + ArrayList path; + + /** + * Constructor. + */ + public SwhPath() { + this.path = new ArrayList(); } - } - - /** - * Constructor. - * - * @param swhPIDs variable number of {@link SwhPID} to initialize this path with - * @see org.softwareheritage.graph.SwhPID - */ - public SwhPath(SwhPID... swhPIDs) { - this(); - for (SwhPID swhPID : swhPIDs) { - add(swhPID); + + /** + * Constructor. + * + * @param swhPIDs variable number of string PIDs to initialize this path with + */ + public SwhPath(String... swhPIDs) { + this(); + for (String swhPID : swhPIDs) { + add(new SwhPID(swhPID)); + } } - } - - /** - * Returns this path as a list of {@link SwhPID}. - * - * @return list of {@link SwhPID} constituting the path - * @see org.softwareheritage.graph.SwhPID - */ - @JsonValue - public ArrayList getPath() { - return path; - } - - /** - * Adds a {@link SwhPID} to this path. - * - * @param {@link SwhPID} to add to this path - * @see org.softwareheritage.graph.SwhPID - */ - public void add(SwhPID swhPID) { - path.add(swhPID); - } - - /** - * Returns the {@link SwhPID} at the specified position in this path. - * - * @param index position of the {@link SwhPID} to return - * @return {@link SwhPID} at the specified position - * @see org.softwareheritage.graph.SwhPID - */ - public SwhPID get(int index) { - return path.get(index); - } - - /** - * Returns the number of elements in this path. - * - * @return number of elements in this path - */ - public int size() { - return path.size(); - } - - @Override - public boolean equals(Object otherObj) { - if (otherObj == this) - return true; - if (!(otherObj instanceof SwhPath)) - return false; - - SwhPath other = (SwhPath) otherObj; - if (size() != other.size()) { - return false; + + /** + * Constructor. + * + * @param swhPIDs variable number of {@link SwhPID} to initialize this path with + * @see org.softwareheritage.graph.SwhPID + */ + public SwhPath(SwhPID... swhPIDs) { + this(); + for (SwhPID swhPID : swhPIDs) { + add(swhPID); + } } - for (int i = 0; i < size(); i++) { - SwhPID thisSwhPID = get(i); - SwhPID otherSwhPID = other.get(i); - if (!thisSwhPID.equals(otherSwhPID)) { - return false; - } + /** + * Returns this path as a list of {@link SwhPID}. + * + * @return list of {@link SwhPID} constituting the path + * @see org.softwareheritage.graph.SwhPID + */ + @JsonValue + public ArrayList getPath() { + return path; } - return true; - } + /** + * Adds a {@link SwhPID} to this path. + * + * @param {@link SwhPID} to add to this path + * @see org.softwareheritage.graph.SwhPID + */ + public void add(SwhPID swhPID) { + path.add(swhPID); + } + + /** + * Returns the {@link SwhPID} at the specified position in this path. + * + * @param index position of the {@link SwhPID} to return + * @return {@link SwhPID} at the specified position + * @see org.softwareheritage.graph.SwhPID + */ + public SwhPID get(int index) { + return path.get(index); + } + + /** + * Returns the number of elements in this path. + * + * @return number of elements in this path + */ + public int size() { + return path.size(); + } + + @Override + public boolean equals(Object otherObj) { + if (otherObj == this) + return true; + if (!(otherObj instanceof SwhPath)) + return false; + + SwhPath other = (SwhPath) otherObj; + if (size() != other.size()) { + return false; + } + + for (int i = 0; i < size(); i++) { + SwhPID thisSwhPID = get(i); + SwhPID otherSwhPID = other.get(i); + if (!thisSwhPID.equals(otherSwhPID)) { + return false; + } + } + + return true; + } - @Override - public String toString() { - String str = new String(); - for (SwhPID swhPID : path) { - str += swhPID + "/"; + @Override + public String toString() { + String str = new String(); + for (SwhPID swhPID : path) { + str += swhPID + "/"; + } + return str; } - return str; - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java index a94250a..1099b6f 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java @@ -1,68 +1,68 @@ package org.softwareheritage.graph.algo; import java.io.FileInputStream; import java.io.IOException; import java.util.Properties; /** * Statistics on the compressed graph. *

* These statistics are not computed but directly read from WebGraph generated .stats and .properties files. * * @author Thibault Allançon */ public class Stats { - public class Counts { - public long nodes; - public long edges; - } + public class Counts { + public long nodes; + public long edges; + } - public class Ratios { - public double compression; - public double bitsPerNode; - public double bitsPerEdge; - public double avgLocality; - } + public class Ratios { + public double compression; + public double bitsPerNode; + public double bitsPerEdge; + public double avgLocality; + } - public class Degree { - public long min; - public long max; - public double avg; - } + public class Degree { + public long min; + public long max; + public double avg; + } - public Counts counts; - public Ratios ratios; - public Degree indegree; - public Degree outdegree; + public Counts counts; + public Ratios ratios; + public Degree indegree; + public Degree outdegree; - /** - * Constructor. - * - * @param graphPath path and basename of compressed graph - */ - public Stats(String graphPath) throws IOException { - Properties properties = new Properties(); - properties.load(new FileInputStream(graphPath + ".properties")); - properties.load(new FileInputStream(graphPath + ".stats")); + /** + * Constructor. + * + * @param graphPath path and basename of compressed graph + */ + public Stats(String graphPath) throws IOException { + Properties properties = new Properties(); + properties.load(new FileInputStream(graphPath + ".properties")); + properties.load(new FileInputStream(graphPath + ".stats")); - this.counts = new Counts(); - this.ratios = new Ratios(); - this.indegree = new Degree(); - this.outdegree = new Degree(); + this.counts = new Counts(); + this.ratios = new Ratios(); + this.indegree = new Degree(); + this.outdegree = new Degree(); - this.counts.nodes = Long.parseLong(properties.getProperty("nodes")); - this.counts.edges = Long.parseLong(properties.getProperty("arcs")); - this.ratios.compression = Double.parseDouble(properties.getProperty("compratio")); - this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode")); - this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink")); - this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality")); - this.indegree.min = Long.parseLong(properties.getProperty("minindegree")); - this.indegree.max = Long.parseLong(properties.getProperty("maxindegree")); - this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree")); - this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree")); - this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree")); - this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree")); - } + this.counts.nodes = Long.parseLong(properties.getProperty("nodes")); + this.counts.edges = Long.parseLong(properties.getProperty("arcs")); + this.ratios.compression = Double.parseDouble(properties.getProperty("compratio")); + this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode")); + this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink")); + this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality")); + this.indegree.min = Long.parseLong(properties.getProperty("minindegree")); + this.indegree.max = Long.parseLong(properties.getProperty("maxindegree")); + this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree")); + this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree")); + this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree")); + this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree")); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java index ee06f7c..c923724 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -1,328 +1,328 @@ package org.softwareheritage.graph.algo; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import java.util.Queue; import java.util.Stack; import it.unimi.dsi.bits.LongArrayBitVector; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; /** * Traversal algorithms on the compressed graph. *

* Internal implementation of the traversal API endpoints. These methods only input/output internal * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage * PID. * * @author Thibault Allançon * @see org.softwareheritage.graph.Endpoint */ public class Traversal { - /** Graph used in the traversal */ - Graph graph; - /** Boolean to specify the use of the transposed graph */ - boolean useTransposed; - /** Graph edge restriction */ - AllowedEdges edges; - - /** Bit array storing if we have visited a node */ - LongArrayBitVector visited; - /** Hash map storing parent node id for each nodes during a traversal */ - Map parentNode; - /** Number of edges accessed during traversal */ - long nbEdgesAccessed; - - /** - * Constructor. - * - * @param graph graph used in the traversal - * @param direction a string (either "forward" or "backward") specifying edge orientation - * @param edgesFmt a formatted string describing allowed edges - */ - public Traversal(Graph graph, String direction, String edgesFmt) { - if (!direction.matches("forward|backward")) { - throw new IllegalArgumentException("Unknown traversal direction: " + direction); + /** Graph used in the traversal */ + Graph graph; + /** Boolean to specify the use of the transposed graph */ + boolean useTransposed; + /** Graph edge restriction */ + AllowedEdges edges; + + /** Bit array storing if we have visited a node */ + LongArrayBitVector visited; + /** Hash map storing parent node id for each nodes during a traversal */ + Map parentNode; + /** Number of edges accessed during traversal */ + long nbEdgesAccessed; + + /** + * Constructor. + * + * @param graph graph used in the traversal + * @param direction a string (either "forward" or "backward") specifying edge orientation + * @param edgesFmt a formatted string describing allowed edges + */ + public Traversal(Graph graph, String direction, String edgesFmt) { + if (!direction.matches("forward|backward")) { + throw new IllegalArgumentException("Unknown traversal direction: " + direction); + } + + this.graph = graph; + this.useTransposed = (direction.equals("backward")); + this.edges = new AllowedEdges(graph, edgesFmt); + + long nbNodes = graph.getNbNodes(); + this.visited = LongArrayBitVector.ofLength(nbNodes); + this.parentNode = new HashMap<>(); + this.nbEdgesAccessed = 0; } - this.graph = graph; - this.useTransposed = (direction.equals("backward")); - this.edges = new AllowedEdges(graph, edgesFmt); - - long nbNodes = graph.getNbNodes(); - this.visited = LongArrayBitVector.ofLength(nbNodes); - this.parentNode = new HashMap<>(); - this.nbEdgesAccessed = 0; - } - - /** - * Returns number of accessed edges during traversal. - * - * @return number of edges accessed in last traversal - */ - public long getNbEdgesAccessed() { - return nbEdgesAccessed; - } - - /** - * Returns the leaves of a subgraph rooted at the specified source node. - * - * @param srcNodeId source node - * @return list of node ids corresponding to the leaves - */ - public ArrayList leaves(long srcNodeId) { - ArrayList nodeIds = new ArrayList(); - Stack stack = new Stack(); - this.nbEdgesAccessed = 0; - - stack.push(srcNodeId); - visited.set(srcNodeId); - - while (!stack.isEmpty()) { - long currentNodeId = stack.pop(); - - long neighborsCnt = 0; - nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); - for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { - neighborsCnt++; - if (!visited.getBoolean(neighborNodeId)) { - stack.push(neighborNodeId); - visited.set(neighborNodeId); + /** + * Returns number of accessed edges during traversal. + * + * @return number of edges accessed in last traversal + */ + public long getNbEdgesAccessed() { + return nbEdgesAccessed; + } + + /** + * Returns the leaves of a subgraph rooted at the specified source node. + * + * @param srcNodeId source node + * @return list of node ids corresponding to the leaves + */ + public ArrayList leaves(long srcNodeId) { + ArrayList nodeIds = new ArrayList(); + Stack stack = new Stack(); + this.nbEdgesAccessed = 0; + + stack.push(srcNodeId); + visited.set(srcNodeId); + + while (!stack.isEmpty()) { + long currentNodeId = stack.pop(); + + long neighborsCnt = 0; + nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + neighborsCnt++; + if (!visited.getBoolean(neighborNodeId)) { + stack.push(neighborNodeId); + visited.set(neighborNodeId); + } + } + + if (neighborsCnt == 0) { + nodeIds.add(currentNodeId); + } } - } - if (neighborsCnt == 0) { - nodeIds.add(currentNodeId); - } + return nodeIds; } - return nodeIds; - } - - /** - * Returns node direct neighbors (linked with exactly one edge). - * - * @param srcNodeId source node - * @return list of node ids corresponding to the neighbors - */ - public ArrayList neighbors(long srcNodeId) { - ArrayList nodeIds = new ArrayList(); - this.nbEdgesAccessed = graph.degree(srcNodeId, useTransposed); - for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { - nodeIds.add(neighborNodeId); - } - return nodeIds; - } - - /** - * Performs a graph traversal and returns explored nodes. - * - * @param srcNodeId source node - * @return list of explored node ids - */ - public ArrayList visitNodes(long srcNodeId) { - ArrayList nodeIds = new ArrayList(); - Stack stack = new Stack(); - this.nbEdgesAccessed = 0; - - stack.push(srcNodeId); - visited.set(srcNodeId); - - while (!stack.isEmpty()) { - long currentNodeId = stack.pop(); - nodeIds.add(currentNodeId); - - nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); - for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { - if (!visited.getBoolean(neighborNodeId)) { - stack.push(neighborNodeId); - visited.set(neighborNodeId); + /** + * Returns node direct neighbors (linked with exactly one edge). + * + * @param srcNodeId source node + * @return list of node ids corresponding to the neighbors + */ + public ArrayList neighbors(long srcNodeId) { + ArrayList nodeIds = new ArrayList(); + this.nbEdgesAccessed = graph.degree(srcNodeId, useTransposed); + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { + nodeIds.add(neighborNodeId); } - } + return nodeIds; } - return nodeIds; - } - - /** - * Performs a graph traversal and returns explored paths. - * - * @param srcNodeId source node - * @return list of explored paths (represented as a list of node ids) - */ - public ArrayList> visitPaths(long srcNodeId) { - ArrayList> paths = new ArrayList<>(); - Stack currentPath = new Stack(); - this.nbEdgesAccessed = 0; - visitPathsInternal(srcNodeId, paths, currentPath); - return paths; - } - - /** - * Internal recursive function of {@link #visitPaths}. - * - * @param currentNodeId current node - * @param paths list of currently stored paths - * @param currentPath current path as node ids - */ - private void visitPathsInternal( - long currentNodeId, ArrayList> paths, Stack currentPath) { - currentPath.push(currentNodeId); - - long visitedNeighbors = 0; - nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); - for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { - visitPathsInternal(neighborNodeId, paths, currentPath); - visitedNeighbors++; + /** + * Performs a graph traversal and returns explored nodes. + * + * @param srcNodeId source node + * @return list of explored node ids + */ + public ArrayList visitNodes(long srcNodeId) { + ArrayList nodeIds = new ArrayList(); + Stack stack = new Stack(); + this.nbEdgesAccessed = 0; + + stack.push(srcNodeId); + visited.set(srcNodeId); + + while (!stack.isEmpty()) { + long currentNodeId = stack.pop(); + nodeIds.add(currentNodeId); + + nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + if (!visited.getBoolean(neighborNodeId)) { + stack.push(neighborNodeId); + visited.set(neighborNodeId); + } + } + } + + return nodeIds; } - if (visitedNeighbors == 0) { - ArrayList path = new ArrayList(); - for (long nodeId : currentPath) { - path.add(nodeId); - } - paths.add(path); + /** + * Performs a graph traversal and returns explored paths. + * + * @param srcNodeId source node + * @return list of explored paths (represented as a list of node ids) + */ + public ArrayList> visitPaths(long srcNodeId) { + ArrayList> paths = new ArrayList<>(); + Stack currentPath = new Stack(); + this.nbEdgesAccessed = 0; + visitPathsInternal(srcNodeId, paths, currentPath); + return paths; } - currentPath.pop(); - } - - /** - * Performs a graph traversal and returns the first found path from source to destination. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @return found path as a list of node ids - */ - public ArrayList walk(long srcNodeId, T dst, String algorithm) { - long dstNodeId = -1; - if (algorithm.equals("dfs")) { - dstNodeId = walkInternalDfs(srcNodeId, dst); - } else if (algorithm.equals("bfs")) { - dstNodeId = walkInternalBfs(srcNodeId, dst); - } else { - throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); + /** + * Internal recursive function of {@link #visitPaths}. + * + * @param currentNodeId current node + * @param paths list of currently stored paths + * @param currentPath current path as node ids + */ + private void visitPathsInternal( + long currentNodeId, ArrayList> paths, Stack currentPath) { + currentPath.push(currentNodeId); + + long visitedNeighbors = 0; + nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + visitPathsInternal(neighborNodeId, paths, currentPath); + visitedNeighbors++; + } + + if (visitedNeighbors == 0) { + ArrayList path = new ArrayList(); + for (long nodeId : currentPath) { + path.add(nodeId); + } + paths.add(path); + } + + currentPath.pop(); } - if (dstNodeId == -1) { - throw new IllegalArgumentException("Unable to find destination point: " + dst); + /** + * Performs a graph traversal and returns the first found path from source to destination. + * + * @param srcNodeId source node + * @param dst destination (either a node or a node type) + * @return found path as a list of node ids + */ + public ArrayList walk(long srcNodeId, T dst, String algorithm) { + long dstNodeId = -1; + if (algorithm.equals("dfs")) { + dstNodeId = walkInternalDfs(srcNodeId, dst); + } else if (algorithm.equals("bfs")) { + dstNodeId = walkInternalBfs(srcNodeId, dst); + } else { + throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); + } + + if (dstNodeId == -1) { + throw new IllegalArgumentException("Unable to find destination point: " + dst); + } + + ArrayList nodeIds = backtracking(srcNodeId, dstNodeId); + return nodeIds; } - ArrayList nodeIds = backtracking(srcNodeId, dstNodeId); - return nodeIds; - } - - /** - * Internal DFS function of {@link #walk}. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @return final destination node or -1 if no path found - */ - private long walkInternalDfs(long srcNodeId, T dst) { - Stack stack = new Stack(); - this.nbEdgesAccessed = 0; - - stack.push(srcNodeId); - visited.set(srcNodeId); - - while (!stack.isEmpty()) { - long currentNodeId = stack.pop(); - if (isDstNode(currentNodeId, dst)) { - return currentNodeId; - } - - nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); - for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { - if (!visited.getBoolean(neighborNodeId)) { - stack.push(neighborNodeId); - visited.set(neighborNodeId); - parentNode.put(neighborNodeId, currentNodeId); + /** + * Internal DFS function of {@link #walk}. + * + * @param srcNodeId source node + * @param dst destination (either a node or a node type) + * @return final destination node or -1 if no path found + */ + private long walkInternalDfs(long srcNodeId, T dst) { + Stack stack = new Stack(); + this.nbEdgesAccessed = 0; + + stack.push(srcNodeId); + visited.set(srcNodeId); + + while (!stack.isEmpty()) { + long currentNodeId = stack.pop(); + if (isDstNode(currentNodeId, dst)) { + return currentNodeId; + } + + nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + if (!visited.getBoolean(neighborNodeId)) { + stack.push(neighborNodeId); + visited.set(neighborNodeId); + parentNode.put(neighborNodeId, currentNodeId); + } + } } - } + + return -1; } - return -1; - } - - /** - * Internal BFS function of {@link #walk}. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @return final destination node or -1 if no path found - */ - private long walkInternalBfs(long srcNodeId, T dst) { - Queue queue = new LinkedList(); - this.nbEdgesAccessed = 0; - - queue.add(srcNodeId); - visited.set(srcNodeId); - - while (!queue.isEmpty()) { - long currentNodeId = queue.poll(); - if (isDstNode(currentNodeId, dst)) { - return currentNodeId; - } - - nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); - for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { - if (!visited.getBoolean(neighborNodeId)) { - queue.add(neighborNodeId); - visited.set(neighborNodeId); - parentNode.put(neighborNodeId, currentNodeId); + /** + * Internal BFS function of {@link #walk}. + * + * @param srcNodeId source node + * @param dst destination (either a node or a node type) + * @return final destination node or -1 if no path found + */ + private long walkInternalBfs(long srcNodeId, T dst) { + Queue queue = new LinkedList(); + this.nbEdgesAccessed = 0; + + queue.add(srcNodeId); + visited.set(srcNodeId); + + while (!queue.isEmpty()) { + long currentNodeId = queue.poll(); + if (isDstNode(currentNodeId, dst)) { + return currentNodeId; + } + + nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { + if (!visited.getBoolean(neighborNodeId)) { + queue.add(neighborNodeId); + visited.set(neighborNodeId); + parentNode.put(neighborNodeId, currentNodeId); + } + } } - } + + return -1; } - return -1; - } - - /** - * Internal function of {@link #walk} to check if a node corresponds to the destination. - * - * @param nodeId current node - * @param dst destination (either a node or a node type) - * @return true if the node is a destination, or false otherwise - */ - private boolean isDstNode(long nodeId, T dst) { - if (dst instanceof Long) { - long dstNodeId = (Long) dst; - return nodeId == dstNodeId; - } else if (dst instanceof Node.Type) { - Node.Type dstType = (Node.Type) dst; - return graph.getNodeType(nodeId) == dstType; - } else { - return false; + /** + * Internal function of {@link #walk} to check if a node corresponds to the destination. + * + * @param nodeId current node + * @param dst destination (either a node or a node type) + * @return true if the node is a destination, or false otherwise + */ + private boolean isDstNode(long nodeId, T dst) { + if (dst instanceof Long) { + long dstNodeId = (Long) dst; + return nodeId == dstNodeId; + } else if (dst instanceof Node.Type) { + Node.Type dstType = (Node.Type) dst; + return graph.getNodeType(nodeId) == dstType; + } else { + return false; + } } - } - - /** - * Internal backtracking function of {@link #walk}. - * - * @param srcNodeId source node - * @param dstNodeId destination node - * @return the found path, as a list of node ids - */ - private ArrayList backtracking(long srcNodeId, long dstNodeId) { - ArrayList path = new ArrayList(); - long currentNodeId = dstNodeId; - while (currentNodeId != srcNodeId) { - path.add(currentNodeId); - currentNodeId = parentNode.get(currentNodeId); + + /** + * Internal backtracking function of {@link #walk}. + * + * @param srcNodeId source node + * @param dstNodeId destination node + * @return the found path, as a list of node ids + */ + private ArrayList backtracking(long srcNodeId, long dstNodeId) { + ArrayList path = new ArrayList(); + long currentNodeId = dstNodeId; + while (currentNodeId != srcNodeId) { + path.add(currentNodeId); + currentNodeId = parentNode.get(currentNodeId); + } + path.add(srcNodeId); + Collections.reverse(path); + return path; } - path.add(srcNodeId); - Collections.reverse(path); - return path; - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java index 3fe03e3..8b3f60c 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java @@ -1,63 +1,63 @@ package org.softwareheritage.graph.backend; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import it.unimi.dsi.io.ByteBufferInputStream; /** * Wrapper class around very big mmap()-ed file. *

* Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this * Java limit. * * @author Thibault Allançon */ public class MapFile { - /** Memory-mapped file buffer */ - ByteBufferInputStream bufferMap; - /** Fixed line length of the mmap()-ed file */ - int lineLength; - - /** - * Constructor. - * - * @param path file path to mmap() - * @param lineLength fixed length of a line in the file - */ - public MapFile(String path, int lineLength) throws IOException { - this.bufferMap = null; - this.lineLength = lineLength; - - try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { - FileChannel fileChannel = mapFile.getChannel(); - bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); + /** Memory-mapped file buffer */ + ByteBufferInputStream bufferMap; + /** Fixed line length of the mmap()-ed file */ + int lineLength; + + /** + * Constructor. + * + * @param path file path to mmap() + * @param lineLength fixed length of a line in the file + */ + public MapFile(String path, int lineLength) throws IOException { + this.bufferMap = null; + this.lineLength = lineLength; + + try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { + FileChannel fileChannel = mapFile.getChannel(); + bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); + } + } + + /** + * Returns a specific line in the file. + * + * @param lineIndex line number in the file + * @return the line at the specified position + */ + public String readAtLine(long lineIndex) { + byte[] buffer = new byte[lineLength]; + long position = lineIndex * (long) lineLength; + bufferMap.position(position); + bufferMap.read(buffer, 0, lineLength); + String line = new String(buffer); + return line.trim(); + } + + /** + * Closes the mmap()-ed file. + */ + public void close() throws IOException { + bufferMap.close(); } - } - - /** - * Returns a specific line in the file. - * - * @param lineIndex line number in the file - * @return the line at the specified position - */ - public String readAtLine(long lineIndex) { - byte[] buffer = new byte[lineLength]; - long position = lineIndex * (long) lineLength; - bufferMap.position(position); - bufferMap.read(buffer, 0, lineLength); - String line = new String(buffer); - return line.trim(); - } - - /** - * Closes the mmap()-ed file. - */ - public void close() throws IOException { - bufferMap.close(); - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java index 72a2efb..b148f28 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java @@ -1,115 +1,115 @@ package org.softwareheritage.graph.backend; import java.io.IOException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.MapFile; import org.softwareheritage.graph.backend.Setup; /** * Mapping between internal long node id and external SWH PID. *

* Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then * they are loaded here using mmap(). * * @author Thibault Allançon * @see org.softwareheritage.graph.backend.Setup */ public class NodeIdMap { - /** Fixed length of full SWH PID */ - public static final int SWH_ID_LENGTH = 50; - /** Fixed length of long node id */ - public static final int NODE_ID_LENGTH = 20; + /** Fixed length of full SWH PID */ + public static final int SWH_ID_LENGTH = 50; + /** Fixed length of long node id */ + public static final int NODE_ID_LENGTH = 20; - /** Graph path and basename */ - String graphPath; - /** Number of ids to map */ - long nbIds; - /** mmap()-ed PID_TO_NODE file */ - MapFile swhToNodeMap; - /** mmap()-ed NODE_TO_PID file */ - MapFile nodeToSwhMap; + /** Graph path and basename */ + String graphPath; + /** Number of ids to map */ + long nbIds; + /** mmap()-ed PID_TO_NODE file */ + MapFile swhToNodeMap; + /** mmap()-ed NODE_TO_PID file */ + MapFile nodeToSwhMap; - /** - * Constructor. - * - * @param graphPath full graph path - * @param nbNodes number of nodes in the graph - */ - public NodeIdMap(String graphPath, long nbNodes) throws IOException { - this.graphPath = graphPath; - this.nbIds = nbNodes; + /** + * Constructor. + * + * @param graphPath full graph path + * @param nbNodes number of nodes in the graph + */ + public NodeIdMap(String graphPath, long nbNodes) throws IOException { + this.graphPath = graphPath; + this.nbIds = nbNodes; - // +1 are for spaces and end of lines - int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1; - int nodeToSwhLineLength = SWH_ID_LENGTH + 1; - this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength); - this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength); - } + // +1 are for spaces and end of lines + int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1; + int nodeToSwhLineLength = SWH_ID_LENGTH + 1; + this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength); + this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength); + } + + /** + * Converts SWH PID to corresponding long node id. + * + * @param swhPID node represented as a {@link SwhPID} + * @return corresponding node as a long id + * @see org.softwareheritage.graph.SwhPID + */ + public long getNodeId(SwhPID swhPID) { + // Each line in PID_TO_NODE is formatted as: swhPID nodeId + // The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding + // nodeId + long start = 0; + long end = nbIds - 1; - /** - * Converts SWH PID to corresponding long node id. - * - * @param swhPID node represented as a {@link SwhPID} - * @return corresponding node as a long id - * @see org.softwareheritage.graph.SwhPID - */ - public long getNodeId(SwhPID swhPID) { - // Each line in PID_TO_NODE is formatted as: swhPID nodeId - // The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding - // nodeId - long start = 0; - long end = nbIds - 1; + while (start <= end) { + long lineNumber = (start + end) / 2L; + String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" "); + if (parts.length != 2) { + break; + } - while (start <= end) { - long lineNumber = (start + end) / 2L; - String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" "); - if (parts.length != 2) { - break; - } + String currentSwhPID = parts[0]; + long currentNodeId = Long.parseLong(parts[1]); - String currentSwhPID = parts[0]; - long currentNodeId = Long.parseLong(parts[1]); + int cmp = currentSwhPID.compareTo(swhPID.toString()); + if (cmp == 0) { + return currentNodeId; + } else if (cmp < 0) { + start = lineNumber + 1; + } else { + end = lineNumber - 1; + } + } - int cmp = currentSwhPID.compareTo(swhPID.toString()); - if (cmp == 0) { - return currentNodeId; - } else if (cmp < 0) { - start = lineNumber + 1; - } else { - end = lineNumber - 1; - } + throw new IllegalArgumentException("Unknown SWH PID: " + swhPID); } - throw new IllegalArgumentException("Unknown SWH PID: " + swhPID); - } + /** + * Converts a node long id to corresponding SWH PID. + * + * @param nodeId node as a long id + * @return corresponding node as a {@link SwhPID} + * @see org.softwareheritage.graph.SwhPID + */ + public SwhPID getSwhPID(long nodeId) { + // Each line in NODE_TO_PID is formatted as: swhPID + // The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the + // nodeId-th line to get corresponding swhPID + if (nodeId < 0 || nodeId >= nbIds) { + throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); + } - /** - * Converts a node long id to corresponding SWH PID. - * - * @param nodeId node as a long id - * @return corresponding node as a {@link SwhPID} - * @see org.softwareheritage.graph.SwhPID - */ - public SwhPID getSwhPID(long nodeId) { - // Each line in NODE_TO_PID is formatted as: swhPID - // The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the - // nodeId-th line to get corresponding swhPID - if (nodeId < 0 || nodeId >= nbIds) { - throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); + String swhPID = nodeToSwhMap.readAtLine(nodeId); + return new SwhPID(swhPID); } - String swhPID = nodeToSwhMap.readAtLine(nodeId); - return new SwhPID(swhPID); - } - - /** - * Closes the mapping files. - */ - public void close() throws IOException { - swhToNodeMap.close(); - nodeToSwhMap.close(); - } + /** + * Closes the mapping files. + */ + public void close() throws IOException { + swhToNodeMap.close(); + nodeToSwhMap.close(); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java index 42669f8..a516c8d 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java @@ -1,51 +1,51 @@ package org.softwareheritage.graph.backend; import java.io.IOException; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Mapping between long node id and SWH node type as described in the data model. *

* The type mapping is pre-computed and dumped on disk in the {@link Setup} class, then it is loaded * in-memory here using fastutil LongBigList. To be * space-efficient, the mapping is stored as a bitmap using minimum number of bits per {@link * Node.Type}. * * @author Thibault Allançon */ public class NodeTypesMap { - /** Array storing for each node its type */ - LongBigList nodeTypesMap; + /** Array storing for each node its type */ + LongBigList nodeTypesMap; - /** - * Constructor. - * - * @param graphPath path and basename of the compressed graph - */ - public NodeTypesMap(String graphPath) throws IOException { - try { - nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException("Unknown class object: " + e); + /** + * Constructor. + * + * @param graphPath path and basename of the compressed graph + */ + public NodeTypesMap(String graphPath) throws IOException { + try { + nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("Unknown class object: " + e); + } } - } - /** - * Returns node type from a node long id. - * - * @param nodeId node as a long id - * @return corresponding {@link Node.Type} value - * @see org.softwareheritage.graph.Node.Type - */ - public Node.Type getType(long nodeId) { - long type = nodeTypesMap.getLong(nodeId); - return Node.Type.fromInt((int) type); - } + /** + * Returns node type from a node long id. + * + * @param nodeId node as a long id + * @return corresponding {@link Node.Type} value + * @see org.softwareheritage.graph.Node.Type + */ + public Node.Type getType(long nodeId) { + long type = nodeTypesMap.getLong(nodeId); + return Node.Type.fromInt((int) type); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java index c1d44ec..225de4f 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java @@ -1,123 +1,123 @@ package org.softwareheritage.graph.backend; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Writer; import java.util.zip.GZIPInputStream; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.fastutil.objects.ObjectBigArrays; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Pre-processing steps (such as dumping mapping files on disk) before running the graph service. * * @author Thibault Allançon */ public class Setup { - /** - * Main entrypoint. - * - * @param args command line arguments - */ - public static void main(String[] args) throws IOException { - if (args.length != 2) { - System.err.println("Expected parameters: "); - System.exit(1); + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException { + if (args.length != 2) { + System.err.println("Expected parameters: "); + System.exit(1); + } + + String nodesPath = args[0]; + String graphPath = args[1]; + + System.out.println("Pre-computing node id maps..."); + long startTime = System.nanoTime(); + precomputeNodeIdMap(nodesPath, graphPath); + long endTime = System.nanoTime(); + double duration = (endTime - startTime) / 1_000_000_000; + System.out.println("Done in: " + duration + " seconds"); } - String nodesPath = args[0]; - String graphPath = args[1]; - - System.out.println("Pre-computing node id maps..."); - long startTime = System.nanoTime(); - precomputeNodeIdMap(nodesPath, graphPath); - long endTime = System.nanoTime(); - double duration = (endTime - startTime) / 1_000_000_000; - System.out.println("Done in: " + duration + " seconds"); - } - - /** - * Computes and dumps on disk mapping files. - * - * @param nodesPath path of the compressed csv nodes file - * @param graphPath path of the compressed graph - */ - // Suppress warning for Object2LongFunction cast - @SuppressWarnings("unchecked") - static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException { - // First internal mapping: SWH PID (string) -> WebGraph MPH (long) - Object2LongFunction mphMap = null; - try { - mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); + /** + * Computes and dumps on disk mapping files. + * + * @param nodesPath path of the compressed csv nodes file + * @param graphPath path of the compressed graph + */ + // Suppress warning for Object2LongFunction cast + @SuppressWarnings("unchecked") + static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException { + // First internal mapping: SWH PID (string) -> WebGraph MPH (long) + Object2LongFunction mphMap = null; + try { + mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); + } + long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); + + // Second internal mapping: WebGraph MPH (long) -> BFS ordering (long) + long[][] bfsMap = LongBigArrays.newBigArray(nbIds); + long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); + if (loaded != nbIds) { + throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded); + } + + // Dump complete mapping for all nodes: SWH PID (string) <=> WebGraph node id (long) + + InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath)); + FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8")); + LineIterator swhPIDIterator = new LineIterator(buffer); + + try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE)); + Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) { + // nodeToSwhMap needs to write SWH PID in order of node id, so use a temporary array + Object[][] nodeToSwhPID = ObjectBigArrays.newBigArray(nbIds); + + // To effectively run edge restriction during graph traversals, we store node id (long) -> SWH + // type map. This is represented as a bitmap using minimum number of bits per Node.Type. + final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2)); + final int nbBitsPerNodeType = log2NbTypes; + LongArrayBitVector nodeTypesBitVector = + LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); + LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); + + for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) { + String strSwhPID = swhPIDIterator.next().toString(); + long mphId = mphMap.getLong(strSwhPID); + long nodeId = LongBigArrays.get(bfsMap, mphId); + + String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId); + String line = strSwhPID + " " + paddedNodeId + "\n"; + swhToNodeMap.write(line); + + ObjectBigArrays.set(nodeToSwhPID, nodeId, strSwhPID); + + SwhPID swhPID = new SwhPID(strSwhPID); + nodeTypesMap.set(nodeId, swhPID.getType().ordinal()); + } + + BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); + + for (long iNode = 0; iNode < nbIds; iNode++) { + String line = ObjectBigArrays.get(nodeToSwhPID, iNode).toString() + "\n"; + nodeToSwhMap.write(line); + } + } } - long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); - - // Second internal mapping: WebGraph MPH (long) -> BFS ordering (long) - long[][] bfsMap = LongBigArrays.newBigArray(nbIds); - long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); - if (loaded != nbIds) { - throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded); - } - - // Dump complete mapping for all nodes: SWH PID (string) <=> WebGraph node id (long) - - InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath)); - FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8")); - LineIterator swhPIDIterator = new LineIterator(buffer); - - try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE)); - Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) { - // nodeToSwhMap needs to write SWH PID in order of node id, so use a temporary array - Object[][] nodeToSwhPID = ObjectBigArrays.newBigArray(nbIds); - - // To effectively run edge restriction during graph traversals, we store node id (long) -> SWH - // type map. This is represented as a bitmap using minimum number of bits per Node.Type. - final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2)); - final int nbBitsPerNodeType = log2NbTypes; - LongArrayBitVector nodeTypesBitVector = - LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); - LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); - - for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) { - String strSwhPID = swhPIDIterator.next().toString(); - long mphId = mphMap.getLong(strSwhPID); - long nodeId = LongBigArrays.get(bfsMap, mphId); - - String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId); - String line = strSwhPID + " " + paddedNodeId + "\n"; - swhToNodeMap.write(line); - - ObjectBigArrays.set(nodeToSwhPID, nodeId, strSwhPID); - - SwhPID swhPID = new SwhPID(strSwhPID); - nodeTypesMap.set(nodeId, swhPID.getType().ordinal()); - } - - BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); - - for (long iNode = 0; iNode < nbIds; iNode++) { - String line = ObjectBigArrays.get(nodeToSwhPID, iNode).toString() + "\n"; - nodeToSwhMap.write(line); - } - } - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index 84cc498..a5ae040 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,47 +1,47 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import java.util.ArrayList; import com.martiansoftware.jsap.JSAPException; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Benchmark; import org.softwareheritage.graph.benchmark.utils.Statistics; import org.softwareheritage.graph.benchmark.utils.Timing; /** * Benchmark to time edge access time. * * @author Thibault Allançon */ public class AccessEdge { - /** - * Main entrypoint. - * - * @param args command line arguments - */ - public static void main(String[] args) throws IOException, JSAPException { - Benchmark bench = new Benchmark(); - bench.parseCommandLineArgs(args); - - Graph graph = new Graph(bench.args.graphPath); - - long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); - - ArrayList timings = new ArrayList<>(); - for (long nodeId : nodeIds) { - long startTime = Timing.start(); - LazyLongIterator neighbors = graph.successors(nodeId); - long firstNeighbor = neighbors.nextLong(); - double duration = Timing.stop(startTime); - timings.add(duration); + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException, JSAPException { + Benchmark bench = new Benchmark(); + bench.parseCommandLineArgs(args); + + Graph graph = new Graph(bench.args.graphPath); + + long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); + + ArrayList timings = new ArrayList<>(); + for (long nodeId : nodeIds) { + long startTime = Timing.start(); + LazyLongIterator neighbors = graph.successors(nodeId); + long firstNeighbor = neighbors.nextLong(); + double duration = Timing.stop(startTime); + timings.add(duration); + } + + System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):"); + Statistics stats = new Statistics(timings); + stats.printAll(); } - - System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):"); - Statistics stats = new Statistics(timings); - stats.printAll(); - } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java index bacf160..70fc9e5 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java @@ -1,170 +1,170 @@ package org.softwareheritage.graph.benchmark; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.StringJoiner; import java.util.function.Function; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.benchmark.utils.Random; import org.softwareheritage.graph.benchmark.utils.Statistics; /** * Benchmark common utility functions. * * @author Thibault Allançon */ public class Benchmark { - /** - * Input arguments. - */ - public class Args { - /** Basename of the compressed graph */ - public String graphPath; - /** Number of random nodes to use for the benchmark */ - public int nbNodes; - /** File name for CSV format benchmark log */ - public String logFile; - /** Random generator */ - public Random random; - } - - /** Command line arguments */ - public Args args; - /** CSV separator for log file */ - final String CSV_SEPARATOR = ";"; - - /** - * Constructor. - */ - public Benchmark() { - this.args = new Args(); - } - - /** - * Parses benchmark command line arguments. - * - * @param args command line arguments - */ - public void parseCommandLineArgs(String[] args) throws JSAPException { - SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(), - "Benchmark tool for Software Heritage use-cases scenarios.", - new Parameter[] { - new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - JSAP.NOT_GREEDY, "The basename of the compressed graph."), - new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', - "nb-nodes", "Number of random nodes used to do the benchmark."), - new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', - "log-file", "File name to output CSV format benchmark log."), - new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', - "seed", "Random generator seed."), - }); - - JSAPResult config = jsap.parse(args); - if (jsap.messagePrinted()) { - System.exit(1); + /** + * Input arguments. + */ + public class Args { + /** Basename of the compressed graph */ + public String graphPath; + /** Number of random nodes to use for the benchmark */ + public int nbNodes; + /** File name for CSV format benchmark log */ + public String logFile; + /** Random generator */ + public Random random; } - this.args.graphPath = config.getString("graphPath"); - this.args.nbNodes = config.getInt("nbNodes"); - this.args.logFile = config.getString("logFile"); - this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random(); - } - - /** - * Creates CSV file for log output. - */ - public void createCSVLogFile() throws IOException { - try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) { - StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR); - csvHeader.add("use case name") - .add("SWH PID") - .add("number of edges accessed") - .add("traversal timing") - .add("pid2node timing") - .add("node2pid timing"); - csvLog.write(csvHeader.toString() + "\n"); + /** Command line arguments */ + public Args args; + /** CSV separator for log file */ + final String CSV_SEPARATOR = ";"; + + /** + * Constructor. + */ + public Benchmark() { + this.args = new Args(); + } + + /** + * Parses benchmark command line arguments. + * + * @param args command line arguments + */ + public void parseCommandLineArgs(String[] args) throws JSAPException { + SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(), + "Benchmark tool for Software Heritage use-cases scenarios.", + new Parameter[] { + new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + JSAP.NOT_GREEDY, "The basename of the compressed graph."), + new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', + "nb-nodes", "Number of random nodes used to do the benchmark."), + new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', + "log-file", "File name to output CSV format benchmark log."), + new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', + "seed", "Random generator seed."), + }); + + JSAPResult config = jsap.parse(args); + if (jsap.messagePrinted()) { + System.exit(1); + } + + this.args.graphPath = config.getString("graphPath"); + this.args.nbNodes = config.getInt("nbNodes"); + this.args.logFile = config.getString("logFile"); + this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random(); } - } - - /** - * Times a specific endpoint and outputs individual datapoints along with aggregated statistics. - * - * @param useCaseName benchmark use-case name - * @param graph compressed graph used in the benchmark - * @param nodeIds node ids to use as starting point for the endpoint traversal - * @param operation endpoint function to benchmark - * @param dstFmt destination formatted string as described in the API - * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") - */ - public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, - Function operation, String dstFmt, String algorithm) - throws IOException { - ArrayList timings = new ArrayList<>(); - ArrayList timingsNormalized = new ArrayList<>(); - ArrayList nbEdgesAccessed = new ArrayList<>(); - - final boolean append = true; - try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) { - for (long nodeId : nodeIds) { - SwhPID swhPID = graph.getSwhPID(nodeId); - - Endpoint.Output output = (dstFmt == null) - ? operation.apply(new Endpoint.Input(swhPID)) - : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm)); - - StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR); - csvLine.add(useCaseName) - .add(swhPID.toString()) - .add(Long.toString(output.meta.nbEdgesAccessed)) - .add(Double.toString(output.meta.timings.traversal)) - .add(Double.toString(output.meta.timings.pid2node)) - .add(Double.toString(output.meta.timings.node2pid)); - csvLog.write(csvLine.toString() + "\n"); - - timings.add(output.meta.timings.traversal); - nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed); - if (output.meta.nbEdgesAccessed != 0) { - timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); + + /** + * Creates CSV file for log output. + */ + public void createCSVLogFile() throws IOException { + try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) { + StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR); + csvHeader.add("use case name") + .add("SWH PID") + .add("number of edges accessed") + .add("traversal timing") + .add("pid2node timing") + .add("node2pid timing"); + csvLog.write(csvHeader.toString() + "\n"); } - } } - System.out.println("\n" + useCaseName + " use-case:"); + /** + * Times a specific endpoint and outputs individual datapoints along with aggregated statistics. + * + * @param useCaseName benchmark use-case name + * @param graph compressed graph used in the benchmark + * @param nodeIds node ids to use as starting point for the endpoint traversal + * @param operation endpoint function to benchmark + * @param dstFmt destination formatted string as described in the API + * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") + */ + public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, + Function operation, String dstFmt, String algorithm) + throws IOException { + ArrayList timings = new ArrayList<>(); + ArrayList timingsNormalized = new ArrayList<>(); + ArrayList nbEdgesAccessed = new ArrayList<>(); + + final boolean append = true; + try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) { + for (long nodeId : nodeIds) { + SwhPID swhPID = graph.getSwhPID(nodeId); + + Endpoint.Output output = (dstFmt == null) + ? operation.apply(new Endpoint.Input(swhPID)) + : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm)); + + StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR); + csvLine.add(useCaseName) + .add(swhPID.toString()) + .add(Long.toString(output.meta.nbEdgesAccessed)) + .add(Double.toString(output.meta.timings.traversal)) + .add(Double.toString(output.meta.timings.pid2node)) + .add(Double.toString(output.meta.timings.node2pid)); + csvLog.write(csvLine.toString() + "\n"); + + timings.add(output.meta.timings.traversal); + nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed); + if (output.meta.nbEdgesAccessed != 0) { + timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); + } + } + } - System.out.println("timings:"); - Statistics stats = new Statistics(timings); - stats.printAll(); + System.out.println("\n" + useCaseName + " use-case:"); - System.out.println("timings normalized:"); - Statistics statsNormalized = new Statistics(timingsNormalized); - statsNormalized.printAll(); + System.out.println("timings:"); + Statistics stats = new Statistics(timings); + stats.printAll(); - System.out.println("nb edges accessed:"); - Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed); - statsNbEdgesAccessed.printAll(); - } + System.out.println("timings normalized:"); + Statistics statsNormalized = new Statistics(timingsNormalized); + statsNormalized.printAll(); - /** - * Same as {@link timeEndpoint} but without destination or algorithm specified to endpoint call. - */ - public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, - Function operation) throws IOException { - timeEndpoint(useCaseName, graph, nodeIds, operation, null, null); - } + System.out.println("nb edges accessed:"); + Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed); + statsNbEdgesAccessed.printAll(); + } + + /** + * Same as {@link timeEndpoint} but without destination or algorithm specified to endpoint call. + */ + public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, + Function operation) throws IOException { + timeEndpoint(useCaseName, graph, nodeIds, operation, null, null); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 45f699e..5939e1c 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,46 +1,46 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage browsing * use-cases scenarios. * * @author Thibault Allançon */ public class Browsing { - /** - * Main entrypoint. - * - * @param args command line arguments - */ - public static void main(String[] args) throws IOException, JSAPException { - Benchmark bench = new Benchmark(); - bench.parseCommandLineArgs(args); - - Graph graph = new Graph(bench.args.graphPath); - - long[] dirNodeIds = - bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); - long[] revNodeIds = - bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); - - Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); - Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); - - System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); - bench.createCSVLogFile(); - bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors); - bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths); - bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes); - } + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException, JSAPException { + Benchmark bench = new Benchmark(); + bench.parseCommandLineArgs(args); + + Graph graph = new Graph(bench.args.graphPath); + + long[] dirNodeIds = + bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); + long[] revNodeIds = + bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); + + Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); + Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); + + System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); + bench.createCSVLogFile(); + bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors); + bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths); + bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java index e53b4bf..9fa4f7d 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java @@ -1,53 +1,53 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage provenance * use-cases scenarios. * * @author Thibault Allançon */ public class Provenance { - /** - * Main entrypoint. - * - * @param args command line arguments - */ - public static void main(String[] args) throws IOException, JSAPException { - Benchmark bench = new Benchmark(); - bench.parseCommandLineArgs(args); - - Graph graph = new Graph(bench.args.graphPath); - - long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); - - Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); - Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); - - System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); - bench.createCSVLogFile(); - - bench.timeEndpoint( - "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); - bench.timeEndpoint( - "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); - bench.timeEndpoint( - "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); - - bench.timeEndpoint( - "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); - bench.timeEndpoint( - "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); - bench.timeEndpoint( - "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves); - } + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException, JSAPException { + Benchmark bench = new Benchmark(); + bench.parseCommandLineArgs(args); + + Graph graph = new Graph(bench.args.graphPath); + + long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); + + Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); + Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); + + System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); + bench.createCSVLogFile(); + + bench.timeEndpoint( + "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); + bench.timeEndpoint( + "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); + bench.timeEndpoint( + "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); + + bench.timeEndpoint( + "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); + bench.timeEndpoint( + "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); + bench.timeEndpoint( + "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java index cd08b57..c840ac9 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java @@ -1,39 +1,39 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage vault * use-case scenario. * * @author Thibault Allançon */ public class Vault { - /** - * Main entrypoint. - * - * @param args command line arguments - */ - public static void main(String[] args) throws IOException, JSAPException { - Benchmark bench = new Benchmark(); - bench.parseCommandLineArgs(args); + /** + * Main entrypoint. + * + * @param args command line arguments + */ + public static void main(String[] args) throws IOException, JSAPException { + Benchmark bench = new Benchmark(); + bench.parseCommandLineArgs(args); - Graph graph = new Graph(bench.args.graphPath); + Graph graph = new Graph(bench.args.graphPath); - long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); + long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); - Endpoint endpoint = new Endpoint(graph, "forward", "*"); + Endpoint endpoint = new Endpoint(graph, "forward", "*"); - System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); - bench.createCSVLogFile(); - bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes); - } + System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); + bench.createCSVLogFile(); + bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java index 633a8ee..bab2fb8 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java @@ -1,67 +1,67 @@ package org.softwareheritage.graph.benchmark.utils; import java.util.PrimitiveIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Random related utility class. * * @author Thibault Allançon */ public class Random { - /** Internal pseudorandom generator */ - java.util.Random random; + /** Internal pseudorandom generator */ + java.util.Random random; - /** - * Constructor. - */ - public Random() { - this.random = new java.util.Random(); - } + /** + * Constructor. + */ + public Random() { + this.random = new java.util.Random(); + } - /** - * Constructor. - * - * @param seed random generator seed - */ - public Random(long seed) { - this.random = new java.util.Random(seed); - } + /** + * Constructor. + * + * @param seed random generator seed + */ + public Random(long seed) { + this.random = new java.util.Random(seed); + } - /** - * Generates random node ids. - * - * @param graph graph used to pick node ids - * @param nbNodes number of node ids to generate - * @return an array of random node ids - */ - public long[] generateNodeIds(Graph graph, int nbNodes) { - return random.longs(nbNodes, 0, graph.getNbNodes()).toArray(); - } + /** + * Generates random node ids. + * + * @param graph graph used to pick node ids + * @param nbNodes number of node ids to generate + * @return an array of random node ids + */ + public long[] generateNodeIds(Graph graph, int nbNodes) { + return random.longs(nbNodes, 0, graph.getNbNodes()).toArray(); + } - /** - * Generates random node ids with a specific type. - * - * @param graph graph used to pick node ids - * @param nbNodes number of node ids to generate - * @param expectedType specific node type to pick - * @return an array of random node ids - */ - public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) { - PrimitiveIterator.OfLong nodes = random.longs(0, graph.getNbNodes()).iterator(); - long[] nodeIds = new long[nbNodes]; + /** + * Generates random node ids with a specific type. + * + * @param graph graph used to pick node ids + * @param nbNodes number of node ids to generate + * @param expectedType specific node type to pick + * @return an array of random node ids + */ + public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) { + PrimitiveIterator.OfLong nodes = random.longs(0, graph.getNbNodes()).iterator(); + long[] nodeIds = new long[nbNodes]; - long nextId; - for (int i = 0; i < nbNodes; i++) { - do { - nextId = nodes.nextLong(); - } while (graph.getNodeType(nextId) != expectedType); - nodeIds[i] = nextId; - } + long nextId; + for (int i = 0; i < nbNodes; i++) { + do { + nextId = nodes.nextLong(); + } while (graph.getNodeType(nextId) != expectedType); + nodeIds[i] = nextId; + } - return nodeIds; - } + return nodeIds; + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java index 33e341e..71ac8d3 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java @@ -1,104 +1,104 @@ package org.softwareheritage.graph.benchmark.utils; import java.util.ArrayList; import java.util.Collections; /** * Compute various statistics on a list of values. * * @author Thibault Allançon */ public class Statistics { - /** Input values */ - ArrayList values; + /** Input values */ + ArrayList values; - /** - * Constructor. - * - * @param values input values - */ - public Statistics(ArrayList values) { - this.values = values; - } + /** + * Constructor. + * + * @param values input values + */ + public Statistics(ArrayList values) { + this.values = values; + } - /** - * Returns the minimum value. - * - * @return minimum value - */ - public double getMin() { - double min = Double.POSITIVE_INFINITY; - for (double v : values) { - min = Math.min(min, v); + /** + * Returns the minimum value. + * + * @return minimum value + */ + public double getMin() { + double min = Double.POSITIVE_INFINITY; + for (double v : values) { + min = Math.min(min, v); + } + return min; } - return min; - } - /** - * Returns the maximum value. - * - * @return maximum value - */ - public double getMax() { - double max = Double.NEGATIVE_INFINITY; - for (double v : values) { - max = Math.max(max, v); + /** + * Returns the maximum value. + * + * @return maximum value + */ + public double getMax() { + double max = Double.NEGATIVE_INFINITY; + for (double v : values) { + max = Math.max(max, v); + } + return max; } - return max; - } - /** - * Computes the average. - * - * @return average value - */ - public double getAverage() { - double sum = 0; - for (double v : values) { - sum += v; + /** + * Computes the average. + * + * @return average value + */ + public double getAverage() { + double sum = 0; + for (double v : values) { + sum += v; + } + return sum / (double) values.size(); } - return sum / (double) values.size(); - } - /** - * Returns the median value. - * - * @return median value - */ - public double getMedian() { - Collections.sort(values); - int length = values.size(); - if (length % 2 == 0) { - return (values.get(length / 2) + values.get(length / 2 - 1)) / 2; - } else { - return values.get(length / 2); + /** + * Returns the median value. + * + * @return median value + */ + public double getMedian() { + Collections.sort(values); + int length = values.size(); + if (length % 2 == 0) { + return (values.get(length / 2) + values.get(length / 2 - 1)) / 2; + } else { + return values.get(length / 2); + } } - } - /** - * Computes the standard deviation. - * - * @return standard deviation value - */ - public double getStandardDeviation() { - double average = getAverage(); - double variance = 0; - for (double v : values) { - variance += (v - average) * (v - average); + /** + * Computes the standard deviation. + * + * @return standard deviation value + */ + public double getStandardDeviation() { + double average = getAverage(); + double variance = 0; + for (double v : values) { + variance += (v - average) * (v - average); + } + variance /= (double) values.size(); + return Math.sqrt(variance); } - variance /= (double) values.size(); - return Math.sqrt(variance); - } - /** - * Computes and prints all statistical values. - */ - public void printAll() { - System.out.println("min value: " + getMin()); - System.out.println("max value: " + getMax()); - System.out.println("average: " + getAverage()); - System.out.println("median: " + getMedian()); - System.out.println("standard deviation: " + getStandardDeviation()); - } + /** + * Computes and prints all statistical values. + */ + public void printAll() { + System.out.println("min value: " + getMin()); + System.out.println("max value: " + getMax()); + System.out.println("average: " + getAverage()); + System.out.println("median: " + getMedian()); + System.out.println("standard deviation: " + getStandardDeviation()); + } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java index f07b9f0..f5ed8e1 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java @@ -1,30 +1,30 @@ package org.softwareheritage.graph.benchmark.utils; /** * Time measurement utility class. * * @author Thibault Allançon */ public class Timing { - /** - * Returns measurement starting timestamp. - * - * @return timestamp used for time measurement - */ - public static long start() { - return System.nanoTime(); - } + /** + * Returns measurement starting timestamp. + * + * @return timestamp used for time measurement + */ + public static long start() { + return System.nanoTime(); + } - /** - * Ends timing measurement and returns total duration in seconds. - * - * @param startTime measurement starting timestamp - * @return time in seconds elapsed since starting point - */ - public static double stop(long startTime) { - long endTime = System.nanoTime(); - double duration = (double) (endTime - startTime) / 1_000_000_000; - return duration; - } + /** + * Ends timing measurement and returns total duration in seconds. + * + * @param startTime measurement starting timestamp + * @return time in seconds elapsed since starting point + */ + public static double stop(long startTime) { + long endTime = System.nanoTime(); + double duration = (double) (endTime - startTime) / 1_000_000_000; + return duration; + } }