diff --git a/java/server/README.md b/java/server/README.md index 858e926..0323461 100644 --- a/java/server/README.md +++ b/java/server/README.md @@ -1,48 +1,48 @@ Graph service - Server side =========================== Server side Java REST API. Build ----- ```bash $ mvn compile assembly:single ``` Start REST API -------------- ```bash -$ java -cp target/swh-graph-1.0-jar-with-dependencies.jar \ +$ java -cp target/swh-graph-0.0.1-jar-with-dependencies.jar \ org.softwareheritage.graph.App \ ``` Default port is 5009. Tests ----- Unit tests rely on test data that are already available in the Git repository (under `src/test/dataset/`). You generally only need to run them using Maven: ```bash $ mvn test ``` In case you want to regenerate the test data: ```bash # Graph compression $ cd src/test/dataset $ ./generate_graph.sh $ cd ../../../ $ mvn compile assembly:single # Dump mapping files -$ java -cp target/swh-graph-1.0-jar-with-dependencies.jar \ +$ java -cp target/swh-graph-0.0.1-jar-with-dependencies.jar \ org.softwareheritage.graph.backend.Setup \ src/test/dataset/example.nodes.csv.gz \ src/test/dataset/output/example ``` diff --git a/java/server/pom.xml b/java/server/pom.xml index 544be2f..bd06957 100644 --- a/java/server/pom.xml +++ b/java/server/pom.xml @@ -1,151 +1,151 @@ 4.0.0 org.softwareheritage.graph swh-graph - 1.0 + 0.0.1 swh-graph https://www.softwareheritage.org/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 junit junit 4.11 test org.hamcrest hamcrest 2.1 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big 3.5.1 it.unimi.dsi fastutil 8.2.2 com.martiansoftware jsap 2.1 maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 -verbose -Xlint:all maven-surefire-plugin 2.22.1 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin org.softwareheritage.graph.App jar-with-dependencies make-assembly package single org.apache.maven.plugins maven-javadoc-plugin 3.1.1 diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java index 407324c..08f7dd4 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java +++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java @@ -1,88 +1,88 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Edge restriction based on node types, used when visiting the graph. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class AllowedEdges { /** Graph on which edge restriction is performed */ Graph graph; /** * 2D boolean matrix storing access rights for all combination of src/dst node types (first * dimension is source, second dimension is destination), when edge restriction is not enforced * this array is set to null for early bypass. */ public boolean[][] restrictedTo; /** * Constructor. * * @param graph the graph on which to perform edge restriction * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) */ public AllowedEdges(Graph graph, String edgesFmt) { this.graph = graph; int nbNodeTypes = Node.Type.values().length; this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes]; // Special values (null, empty, "*") if (edgesFmt == null || edgesFmt.isEmpty()) { return; } if (edgesFmt.equals("*")) { // Allows for quick bypass (with simple null check) when no edge restriction restrictedTo = null; return; } // Format: "src1:dst1,src2:dst2,[...]" // TODO: link API doc String[] edgeTypes = edgesFmt.split(","); for (String edgeType : edgeTypes) { String[] nodeTypes = edgeType.split(":"); if (nodeTypes.length != 2) { throw new IllegalArgumentException("Cannot parse edge type: " + edgeType); } ArrayList srcTypes = Node.Type.parse(nodeTypes[0]); ArrayList dstTypes = Node.Type.parse(nodeTypes[1]); for (Node.Type srcType : srcTypes) { for (Node.Type dstType : dstTypes) { restrictedTo[srcType.ordinal()][dstType.ordinal()] = true; } } } } /** * Checks if a given edge can be followed during graph traversal. * * @param srcNodeId edge source node * @param dstNodeId edge destination node * @return true if allowed and false otherwise */ public boolean isAllowed(long srcNodeId, long dstNodeId) { Node.Type srcType = graph.getNodeType(srcNodeId); Node.Type dstType = graph.getNodeType(dstNodeId); return restrictedTo[srcType.ordinal()][dstType.ordinal()]; } /** * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of * node ids. * * @see AllowedEdges#isAllowed(long, long) */ public boolean isAllowed(Node.Type srcType, Node.Type dstType) { return restrictedTo[srcType.ordinal()][dstType.ordinal()]; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java index 654597c..29f9774 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/App.java +++ b/java/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,151 +1,151 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.algo.Stats; /** * Entrypoint of the swh-graph server REST API. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class App { public static void main(String[] args) throws IOException, JSAPException { SimpleJSAP jsap = new SimpleJSAP( App.class.getName(), "Server to load and query a compressed graph representation of Software Heritage archive.", new Parameter[] { new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", "Binding port of the server."), new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), } ); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } String graphPath = config.getString("graphPath"); int port = config.getInt("port"); startServer(graphPath, port); } private static void startServer(String graphPath, int port) throws IOException { Graph graph = new Graph(graphPath); Stats stats = new Stats(graphPath); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(port); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.leaves(src)); }); app.get("/neighbors/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.neighbors(src)); }); app.get("/visit/nodes/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.visitNodes(src)); }); app.get("/visit/paths/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.visitPaths(src)); }); app.get("/walk/:src/:dst", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.walk(src, dstFmt, algorithm)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java index 35939b4..a32fc35 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -1,230 +1,230 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; /** * REST API endpoints wrapper functions. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class Endpoint { /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** Timings logger */ private static final Logger logger = LoggerFactory.getLogger(Endpoint.class); /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Returns measurement starting timestamp for logging purposes. * * @return timestamp used for time measurement */ private long startTiming() { return System.nanoTime(); } /** * Ends timing measurement and returns total duration in seconds for logging purposes. * * @param startTime measurement starting timestamp * @return time in seconds elapsed since starting point */ private float stopTiming(long startTime) { long endTime = System.nanoTime(); float duration = (float) (endTime - startTime) / 1_000_000_000; return duration; } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWH PIDs */ private ArrayList convertNodesToSwhIds(ArrayList nodeIds) { long startTime = this.startTiming(); ArrayList swhIds = new ArrayList<>(); for (long nodeId : nodeIds) { swhIds.add(graph.getSwhId(nodeId)); } float duration = this.stopTiming(startTime); logger.debug("convertNodesToSwhIds() took {} s.", duration); return swhIds; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { long startTime = this.startTiming(); SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSwhId(nodeId)); } float duration = this.stopTiming(startTime); logger.debug("convertNodesToSwhPath() took {} s.", duration); return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSwhIds(ArrayList> pathsNodeId) { long startTime = this.startTiming(); ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } float duration = this.stopTiming(startTime); logger.debug("convertPathsToSwhIds() took {} s.", duration); return paths; } /** * Leaves endpoint wrapper (performs input/output node ids conversions). * * @param src source node of endpoint call specified as a SwhId * @return the resulting list of SwhId from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.algo.Traversal#leaves(long) */ public ArrayList leaves(SwhId src) { long srcNodeId = graph.getNodeId(src); long startTime = this.startTiming(); ArrayList nodeIds = traversal.leaves(srcNodeId); float duration = this.stopTiming(startTime); logger.debug("leaves({}) took {} s.", src, duration); return convertNodesToSwhIds(nodeIds); } /** * Neighbors endpoint wrapper (performs input/output node ids conversions). * * @param src source node of endpoint call specified as a SwhId * @return the resulting list of SwhId from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) */ public ArrayList neighbors(SwhId src) { long srcNodeId = graph.getNodeId(src); long startTime = this.startTiming(); ArrayList nodeIds = traversal.neighbors(srcNodeId); float duration = this.stopTiming(startTime); logger.debug("neighbors({}) took {} s.", src, duration); return convertNodesToSwhIds(nodeIds); } /** * Walk endpoint wrapper (performs input/output node ids conversions). * * @param src source node of endpoint call specified as a SwhId * @param dstFmt destination used in endpoint call (TODO: link to API doc) * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") * @return the resulting {@link SwhPath} from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#walk */ public SwhPath walk(SwhId src, String dstFmt, String algorithm) { long srcNodeId = graph.getNodeId(src); ArrayList nodeIds = new ArrayList(); // Destination is either a SWH ID or a node type try { SwhId dstSwhId = new SwhId(dstFmt); long dstNodeId = graph.getNodeId(dstSwhId); long startTime = this.startTiming(); nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm); float duration = this.stopTiming(startTime); logger.debug("walk({}) took {} s.", src, duration); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(dstFmt); long startTime = this.startTiming(); nodeIds = traversal.walk(srcNodeId, dstType, algorithm); float duration = this.stopTiming(startTime); logger.debug("walk({}) took {} s.", src, duration); } catch (IllegalArgumentException ignored2) { } } return convertNodesToSwhPath(nodeIds); } /** * VisitNodes endpoint wrapper (performs input/output node ids conversions). * * @param src source node of endpoint call specified as a SwhId * @return the resulting list of SwhId from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) */ public ArrayList visitNodes(SwhId src) { long srcNodeId = graph.getNodeId(src); long startTime = this.startTiming(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); float duration = this.stopTiming(startTime); logger.debug("visitNodes({}) took {} s.", src, duration); return convertNodesToSwhIds(nodeIds); } /** * VisitPaths endpoint wrapper (performs input/output node ids conversions). * * @param src source node of endpoint call specified as a SwhId * @return the resulting list of {@link SwhPath} from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) */ public ArrayList visitPaths(SwhId src) { long srcNodeId = graph.getNodeId(src); long startTime = this.startTiming(); ArrayList> paths = traversal.visitPaths(srcNodeId); float duration = this.stopTiming(startTime); logger.debug("visitPaths({}) took {} s.", src, duration); return convertPathsToSwhIds(paths); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java index dc90a40..6c2310b 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,183 +1,183 @@ package org.softwareheritage.graph; import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.NodeIdMap; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Main class storing the compressed graph and node id mappings. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class Graph { /** File extension for the SWH PID to long node id map */ public static final String PID_TO_NODE = ".pid2node.csv"; /** File extension for the long node id to SWH PID map */ public static final String NODE_TO_PID = ".node2pid.csv"; /** File extension for the long node id to node typ map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ BVGraph graph; /** Transposed compressed graph (used for backward traversals) */ BVGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWH PIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. * * @param path path and basename of the compressed graph to load */ public Graph(String path) throws IOException { this.graph = BVGraph.load(path); this.graphTransposed = BVGraph.load(path + "-transposed"); this.path = path; this.nodeIdMap = new NodeIdMap(path, getNbNodes()); this.nodeTypesMap = new NodeTypesMap(path); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } /** * Returns the graph full path. * * @return graph full path */ public String getPath() { return path; } /** * Converts {@link SwhId} node to long. * * @param swhId node specified as a {@link SwhId} * @return internal long node id * @see org.softwareheritage.graph.SwhId */ public long getNodeId(SwhId swhId) { return nodeIdMap.getNodeId(swhId); } /** * Converts long id node to {@link SwhId}. * * @param nodeId node specified as a long id * @return external SWH PID * @see org.softwareheritage.graph.SwhId */ public SwhId getSwhId(long nodeId) { return nodeIdMap.getSwhId(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see org.softwareheritage.graph.Node.Type */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ public long getNbNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ public long getNbEdges() { return graph.numArcs(); } /** * Returns list of successors of a node. * * @param nodeId node specified as a long id * @return list of successors of the node, specified as a {@link LongBigArrays} * @see it.unimi.dsi.fastutil.longs.LongBigArrays */ public long[][] successors(long nodeId) { return graph.successorBigArray(nodeId); } /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ public long outdegree(long nodeId) { return graph.outdegree(nodeId); } /** * Returns list of predecessors of a node. * * @param nodeId node specified as a long id * @return list of predecessors of the node, specified as a {@link LongBigArrays} * @see it.unimi.dsi.fastutil.longs.LongBigArrays */ public long[][] predecessors(long nodeId) { return graphTransposed.successorBigArray(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return graphTransposed.outdegree(nodeId); } /** * Returns the degree of a node, depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph * @return degree of a node */ public long degree(long nodeId, boolean useTransposed) { return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); } /** * Returns the neighbors of a node, depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph * @return list of neighbors of the node, specified as a {@link LongBigArrays} * @see it.unimi.dsi.fastutil.longs.LongBigArrays */ public long[][] neighbors(long nodeId, boolean useTransposed) { return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java index 9840350..b688687 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -1,94 +1,94 @@ package org.softwareheritage.graph; import java.util.Iterator; import it.unimi.dsi.fastutil.longs.LongBigArrays; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; /** * Iterator class to go over a node neighbors in the graph. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class Neighbors implements Iterable { /** Graph used to explore neighbors */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** Source node from which neighbors will be listed */ long srcNodeId; /** * Constructor. * * @param graph graph used to explore neighbors * @param useTransposed boolean value to use transposed graph * @param edges edges allowed to be used in the graph * @param srcNodeId source node from where to list neighbors */ public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) { this.graph = graph; this.useTransposed = useTransposed; this.edges = edges; this.srcNodeId = srcNodeId; } @Override public Iterator iterator() { return new NeighborsIterator(); } /** * Inner class for {@link Neighbors} iterator. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class NeighborsIterator implements Iterator { long nextNeighborIdx; long nbNeighbors; long[][] neighbors; public NeighborsIterator() { this.nextNeighborIdx = -1; this.nbNeighbors = graph.degree(srcNodeId, useTransposed); this.neighbors = graph.neighbors(srcNodeId, useTransposed); } public boolean hasNext() { // No edge restriction case: bypass type checks and skip to next neighbor if (edges.restrictedTo == null) { if (nextNeighborIdx + 1 < nbNeighbors) { nextNeighborIdx++; return true; } else { return false; } } // Edge restriction case: look ahead for next neighbor for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) { long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx); if (edges.isAllowed(srcNodeId, nextNodeId)) { nextNeighborIdx = lookAheadIdx; return true; } } return false; } public Long next() { long nextNodeId = LongBigArrays.get(neighbors, nextNeighborIdx); return nextNodeId; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Node.java b/java/server/src/main/java/org/softwareheritage/graph/Node.java index b2a532b..8c36580 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Node.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Node.java @@ -1,93 +1,93 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * A node in the Software Heritage graph. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class Node { /** * Software Heritage graph node types, as described in the * data model. */ public enum Type { /** Content node */ CNT, /** Directory node */ DIR, /** Origin node */ ORI, /** Release node */ REL, /** Revision node */ REV, /** Snapshot node */ SNP; /** * Converts integer to corresponding SWH node type. * * @param intType node type represented as an integer * @return the corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public static Node.Type fromInt(int intType) { switch (intType) { case 0: return CNT; case 1: return DIR; case 2: return ORI; case 3: return REL; case 4: return REV; case 5: return SNP; } return null; } /** * Parses SWH node type from string. * * @param strType node type represented as a string * @return the corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public static Node.Type fromStr(String strType) { if (!strType.matches("cnt|dir|ori|rel|rev|snp")) { throw new IllegalArgumentException("Unknown node type: " + strType); } return Node.Type.valueOf(strType.toUpperCase()); } /** * Parses SWH node type possible values from formatted string (TODO: link API doc). * * @param strFmtType node types represented as a formatted string (TODO: link API doc) * @return a list containing the {@link Node.Type} values * @see org.softwareheritage.graph.Node.Type */ public static ArrayList parse(String strFmtType) { ArrayList types = new ArrayList<>(); if (strFmtType.equals("*")) { List nodeTypes = Arrays.asList(Node.Type.values()); types.addAll(nodeTypes); } else { types.add(Node.Type.fromStr(strFmtType)); } return types; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhId.java b/java/server/src/main/java/org/softwareheritage/graph/SwhId.java index 172148f..d89dd69 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhId.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhId.java @@ -1,97 +1,97 @@ package org.softwareheritage.graph; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.Node; /** * A Software Heritage PID, see persistent * identifier documentation. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class SwhId { /** Fixed hash length of the PID */ public static final int HASH_LENGTH = 40; /** Full PID as a string */ String swhId; /** PID node type */ Node.Type type; /** PID hex-encoded SHA1 hash */ String hash; /** * Constructor. * * @param swhId full PID as a string */ public SwhId(String swhId) { this.swhId = swhId; // PID format: 'swh:1:type:hash' String[] parts = swhId.split(":"); if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) { throw new IllegalArgumentException("Expected SWH ID format to be 'swh:1:type:hash', got: " + swhId); } this.type = Node.Type.fromStr(parts[2]); this.hash = parts[3]; if (!hash.matches("[0-9a-f]{" + HASH_LENGTH + "}")) { throw new IllegalArgumentException("Wrong SWH ID hash format in: " + swhId); } } @Override public boolean equals(Object otherObj) { if (otherObj == this) return true; if (!(otherObj instanceof SwhId)) return false; SwhId other = (SwhId) otherObj; return swhId.equals(other.getSwhId()); } @Override public int hashCode() { return swhId.hashCode(); } @Override public String toString() { return swhId; } /** * Returns full PID as a string. * * @return full PID string */ @JsonValue public String getSwhId() { return swhId; } /** * Returns PID node type. * * @return PID corresponding {@link Node.Type} * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType() { return type; } /** * Returns PID hex-encoded SHA1 hash. * * @return PID string hash */ public String getHash() { return hash; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java index 4d45849..ad84da7 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java @@ -1,123 +1,123 @@ package org.softwareheritage.graph; import java.util.ArrayList; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.SwhId; /** * Wrapper class to store a list of {@link SwhId}. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 * @see org.softwareheritage.graph.SwhId */ public class SwhPath { ArrayList path; /** * Constructor. */ public SwhPath() { this.path = new ArrayList(); } /** * Constructor. * * @param swhIds variable number of string PIDs to initialize this path with */ public SwhPath(String ...swhIds) { this(); for (String swhId : swhIds) { add(new SwhId(swhId)); } } /** * Constructor. * * @param swhIds variable number of @{link SwhId} to initialize this path with * @see org.softwareheritage.graph.SwhId */ public SwhPath(SwhId ...swhIds) { this(); for (SwhId swhId : swhIds) { add(swhId); } } /** * Returns this path as a list of {@link SwhId}. * * @return list of {@link SwhId} constituting the path * @see org.softwareheritage.graph.SwhId */ @JsonValue public ArrayList getPath() { return path; } /** * Adds a {@link SwhId} to this path. * * @param {@link SwhId} to add to this path * @see org.softwareheritage.graph.SwhId */ public void add(SwhId swhId) { path.add(swhId); } /** * Returns the {@link SwhId} at the specified position in this path. * * @param index position of the {@link SwhId} to return * @return {@link SwhId} at the specified position * @see org.softwareheritage.graph.SwhId */ public SwhId get(int index) { return path.get(index); } /** * Returns the number of elements in this path. * * @return number of elements in this path */ public int size() { return path.size(); } @Override public boolean equals(Object otherObj) { if (otherObj == this) return true; if (!(otherObj instanceof SwhPath)) return false; SwhPath other = (SwhPath) otherObj; if (size() != other.size()) { return false; } for (int i = 0; i < size(); i++) { SwhId thisSwhId = get(i); SwhId otherSwhId = other.get(i); if (!thisSwhId.equals(otherSwhId)) { return false; } } return true; } @Override public String toString() { String str = new String(); for (SwhId swhId : path) { str += swhId + "/"; } return str; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java index 67683c9..f145657 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java @@ -1,67 +1,67 @@ package org.softwareheritage.graph.algo; import java.io.FileInputStream; import java.io.IOException; import java.util.Properties; /** * Statistics on the compressed graph. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class Stats { public class Counts { public long nodes; public long edges; } public class Ratios { public double compression; public double bitsPerNode; public double bitsPerEdge; public double avgLocality; } public class Degree { public long min; public long max; public double avg; } public Counts counts; public Ratios ratios; public Degree indegree; public Degree outdegree; /** * Constructor. * * @param graphPath full path of compressed graph */ public Stats(String graphPath) throws IOException { Properties properties = new Properties(); properties.load(new FileInputStream(graphPath + ".properties")); properties.load(new FileInputStream(graphPath + ".stats")); this.counts = new Counts(); this.ratios = new Ratios(); this.indegree = new Degree(); this.outdegree = new Degree(); this.counts.nodes = Long.parseLong(properties.getProperty("nodes")); this.counts.edges = Long.parseLong(properties.getProperty("arcs")); this.ratios.compression = Double.parseDouble(properties.getProperty("compratio")); this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode")); this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink")); this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality")); this.indegree.min = Long.parseLong(properties.getProperty("minindegree")); this.indegree.max = Long.parseLong(properties.getProperty("maxindegree")); this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree")); this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree")); this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree")); this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree")); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java index 966710a..cff6fc4 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -1,309 +1,309 @@ package org.softwareheritage.graph.algo; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedList; import java.util.Queue; import java.util.Stack; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.longs.LongBigArrays; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; /** * Traversal algorithms on the compressed graph. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class Traversal { /** Graph used in the traversal */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** * Bit array storing if we have visited a node * @see it.unimi.dsi.bits.LongArrayBitVector */ LongArrayBitVector visited; /** * Array storing parent node id for each nodes during a traversal * @see it.unimi.dsi.fastutil.longs.LongBigArrays */ long[][] nodeParent; /** * Constructor. * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } this.graph = graph; this.useTransposed = (direction.equals("backward")); this.edges = new AllowedEdges(graph, edgesFmt); long nbNodes = graph.getNbNodes(); this.visited = LongArrayBitVector.ofLength(nbNodes); this.nodeParent = LongBigArrays.newBigArray(nbNodes); } /** * Returns the leaves of a subgraph rooted at the specified source node. * * @param srcNodeId source node * @return list of node ids corresponding to the leaves */ public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.visited.fill(false); stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); long neighborsCnt = 0; for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { neighborsCnt++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } if (neighborsCnt == 0) { nodeIds.add(currentNodeId); } } return nodeIds; } /** * Returns node direct neighbors (linked with exactly one edge). * * @param srcNodeId source node * @return list of node ids corresponding to the neighbors */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList(); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { nodeIds.add(neighborNodeId); } return nodeIds; } /** * Performs a graph traversal and returns explored nodes. * * @param srcNodeId source node * @return list of explored node ids */ public ArrayList visitNodes(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); this.visited.fill(false); stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); nodeIds.add(currentNodeId); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } } return nodeIds; } /** * Performs a graph traversal and returns explored paths. * * @param srcNodeId source node * @return list of explored paths (represented as a list of node ids) */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); Stack currentPath = new Stack(); visitPathsInternal(srcNodeId, paths, currentPath); return paths; } /** * Internal recursive function of {@link #visitPaths}. * * @param currentNodeId current node * @param paths list of currently stored paths * @param currentPath current path as node ids */ private void visitPathsInternal( long currentNodeId, ArrayList> paths, Stack currentPath) { currentPath.push(currentNodeId); long visitedNeighbors = 0; for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { visitPathsInternal(neighborNodeId, paths, currentPath); visitedNeighbors++; } if (visitedNeighbors == 0) { ArrayList path = new ArrayList(); for (long nodeId : currentPath) { path.add(nodeId); } paths.add(path); } currentPath.pop(); } /** * Performs a graph traversal and returns the first found path from source to destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids */ public ArrayList walk(long srcNodeId, T dst, String algorithm) { long dstNodeId = -1; if (algorithm.equals("dfs")) { dstNodeId = walkInternalDfs(srcNodeId, dst); } else if (algorithm.equals("bfs")) { dstNodeId = walkInternalBfs(srcNodeId, dst); } else { throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); } if (dstNodeId == -1) { throw new IllegalArgumentException("Unable to find destination point: " + dst); } ArrayList nodeIds = backtracking(srcNodeId, dstNodeId); return nodeIds; } /** * Internal DFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalDfs(long srcNodeId, T dst) { Stack stack = new Stack(); this.visited.fill(false); stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId); } } } return -1; } /** * Internal BFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalBfs(long srcNodeId, T dst) { Queue queue = new LinkedList(); this.visited.fill(false); queue.add(srcNodeId); visited.set(srcNodeId); while (!queue.isEmpty()) { long currentNodeId = queue.poll(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { queue.add(neighborNodeId); visited.set(neighborNodeId); LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId); } } } return -1; } /** * Internal function of {@link #walk} to check if a node corresponds to the destination. * * @param nodeId current node * @param dst destination (either a node or a node type) * @return true if the node is a destination, or false otherwise */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; return nodeId == dstNodeId; } else if (dst instanceof Node.Type) { Node.Type dstType = (Node.Type) dst; return graph.getNodeType(nodeId) == dstType; } else { return false; } } /** * Internal backtracking function of {@link #walk}. * * @param srcNodeId source node * @param dstNodeId destination node * @return the found path, as a list of node ids */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList(); long currentNodeId = dstNodeId; while (currentNodeId != srcNodeId) { path.add(currentNodeId); currentNodeId = LongBigArrays.get(nodeParent, currentNodeId); } path.add(srcNodeId); Collections.reverse(path); return path; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java index 46549ff..be63e96 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java @@ -1,64 +1,64 @@ package org.softwareheritage.graph.backend; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import it.unimi.dsi.io.ByteBufferInputStream; /** * Wrapper class around very big mmap()-ed file. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class MapFile { /** * Memory-mapped file buffer * @see it.unimi.dsi.io.ByteBufferInputStream */ ByteBufferInputStream bufferMap; /** Fixed line length of the mmap()-ed file */ int lineLength; /** * Constructor. * * @param path file path to mmap() * @param lineLength fixed length of a line in the file */ public MapFile(String path, int lineLength) throws IOException { this.bufferMap = null; this.lineLength = lineLength; try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { FileChannel fileChannel = mapFile.getChannel(); bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); } } /** * Returns a specific line in the file. * * @param lineIndex line number in the file * @return the line at the specified position */ public String readAtLine(long lineIndex) { byte[] buffer = new byte[lineLength]; long position = lineIndex * (long) lineLength; bufferMap.position(position); bufferMap.read(buffer, 0, lineLength); String line = new String(buffer); return line.trim(); } /** * Closes the mmap()-ed file. */ public void close() throws IOException { bufferMap.close(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java index 73c0d8f..2f31677 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java @@ -1,111 +1,111 @@ package org.softwareheritage.graph.backend; import java.io.IOException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.MapFile; /** * Mapping between internal long node id and external SWH PID. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class NodeIdMap { /** Fixed length of full SWH PID */ public static final int SWH_ID_LENGTH = 50; /** Fixed length of long node id */ public static final int NODE_ID_LENGTH = 20; /** Full graph path */ String graphPath; /** Number of ids to map */ long nbIds; /** mmap()-ed PID_TO_NODE file */ MapFile swhToNodeMap; /** mmap()-ed NODE_TO_PID file */ MapFile nodeToSwhMap; /** * Constructor. * * @param graphPath full graph path * @param nbNodes number of nodes in the graph */ public NodeIdMap(String graphPath, long nbNodes) throws IOException { this.graphPath = graphPath; this.nbIds = nbNodes; // +1 are for spaces and end of lines int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1; int nodeToSwhLineLength = SWH_ID_LENGTH + 1; this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength); this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength); } /** * Converts SWH PID to corresponding long node id. * * @param swhId node represented as a {@link SwhId} * @return corresponding node as a long id * @see org.softwareheritage.graph.SwhId */ public long getNodeId(SwhId swhId) { // Each line in PID_TO_NODE is formatted as: swhId nodeId // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId long start = 0; long end = nbIds - 1; while (start <= end) { long lineNumber = (start + end) / 2L; String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" "); if (parts.length != 2) { break; } String currentSwhId = parts[0]; long currentNodeId = Long.parseLong(parts[1]); int cmp = currentSwhId.compareTo(swhId.toString()); if (cmp == 0) { return currentNodeId; } else if (cmp < 0) { start = lineNumber + 1; } else { end = lineNumber - 1; } } throw new IllegalArgumentException("Unknown SWH id: " + swhId); } /** * Converts a node long id to corresponding SWH PID. * * @param nodeId node as a long id * @return corresponding node as a {@link SwhId} * @see org.softwareheritage.graph.SwhId */ public SwhId getSwhId(long nodeId) { // Each line in NODE_TO_PID is formatted as: swhId // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the // nodeId-th line to get corresponding swhId if (nodeId < 0 || nodeId >= nbIds) { throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); } String swhId = nodeToSwhMap.readAtLine(nodeId); return new SwhId(swhId); } /** * Closes the mapping files. */ public void close() throws IOException { swhToNodeMap.close(); nodeToSwhMap.close(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java index 2a80a67..a4a094e 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java @@ -1,51 +1,51 @@ package org.softwareheritage.graph.backend; import java.io.IOException; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Mapping between long node id and SWH node type as described in the data model. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class NodeTypesMap { /** * Array storing for each node its type * @see it.unimi.dsi.fastutil.longs.LongBigList */ LongBigList nodeTypesMap; /** * Constructor. * * @param graphPath full path of the compressed graph */ public NodeTypesMap(String graphPath) throws IOException { try { nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Unknown class object: " + e); } } /** * Returns node type from a node long id. * * @param nodeId node as a long id * @return corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType(long nodeId) { long type = nodeTypesMap.getLong(nodeId); return Node.Type.fromInt((int) type); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java index 72ada80..5474abc 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java @@ -1,120 +1,120 @@ package org.softwareheritage.graph.backend; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Writer; import java.util.zip.GZIPInputStream; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.fastutil.objects.ObjectBigArrays; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Pre-processing steps (such as dumping mapping files on disk) before running the graph service. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class Setup { public static void main(String[] args) throws IOException { if (args.length != 2) { System.err.println("Expected parameters: "); System.exit(1); } String nodesPath = args[0]; String graphPath = args[1]; System.out.println("Pre-computing node id maps..."); long startTime = System.nanoTime(); precomputeNodeIdMap(nodesPath, graphPath); long endTime = System.nanoTime(); double duration = (double) (endTime - startTime) / 1_000_000_000; System.out.println("Done in: " + duration + " seconds"); } /** * Computes and dumps on disk mapping files. * * @param nodesPath path of the compressed csv nodes file * @param graphPath path of the compressed graph */ // Suppress warning for Object2LongFunction cast @SuppressWarnings("unchecked") static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException { // First internal mapping: SWH id (string) -> WebGraph MPH (long) Object2LongFunction mphMap = null; try { mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); } long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); // Second internal mapping: WebGraph MPH (long) -> BFS ordering (long) long[][] bfsMap = LongBigArrays.newBigArray(nbIds); long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); if (loaded != nbIds) { throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded); } // Dump complete mapping for all nodes: SWH id (string) <=> WebGraph node id (long) InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath)); FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8")); LineIterator swhIdIterator = new LineIterator(buffer); try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE)); Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) { // nodeToSwhMap needs to write SWH id in order of node id, so use a temporary array Object[][] nodeToSwhId = ObjectBigArrays.newBigArray(nbIds); // To effectively run edge restriction during graph traversals, we store node id (long) -> SWH // type map. This is represented as a bitmap using minimum number of bits per Node.Type. final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2)); final int nbBitsPerNodeType = log2NbTypes; LongArrayBitVector nodeTypesBitVector = LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); for (long iNode = 0; iNode < nbIds && swhIdIterator.hasNext(); iNode++) { String strSwhId = swhIdIterator.next().toString(); long mphId = mphMap.getLong(strSwhId); long nodeId = LongBigArrays.get(bfsMap, mphId); String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId); String line = strSwhId + " " + paddedNodeId + "\n"; swhToNodeMap.write(line); ObjectBigArrays.set(nodeToSwhId, nodeId, strSwhId); SwhId swhId = new SwhId(strSwhId); nodeTypesMap.set(nodeId, swhId.getType().ordinal()); } BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); for (long iNode = 0; iNode < nbIds; iNode++) { String line = ObjectBigArrays.get(nodeToSwhId, iNode).toString() + "\n"; nodeToSwhMap.write(line); } } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java index 88faec9..e4829ec 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java @@ -1,39 +1,39 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; /** * Linux git log experiment to benchmark graph traversal. * * @author Thibault Allançon - * @version 1.0 - * @since 1.0 + * @version 0.0.1 + * @since 0.0.1 */ public class LinuxLog { public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); // A linux kernel commit on Sun Dec 31 final SwhId commit = new SwhId("swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35"); final long expectedCount = 723640; System.out.println("git log " + commit); System.out.println("Expecting " + expectedCount + " commits"); long startTime = System.nanoTime(); Endpoint endpoint = new Endpoint(graph, "forward", "rev:rev"); long count = endpoint.visitNodes(commit).size(); if (count != expectedCount) { throw new IllegalArgumentException("Counted " + count + " commits"); } long endTime = System.nanoTime(); double duration = (double) (endTime - startTime) / 1_000_000_000; System.out.println("Visit operation done in: " + duration + " seconds"); } }