diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
index 08f7dd4..f21e372 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
@@ -1,88 +1,93 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
/**
* Edge restriction based on node types, used when visiting the graph.
+ *
+ * Software Heritage
+ * graph contains multiple node types (contents, directories, revisions, ...) and restricting
+ * the traversal to specific node types is necessary for many querying operations: use cases.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class AllowedEdges {
/** Graph on which edge restriction is performed */
Graph graph;
/**
* 2D boolean matrix storing access rights for all combination of src/dst node types (first
* dimension is source, second dimension is destination), when edge restriction is not enforced
* this array is set to null for early bypass.
*/
public boolean[][] restrictedTo;
/**
* Constructor.
*
* @param graph the graph on which to perform edge restriction
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing allowed edges
*/
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
int nbNodeTypes = Node.Type.values().length;
this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes];
// Special values (null, empty, "*")
if (edgesFmt == null || edgesFmt.isEmpty()) {
return;
}
if (edgesFmt.equals("*")) {
// Allows for quick bypass (with simple null check) when no edge restriction
restrictedTo = null;
return;
}
// Format: "src1:dst1,src2:dst2,[...]"
- // TODO: link API doc
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
if (nodeTypes.length != 2) {
throw new IllegalArgumentException("Cannot parse edge type: " + edgeType);
}
ArrayList srcTypes = Node.Type.parse(nodeTypes[0]);
ArrayList dstTypes = Node.Type.parse(nodeTypes[1]);
for (Node.Type srcType : srcTypes) {
for (Node.Type dstType : dstTypes) {
restrictedTo[srcType.ordinal()][dstType.ordinal()] = true;
}
}
}
}
/**
* Checks if a given edge can be followed during graph traversal.
*
* @param srcNodeId edge source node
* @param dstNodeId edge destination node
* @return true if allowed and false otherwise
*/
public boolean isAllowed(long srcNodeId, long dstNodeId) {
Node.Type srcType = graph.getNodeType(srcNodeId);
Node.Type dstType = graph.getNodeType(dstNodeId);
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
/**
* Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of
* node ids.
*
* @see AllowedEdges#isAllowed(long, long)
*/
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java
index 29f9774..4a3d896 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,151 +1,169 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.algo.Stats;
/**
- * Entrypoint of the swh-graph server REST API.
+ * Web framework of the swh-graph server REST API.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class App {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException, JSAPException {
SimpleJSAP jsap = new SimpleJSAP(
App.class.getName(),
"Server to load and query a compressed graph representation of Software Heritage archive.",
new Parameter[] {
new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port",
"Binding port of the server."),
new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the compressed graph."),
}
);
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
String graphPath = config.getString("graphPath");
int port = config.getInt("port");
startServer(graphPath, port);
}
+ /**
+ * Loads compressed graph and starts the web server to query it.
+ *
+ * @param graphPath basename of the compressed graph
+ * @param port binding port of the server
+ */
private static void startServer(String graphPath, int port) throws IOException {
Graph graph = new Graph(graphPath);
Stats stats = new Stats(graphPath);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
Javalin app = Javalin.create().start(port);
app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
app.get("/stats/", ctx -> { ctx.json(stats); });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
app.get("/leaves/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.leaves(src));
});
app.get("/neighbors/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.neighbors(src));
});
app.get("/visit/nodes/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitNodes(src));
});
app.get("/visit/paths/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitPaths(src));
});
app.get("/walk/:src/:dst", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String algorithm = ctx.queryParam("traversal", "dfs");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.walk(src, dstFmt, algorithm));
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
+ /**
+ * Checks query strings names provided to the REST API.
+ *
+ * @param ctx Javalin HTTP request context
+ * @param allowedFmt a regular expression describing allowed query strings names
+ * @throws IllegalArgumentException unknown query string provided
+ */
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
index 424dfd1..03bbf1b 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -1,210 +1,217 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
import org.softwareheritage.graph.utils.Timing;
/**
* REST API endpoints wrapper functions.
+ *
+ * Graph operations are segmented between high-level class (this one) and the low-level class
+ * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing
+ * all the input/output node ids conversions and logging timings.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.algo.Traversal
*/
public class Endpoint {
/** Graph where traversal endpoint is performed */
Graph graph;
/** Internal traversal API */
Traversal traversal;
/** Timings logger */
private static final Logger logger = LoggerFactory.getLogger(Endpoint.class);
/**
* Constructor.
*
* @param graph the graph used for traversal endpoint
* @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing allowed edges
*/
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
/**
* Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
*
* @param nodeIds the list of long node ids
* @return a list of corresponding SWH PIDs
*/
private ArrayList convertNodesToSwhIds(ArrayList nodeIds) {
long startTime = Timing.start();
ArrayList swhIds = new ArrayList<>();
for (long nodeId : nodeIds) {
swhIds.add(graph.getSwhId(nodeId));
}
float duration = Timing.stop(startTime);
logger.debug("convertNodesToSwhIds() took {} s.", duration);
return swhIds;
}
/**
* Converts a list of (internal) long node ids to the corresponding {@link SwhPath}.
*
* @param nodeIds the list of long node ids
* @return the corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private SwhPath convertNodesToSwhPath(ArrayList nodeIds) {
long startTime = Timing.start();
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
path.add(graph.getSwhId(nodeId));
}
float duration = Timing.stop(startTime);
logger.debug("convertNodesToSwhPath() took {} s.", duration);
return path;
}
/**
* Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s.
*
* @param pathsNodeId the list of paths with long node ids
* @return a list of corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private ArrayList convertPathsToSwhIds(ArrayList> pathsNodeId) {
long startTime = Timing.start();
ArrayList paths = new ArrayList<>();
for (ArrayList path : pathsNodeId) {
paths.add(convertNodesToSwhPath(path));
}
float duration = Timing.stop(startTime);
logger.debug("convertPathsToSwhIds() took {} s.", duration);
return paths;
}
/**
- * Leaves endpoint wrapper (performs input/output node ids conversions).
+ * Leaves endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#leaves(long)
*/
public ArrayList leaves(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList nodeIds = traversal.leaves(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("leaves({}) took {} s.", src, duration);
return convertNodesToSwhIds(nodeIds);
}
/**
- * Neighbors endpoint wrapper (performs input/output node ids conversions).
+ * Neighbors endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
*/
public ArrayList neighbors(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList nodeIds = traversal.neighbors(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("neighbors({}) took {} s.", src, duration);
return convertNodesToSwhIds(nodeIds);
}
/**
- * Walk endpoint wrapper (performs input/output node ids conversions).
+ * Walk endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @param dstFmt destination used in endpoint call (TODO: link to API doc)
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @param dstFmt destination formatted string as described in the API
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
* @return the resulting {@link SwhPath} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#walk
*/
public SwhPath walk(SwhId src, String dstFmt, String algorithm) {
long srcNodeId = graph.getNodeId(src);
ArrayList nodeIds = new ArrayList();
// Destination is either a SWH ID or a node type
try {
SwhId dstSwhId = new SwhId(dstFmt);
long dstNodeId = graph.getNodeId(dstSwhId);
long startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm);
float duration = Timing.stop(startTime);
logger.debug("walk({}) took {} s.", src, duration);
} catch (IllegalArgumentException ignored1) {
try {
Node.Type dstType = Node.Type.fromStr(dstFmt);
long startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstType, algorithm);
float duration = Timing.stop(startTime);
logger.debug("walk({}) took {} s.", src, duration);
} catch (IllegalArgumentException ignored2) { }
}
return convertNodesToSwhPath(nodeIds);
}
/**
- * VisitNodes endpoint wrapper (performs input/output node ids conversions).
+ * VisitNodes endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
*/
public ArrayList visitNodes(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList nodeIds = traversal.visitNodes(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("visitNodes({}) took {} s.", src, duration);
return convertNodesToSwhIds(nodeIds);
}
/**
- * VisitPaths endpoint wrapper (performs input/output node ids conversions).
+ * VisitPaths endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
+ * @param src source node of endpoint call specified as a {@link SwhId}
* @return the resulting list of {@link SwhPath} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#visitPaths(long)
*/
public ArrayList visitPaths(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList> paths = traversal.visitPaths(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("visitPaths({}) took {} s.", src, duration);
return convertPathsToSwhIds(paths);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
index 6c2310b..1b7be1d 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,183 +1,195 @@
package org.softwareheritage.graph;
import java.io.IOException;
import it.unimi.dsi.big.webgraph.BVGraph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeIdMap;
import org.softwareheritage.graph.backend.NodeTypesMap;
/**
* Main class storing the compressed graph and node id mappings.
+ *
+ * The compressed graph is stored using the WebGraph
+ * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent
+ * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔
+ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to
+ * PID for users results). However, since graph traversal can be restricted depending on the node
+ * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full
+ * PID lookup.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.AllowedEdges
+ * @see org.softwareheritage.graph.NodeIdMap;
+ * @see org.softwareheritage.graph.NodeTypesMap;
*/
public class Graph {
/** File extension for the SWH PID to long node id map */
public static final String PID_TO_NODE = ".pid2node.csv";
/** File extension for the long node id to SWH PID map */
public static final String NODE_TO_PID = ".node2pid.csv";
/** File extension for the long node id to node typ map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
BVGraph graph;
/** Transposed compressed graph (used for backward traversals) */
BVGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id ↔ SWH PIDs */
NodeIdMap nodeIdMap;
/** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
this.path = path;
this.nodeIdMap = new NodeIdMap(path, getNbNodes());
this.nodeTypesMap = new NodeTypesMap(path);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SwhId} node to long.
*
* @param swhId node specified as a {@link SwhId}
* @return internal long node id
* @see org.softwareheritage.graph.SwhId
*/
public long getNodeId(SwhId swhId) {
return nodeIdMap.getNodeId(swhId);
}
/**
* Converts long id node to {@link SwhId}.
*
* @param nodeId node specified as a long id
* @return external SWH PID
* @see org.softwareheritage.graph.SwhId
*/
public SwhId getSwhId(long nodeId) {
return nodeIdMap.getSwhId(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
public long getNbNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
public long getNbEdges() {
return graph.numArcs();
}
/**
* Returns list of successors of a node.
*
* @param nodeId node specified as a long id
- * @return list of successors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a fastutil LongBigArrays
*/
public long[][] successors(long nodeId) {
return graph.successorBigArray(nodeId);
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns list of predecessors of a node.
*
* @param nodeId node specified as a long id
- * @return list of predecessors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a fastutil LongBigArrays
*/
public long[][] predecessors(long nodeId) {
return graphTransposed.successorBigArray(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return graphTransposed.outdegree(nodeId);
}
/**
* Returns the degree of a node, depending on graph orientation.
*
* @param nodeId node specified as a long id
* @param useTransposed boolean value to use transposed graph
* @return degree of a node
*/
public long degree(long nodeId, boolean useTransposed) {
return (useTransposed) ? indegree(nodeId) : outdegree(nodeId);
}
/**
* Returns the neighbors of a node, depending on graph orientation.
*
* @param nodeId node specified as a long id
* @param useTransposed boolean value to use transposed graph
- * @return list of neighbors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a fastutil LongBigArrays
*/
public long[][] neighbors(long nodeId, boolean useTransposed) {
return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
index b688687..51de885 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
@@ -1,94 +1,98 @@
package org.softwareheritage.graph;
import java.util.Iterator;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Graph;
/**
* Iterator class to go over a node neighbors in the graph.
+ *
+ * Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.AllowedEdges
*/
public class Neighbors implements Iterable {
/** Graph used to explore neighbors */
Graph graph;
/** Boolean to specify the use of the transposed graph */
boolean useTransposed;
/** Graph edge restriction */
AllowedEdges edges;
/** Source node from which neighbors will be listed */
long srcNodeId;
/**
* Constructor.
*
* @param graph graph used to explore neighbors
* @param useTransposed boolean value to use transposed graph
* @param edges edges allowed to be used in the graph
* @param srcNodeId source node from where to list neighbors
*/
public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) {
this.graph = graph;
this.useTransposed = useTransposed;
this.edges = edges;
this.srcNodeId = srcNodeId;
}
@Override
public Iterator iterator() {
return new NeighborsIterator();
}
/**
* Inner class for {@link Neighbors} iterator.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class NeighborsIterator implements Iterator {
long nextNeighborIdx;
long nbNeighbors;
+ // LongBigArrays to support 64-bit indexing.
long[][] neighbors;
public NeighborsIterator() {
this.nextNeighborIdx = -1;
this.nbNeighbors = graph.degree(srcNodeId, useTransposed);
this.neighbors = graph.neighbors(srcNodeId, useTransposed);
}
public boolean hasNext() {
- // No edge restriction case: bypass type checks and skip to next neighbor
+ // Case 1: no edge restriction, bypass type checks and skip to next neighbor
if (edges.restrictedTo == null) {
if (nextNeighborIdx + 1 < nbNeighbors) {
nextNeighborIdx++;
return true;
} else {
return false;
}
}
- // Edge restriction case: look ahead for next neighbor
+ // Case 2: edge restriction, look ahead for next neighbor
for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) {
long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx);
if (edges.isAllowed(srcNodeId, nextNodeId)) {
nextNeighborIdx = lookAheadIdx;
return true;
}
}
return false;
}
public Long next() {
long nextNodeId = LongBigArrays.get(neighbors, nextNeighborIdx);
return nextNodeId;
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Node.java b/java/server/src/main/java/org/softwareheritage/graph/Node.java
index 8c36580..9194b97 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Node.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Node.java
@@ -1,93 +1,95 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* A node in the Software Heritage graph.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Node {
/**
* Software Heritage graph node types, as described in the
* data model.
*/
public enum Type {
/** Content node */
CNT,
/** Directory node */
DIR,
/** Origin node */
ORI,
/** Release node */
REL,
/** Revision node */
REV,
/** Snapshot node */
SNP;
/**
* Converts integer to corresponding SWH node type.
*
* @param intType node type represented as an integer
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromInt(int intType) {
switch (intType) {
case 0:
return CNT;
case 1:
return DIR;
case 2:
return ORI;
case 3:
return REL;
case 4:
return REV;
case 5:
return SNP;
}
return null;
}
/**
- * Parses SWH node type from string.
+ * Converts string to corresponding SWH node type.
*
* @param strType node type represented as a string
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromStr(String strType) {
if (!strType.matches("cnt|dir|ori|rel|rev|snp")) {
throw new IllegalArgumentException("Unknown node type: " + strType);
}
return Node.Type.valueOf(strType.toUpperCase());
}
/**
- * Parses SWH node type possible values from formatted string (TODO: link API doc).
+ * Parses SWH node type possible values from formatted string (see the API
+ * syntax).
*
- * @param strFmtType node types represented as a formatted string (TODO: link API doc)
+ * @param strFmtType node types represented as a formatted string
* @return a list containing the {@link Node.Type} values
* @see org.softwareheritage.graph.Node.Type
*/
public static ArrayList parse(String strFmtType) {
ArrayList types = new ArrayList<>();
if (strFmtType.equals("*")) {
List nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
types.add(Node.Type.fromStr(strFmtType));
}
return types;
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
index ad84da7..f77c835 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
@@ -1,123 +1,124 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import com.fasterxml.jackson.annotation.JsonValue;
import org.softwareheritage.graph.SwhId;
/**
* Wrapper class to store a list of {@link SwhId}.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
* @see org.softwareheritage.graph.SwhId
*/
public class SwhPath {
+ /** Internal list of {@link SwhId} */
ArrayList path;
/**
* Constructor.
*/
public SwhPath() {
this.path = new ArrayList();
}
/**
* Constructor.
*
* @param swhIds variable number of string PIDs to initialize this path with
*/
public SwhPath(String ...swhIds) {
this();
for (String swhId : swhIds) {
add(new SwhId(swhId));
}
}
/**
* Constructor.
*
- * @param swhIds variable number of @{link SwhId} to initialize this path with
+ * @param swhIds variable number of {@link SwhId} to initialize this path with
* @see org.softwareheritage.graph.SwhId
*/
public SwhPath(SwhId ...swhIds) {
this();
for (SwhId swhId : swhIds) {
add(swhId);
}
}
/**
* Returns this path as a list of {@link SwhId}.
*
* @return list of {@link SwhId} constituting the path
* @see org.softwareheritage.graph.SwhId
*/
@JsonValue
public ArrayList getPath() {
return path;
}
/**
* Adds a {@link SwhId} to this path.
*
* @param {@link SwhId} to add to this path
* @see org.softwareheritage.graph.SwhId
*/
public void add(SwhId swhId) {
path.add(swhId);
}
/**
* Returns the {@link SwhId} at the specified position in this path.
*
* @param index position of the {@link SwhId} to return
* @return {@link SwhId} at the specified position
* @see org.softwareheritage.graph.SwhId
*/
public SwhId get(int index) {
return path.get(index);
}
/**
* Returns the number of elements in this path.
*
* @return number of elements in this path
*/
public int size() {
return path.size();
}
@Override
public boolean equals(Object otherObj) {
if (otherObj == this) return true;
if (!(otherObj instanceof SwhPath)) return false;
SwhPath other = (SwhPath) otherObj;
if (size() != other.size()) {
return false;
}
for (int i = 0; i < size(); i++) {
SwhId thisSwhId = get(i);
SwhId otherSwhId = other.get(i);
if (!thisSwhId.equals(otherSwhId)) {
return false;
}
}
return true;
}
@Override
public String toString() {
String str = new String();
for (SwhId swhId : path) {
str += swhId + "/";
}
return str;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
index f145657..a26bbad 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
@@ -1,67 +1,70 @@
package org.softwareheritage.graph.algo;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
/**
* Statistics on the compressed graph.
+ *
+ * These statistics are not computed but directly read from WebGraph generated .stats and .properties files.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Stats {
public class Counts {
public long nodes;
public long edges;
}
public class Ratios {
public double compression;
public double bitsPerNode;
public double bitsPerEdge;
public double avgLocality;
}
public class Degree {
public long min;
public long max;
public double avg;
}
public Counts counts;
public Ratios ratios;
public Degree indegree;
public Degree outdegree;
/**
* Constructor.
*
- * @param graphPath full path of compressed graph
+ * @param graphPath path and basename of compressed graph
*/
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
properties.load(new FileInputStream(graphPath + ".stats"));
this.counts = new Counts();
this.ratios = new Ratios();
this.indegree = new Degree();
this.outdegree = new Degree();
this.counts.nodes = Long.parseLong(properties.getProperty("nodes"));
this.counts.edges = Long.parseLong(properties.getProperty("arcs"));
this.ratios.compression = Double.parseDouble(properties.getProperty("compratio"));
this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode"));
this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink"));
this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality"));
this.indegree.min = Long.parseLong(properties.getProperty("minindegree"));
this.indegree.max = Long.parseLong(properties.getProperty("maxindegree"));
this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree"));
this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree"));
this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree"));
this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree"));
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
index cff6fc4..65f5fb3 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -1,309 +1,310 @@
package org.softwareheritage.graph.algo;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import org.softwareheritage.graph.AllowedEdges;
+import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
/**
* Traversal algorithms on the compressed graph.
+ *
+ * Internal implementation of the traversal API endpoints. These methods only input/output internal
+ * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage
+ * PID.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
Graph graph;
/** Boolean to specify the use of the transposed graph */
boolean useTransposed;
/** Graph edge restriction */
AllowedEdges edges;
- /**
- * Bit array storing if we have visited a node
- * @see it.unimi.dsi.bits.LongArrayBitVector
- */
+ /** Bit array storing if we have visited a node */
LongArrayBitVector visited;
- /**
- * Array storing parent node id for each nodes during a traversal
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
- */
+ /** LongBigArray storing parent node id for each nodes during a traversal */
long[][] nodeParent;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing allowed edges
*/
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
this.graph = graph;
this.useTransposed = (direction.equals("backward"));
this.edges = new AllowedEdges(graph, edgesFmt);
long nbNodes = graph.getNbNodes();
this.visited = LongArrayBitVector.ofLength(nbNodes);
this.nodeParent = LongBigArrays.newBigArray(nbNodes);
}
/**
* Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
* @return list of node ids corresponding to the leaves
*/
public ArrayList leaves(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
Stack stack = new Stack();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
neighborsCnt++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
if (neighborsCnt == 0) {
nodeIds.add(currentNodeId);
}
}
return nodeIds;
}
/**
* Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
* @return list of node ids corresponding to the neighbors
*/
public ArrayList neighbors(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
nodeIds.add(neighborNodeId);
}
return nodeIds;
}
/**
* Performs a graph traversal and returns explored nodes.
*
* @param srcNodeId source node
* @return list of explored node ids
*/
public ArrayList visitNodes(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
Stack stack = new Stack();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
nodeIds.add(currentNodeId);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
}
return nodeIds;
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList> visitPaths(long srcNodeId) {
ArrayList> paths = new ArrayList<>();
Stack currentPath = new Stack();
visitPathsInternal(srcNodeId, paths, currentPath);
return paths;
}
/**
* Internal recursive function of {@link #visitPaths}.
*
* @param currentNodeId current node
* @param paths list of currently stored paths
* @param currentPath current path as node ids
*/
private void visitPathsInternal(
long currentNodeId, ArrayList> paths, Stack currentPath) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
visitPathsInternal(neighborNodeId, paths, currentPath);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList path = new ArrayList();
for (long nodeId : currentPath) {
path.add(nodeId);
}
paths.add(path);
}
currentPath.pop();
}
/**
* Performs a graph traversal and returns the first found path from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public ArrayList walk(long srcNodeId, T dst, String algorithm) {
long dstNodeId = -1;
if (algorithm.equals("dfs")) {
dstNodeId = walkInternalDfs(srcNodeId, dst);
} else if (algorithm.equals("bfs")) {
dstNodeId = walkInternalBfs(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Unable to find destination point: " + dst);
}
ArrayList nodeIds = backtracking(srcNodeId, dstNodeId);
return nodeIds;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalDfs(long srcNodeId, T dst) {
Stack stack = new Stack();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalBfs(long srcNodeId, T dst) {
Queue queue = new LinkedList();
this.visited.fill(false);
queue.add(srcNodeId);
visited.set(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
queue.add(neighborNodeId);
visited.set(neighborNodeId);
LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList backtracking(long srcNodeId, long dstNodeId) {
ArrayList path = new ArrayList();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = LongBigArrays.get(nodeParent, currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
index be63e96..0644049 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
@@ -1,64 +1,65 @@
package org.softwareheritage.graph.backend;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import it.unimi.dsi.io.ByteBufferInputStream;
/**
* Wrapper class around very big mmap()-ed file.
+ *
+ * Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this
+ * Java limit.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class MapFile {
- /**
- * Memory-mapped file buffer
- * @see it.unimi.dsi.io.ByteBufferInputStream
- */
+ /** Memory-mapped file buffer */
ByteBufferInputStream bufferMap;
/** Fixed line length of the mmap()-ed file */
int lineLength;
/**
* Constructor.
*
* @param path file path to mmap()
* @param lineLength fixed length of a line in the file
*/
public MapFile(String path, int lineLength) throws IOException {
this.bufferMap = null;
this.lineLength = lineLength;
try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) {
FileChannel fileChannel = mapFile.getChannel();
bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY);
}
}
/**
* Returns a specific line in the file.
*
* @param lineIndex line number in the file
* @return the line at the specified position
*/
public String readAtLine(long lineIndex) {
byte[] buffer = new byte[lineLength];
long position = lineIndex * (long) lineLength;
bufferMap.position(position);
bufferMap.read(buffer, 0, lineLength);
String line = new String(buffer);
return line.trim();
}
/**
* Closes the mmap()-ed file.
*/
public void close() throws IOException {
bufferMap.close();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
index 2f31677..3d68dce 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -1,111 +1,116 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.MapFile;
+import org.softwareheritage.graph.backend.Setup;
/**
* Mapping between internal long node id and external SWH PID.
+ *
+ * Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then
+ * they are loaded here using mmap().
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.backend.Setup
*/
public class NodeIdMap {
/** Fixed length of full SWH PID */
public static final int SWH_ID_LENGTH = 50;
/** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
- /** Full graph path */
+ /** Graph path and basename */
String graphPath;
/** Number of ids to map */
long nbIds;
/** mmap()-ed PID_TO_NODE file */
MapFile swhToNodeMap;
/** mmap()-ed NODE_TO_PID file */
MapFile nodeToSwhMap;
/**
* Constructor.
*
* @param graphPath full graph path
* @param nbNodes number of nodes in the graph
*/
public NodeIdMap(String graphPath, long nbNodes) throws IOException {
this.graphPath = graphPath;
this.nbIds = nbNodes;
// +1 are for spaces and end of lines
int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1;
int nodeToSwhLineLength = SWH_ID_LENGTH + 1;
this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength);
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength);
}
/**
* Converts SWH PID to corresponding long node id.
*
* @param swhId node represented as a {@link SwhId}
* @return corresponding node as a long id
* @see org.softwareheritage.graph.SwhId
*/
public long getNodeId(SwhId swhId) {
// Each line in PID_TO_NODE is formatted as: swhId nodeId
// The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
long start = 0;
long end = nbIds - 1;
while (start <= end) {
long lineNumber = (start + end) / 2L;
String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" ");
if (parts.length != 2) {
break;
}
String currentSwhId = parts[0];
long currentNodeId = Long.parseLong(parts[1]);
int cmp = currentSwhId.compareTo(swhId.toString());
if (cmp == 0) {
return currentNodeId;
} else if (cmp < 0) {
start = lineNumber + 1;
} else {
end = lineNumber - 1;
}
}
throw new IllegalArgumentException("Unknown SWH id: " + swhId);
}
/**
* Converts a node long id to corresponding SWH PID.
*
* @param nodeId node as a long id
* @return corresponding node as a {@link SwhId}
* @see org.softwareheritage.graph.SwhId
*/
public SwhId getSwhId(long nodeId) {
// Each line in NODE_TO_PID is formatted as: swhId
// The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
// nodeId-th line to get corresponding swhId
if (nodeId < 0 || nodeId >= nbIds) {
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds);
}
String swhId = nodeToSwhMap.readAtLine(nodeId);
return new SwhId(swhId);
}
/**
* Closes the mapping files.
*/
public void close() throws IOException {
swhToNodeMap.close();
nodeToSwhMap.close();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
index a4a094e..1b95bd0 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -1,51 +1,53 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
/**
* Mapping between long node id and SWH node type as described in the data model.
+ *
+ * The type mapping is pre-computed and dumped on disk in the {@link Setup} class, then it is loaded
+ * in-memory here using fastutil LongBigList. To be
+ * space-efficient, the mapping is stored as a bitmap using minimum number of bits per {@link
+ * Node.Type}.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class NodeTypesMap {
- /**
- * Array storing for each node its type
- * @see it.unimi.dsi.fastutil.longs.LongBigList
- */
+ /** Array storing for each node its type */
LongBigList nodeTypesMap;
/**
* Constructor.
*
- * @param graphPath full path of the compressed graph
+ * @param graphPath path and basename of the compressed graph
*/
public NodeTypesMap(String graphPath) throws IOException {
try {
nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("Unknown class object: " + e);
}
}
/**
* Returns node type from a node long id.
*
* @param nodeId node as a long id
* @return corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getType(long nodeId) {
long type = nodeTypesMap.getLong(nodeId);
return Node.Type.fromInt((int) type);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
index 5474abc..a3fa6c7 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -1,120 +1,125 @@
package org.softwareheritage.graph.backend;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.zip.GZIPInputStream;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeTypesMap;
/**
* Pre-processing steps (such as dumping mapping files on disk) before running the graph service.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Setup {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.err.println("Expected parameters: ");
System.exit(1);
}
String nodesPath = args[0];
String graphPath = args[1];
System.out.println("Pre-computing node id maps...");
long startTime = System.nanoTime();
precomputeNodeIdMap(nodesPath, graphPath);
long endTime = System.nanoTime();
double duration = (double) (endTime - startTime) / 1_000_000_000;
System.out.println("Done in: " + duration + " seconds");
}
/**
* Computes and dumps on disk mapping files.
*
* @param nodesPath path of the compressed csv nodes file
* @param graphPath path of the compressed graph
*/
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException {
// First internal mapping: SWH id (string) -> WebGraph MPH (long)
Object2LongFunction mphMap = null;
try {
mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph");
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("The .mph file contains unknown class object: " + e);
}
long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
// Second internal mapping: WebGraph MPH (long) -> BFS ordering (long)
long[][] bfsMap = LongBigArrays.newBigArray(nbIds);
long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap);
if (loaded != nbIds) {
throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded);
}
// Dump complete mapping for all nodes: SWH id (string) <=> WebGraph node id (long)
InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath));
FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8"));
LineIterator swhIdIterator = new LineIterator(buffer);
try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE));
Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) {
// nodeToSwhMap needs to write SWH id in order of node id, so use a temporary array
Object[][] nodeToSwhId = ObjectBigArrays.newBigArray(nbIds);
// To effectively run edge restriction during graph traversals, we store node id (long) -> SWH
// type map. This is represented as a bitmap using minimum number of bits per Node.Type.
final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2));
final int nbBitsPerNodeType = log2NbTypes;
LongArrayBitVector nodeTypesBitVector =
LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType);
for (long iNode = 0; iNode < nbIds && swhIdIterator.hasNext(); iNode++) {
String strSwhId = swhIdIterator.next().toString();
long mphId = mphMap.getLong(strSwhId);
long nodeId = LongBigArrays.get(bfsMap, mphId);
String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId);
String line = strSwhId + " " + paddedNodeId + "\n";
swhToNodeMap.write(line);
ObjectBigArrays.set(nodeToSwhId, nodeId, strSwhId);
SwhId swhId = new SwhId(strSwhId);
nodeTypesMap.set(nodeId, swhId.getType().ordinal());
}
BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE);
for (long iNode = 0; iNode < nbIds; iNode++) {
String line = ObjectBigArrays.get(nodeToSwhId, iNode).toString() + "\n";
nodeToSwhMap.write(line);
}
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
index e4829ec..bd45232 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
@@ -1,39 +1,47 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
/**
* Linux git log experiment to benchmark graph traversal.
+ *
+ * The goal is to do the equivalent of a {@code git log} in the Linux kernel by following revisions
+ * nodes.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class LinuxLog {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);
// A linux kernel commit on Sun Dec 31
final SwhId commit = new SwhId("swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35");
final long expectedCount = 723640;
System.out.println("git log " + commit);
System.out.println("Expecting " + expectedCount + " commits");
long startTime = System.nanoTime();
Endpoint endpoint = new Endpoint(graph, "forward", "rev:rev");
long count = endpoint.visitNodes(commit).size();
if (count != expectedCount) {
throw new IllegalArgumentException("Counted " + count + " commits");
}
long endTime = System.nanoTime();
double duration = (double) (endTime - startTime) / 1_000_000_000;
System.out.println("Visit operation done in: " + duration + " seconds");
}
}