Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9339433
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
63 KB
Subscribers
None
View Options
diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
index 08f7dd4..f21e372 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
@@ -1,88 +1,93 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
/**
* Edge restriction based on node types, used when visiting the graph.
+ * <p>
+ * <a href="https://docs.softwareheritage.org/devel/swh-model/data-model.html">Software Heritage
+ * graph</a> contains multiple node types (contents, directories, revisions, ...) and restricting
+ * the traversal to specific node types is necessary for many querying operations: <a
+ * href="https://docs.softwareheritage.org/devel/swh-graph/use-cases.html">use cases</a>.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class AllowedEdges {
/** Graph on which edge restriction is performed */
Graph graph;
/**
* 2D boolean matrix storing access rights for all combination of src/dst node types (first
* dimension is source, second dimension is destination), when edge restriction is not enforced
* this array is set to null for early bypass.
*/
public boolean[][] restrictedTo;
/**
* Constructor.
*
* @param graph the graph on which to perform edge restriction
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing <a
+ * href="https://docs.softwareheritage.org/devel/swh-graph/api.html#terminology">allowed edges</a>
*/
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
int nbNodeTypes = Node.Type.values().length;
this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes];
// Special values (null, empty, "*")
if (edgesFmt == null || edgesFmt.isEmpty()) {
return;
}
if (edgesFmt.equals("*")) {
// Allows for quick bypass (with simple null check) when no edge restriction
restrictedTo = null;
return;
}
// Format: "src1:dst1,src2:dst2,[...]"
- // TODO: link API doc
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
if (nodeTypes.length != 2) {
throw new IllegalArgumentException("Cannot parse edge type: " + edgeType);
}
ArrayList<Node.Type> srcTypes = Node.Type.parse(nodeTypes[0]);
ArrayList<Node.Type> dstTypes = Node.Type.parse(nodeTypes[1]);
for (Node.Type srcType : srcTypes) {
for (Node.Type dstType : dstTypes) {
restrictedTo[srcType.ordinal()][dstType.ordinal()] = true;
}
}
}
}
/**
* Checks if a given edge can be followed during graph traversal.
*
* @param srcNodeId edge source node
* @param dstNodeId edge destination node
* @return true if allowed and false otherwise
*/
public boolean isAllowed(long srcNodeId, long dstNodeId) {
Node.Type srcType = graph.getNodeType(srcNodeId);
Node.Type dstType = graph.getNodeType(dstNodeId);
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
/**
* Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of
* node ids.
*
* @see AllowedEdges#isAllowed(long, long)
*/
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java
index 29f9774..4a3d896 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,151 +1,169 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.algo.Stats;
/**
- * Entrypoint of the swh-graph server REST API.
+ * Web framework of the swh-graph server REST API.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class App {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException, JSAPException {
SimpleJSAP jsap = new SimpleJSAP(
App.class.getName(),
"Server to load and query a compressed graph representation of Software Heritage archive.",
new Parameter[] {
new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port",
"Binding port of the server."),
new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the compressed graph."),
}
);
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
String graphPath = config.getString("graphPath");
int port = config.getInt("port");
startServer(graphPath, port);
}
+ /**
+ * Loads compressed graph and starts the web server to query it.
+ *
+ * @param graphPath basename of the compressed graph
+ * @param port binding port of the server
+ */
private static void startServer(String graphPath, int port) throws IOException {
Graph graph = new Graph(graphPath);
Stats stats = new Stats(graphPath);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
Javalin app = Javalin.create().start(port);
app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
app.get("/stats/", ctx -> { ctx.json(stats); });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
app.get("/leaves/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.leaves(src));
});
app.get("/neighbors/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.neighbors(src));
});
app.get("/visit/nodes/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitNodes(src));
});
app.get("/visit/paths/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitPaths(src));
});
app.get("/walk/:src/:dst", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String algorithm = ctx.queryParam("traversal", "dfs");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.walk(src, dstFmt, algorithm));
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
+ /**
+ * Checks query strings names provided to the REST API.
+ *
+ * @param ctx Javalin HTTP request context
+ * @param allowedFmt a regular expression describing allowed query strings names
+ * @throws IllegalArgumentException unknown query string provided
+ */
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map<String, List<String>> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
index 424dfd1..03bbf1b 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -1,210 +1,217 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
import org.softwareheritage.graph.utils.Timing;
/**
* REST API endpoints wrapper functions.
+ * <p>
+ * Graph operations are segmented between high-level class (this one) and the low-level class
+ * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing
+ * all the input/output node ids conversions and logging timings.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.algo.Traversal
*/
public class Endpoint {
/** Graph where traversal endpoint is performed */
Graph graph;
/** Internal traversal API */
Traversal traversal;
/** Timings logger */
private static final Logger logger = LoggerFactory.getLogger(Endpoint.class);
/**
* Constructor.
*
* @param graph the graph used for traversal endpoint
* @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing <a
+ * href="https://docs.softwareheritage.org/devel/swh-graph/api.html#terminology">allowed edges</a>
*/
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
/**
* Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
*
* @param nodeIds the list of long node ids
* @return a list of corresponding SWH PIDs
*/
private ArrayList<SwhId> convertNodesToSwhIds(ArrayList<Long> nodeIds) {
long startTime = Timing.start();
ArrayList<SwhId> swhIds = new ArrayList<>();
for (long nodeId : nodeIds) {
swhIds.add(graph.getSwhId(nodeId));
}
float duration = Timing.stop(startTime);
logger.debug("convertNodesToSwhIds() took {} s.", duration);
return swhIds;
}
/**
* Converts a list of (internal) long node ids to the corresponding {@link SwhPath}.
*
* @param nodeIds the list of long node ids
* @return the corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private SwhPath convertNodesToSwhPath(ArrayList<Long> nodeIds) {
long startTime = Timing.start();
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
path.add(graph.getSwhId(nodeId));
}
float duration = Timing.stop(startTime);
logger.debug("convertNodesToSwhPath() took {} s.", duration);
return path;
}
/**
* Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s.
*
* @param pathsNodeId the list of paths with long node ids
* @return a list of corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private ArrayList<SwhPath> convertPathsToSwhIds(ArrayList<ArrayList<Long>> pathsNodeId) {
long startTime = Timing.start();
ArrayList<SwhPath> paths = new ArrayList<>();
for (ArrayList<Long> path : pathsNodeId) {
paths.add(convertNodesToSwhPath(path));
}
float duration = Timing.stop(startTime);
logger.debug("convertPathsToSwhIds() took {} s.", duration);
return paths;
}
/**
- * Leaves endpoint wrapper (performs input/output node ids conversions).
+ * Leaves endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#leaves(long)
*/
public ArrayList<SwhId> leaves(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.leaves(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("leaves({}) took {} s.", src, duration);
return convertNodesToSwhIds(nodeIds);
}
/**
- * Neighbors endpoint wrapper (performs input/output node ids conversions).
+ * Neighbors endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
*/
public ArrayList<SwhId> neighbors(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.neighbors(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("neighbors({}) took {} s.", src, duration);
return convertNodesToSwhIds(nodeIds);
}
/**
- * Walk endpoint wrapper (performs input/output node ids conversions).
+ * Walk endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @param dstFmt destination used in endpoint call (TODO: link to API doc)
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @param dstFmt destination formatted string as described in the <a
+ * href="https://docs.softwareheritage.org/devel/swh-graph/api.html#walk">API</a>
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
* @return the resulting {@link SwhPath} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#walk
*/
public SwhPath walk(SwhId src, String dstFmt, String algorithm) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = new ArrayList<Long>();
// Destination is either a SWH ID or a node type
try {
SwhId dstSwhId = new SwhId(dstFmt);
long dstNodeId = graph.getNodeId(dstSwhId);
long startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm);
float duration = Timing.stop(startTime);
logger.debug("walk({}) took {} s.", src, duration);
} catch (IllegalArgumentException ignored1) {
try {
Node.Type dstType = Node.Type.fromStr(dstFmt);
long startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstType, algorithm);
float duration = Timing.stop(startTime);
logger.debug("walk({}) took {} s.", src, duration);
} catch (IllegalArgumentException ignored2) { }
}
return convertNodesToSwhPath(nodeIds);
}
/**
- * VisitNodes endpoint wrapper (performs input/output node ids conversions).
+ * VisitNodes endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
*/
public ArrayList<SwhId> visitNodes(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.visitNodes(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("visitNodes({}) took {} s.", src, duration);
return convertNodesToSwhIds(nodeIds);
}
/**
- * VisitPaths endpoint wrapper (performs input/output node ids conversions).
+ * VisitPaths endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
+ * @param src source node of endpoint call specified as a {@link SwhId}
* @return the resulting list of {@link SwhPath} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#visitPaths(long)
*/
public ArrayList<SwhPath> visitPaths(SwhId src) {
long srcNodeId = graph.getNodeId(src);
long startTime = Timing.start();
ArrayList<ArrayList<Long>> paths = traversal.visitPaths(srcNodeId);
float duration = Timing.stop(startTime);
logger.debug("visitPaths({}) took {} s.", src, duration);
return convertPathsToSwhIds(paths);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
index 6c2310b..1b7be1d 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,183 +1,195 @@
package org.softwareheritage.graph;
import java.io.IOException;
import it.unimi.dsi.big.webgraph.BVGraph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeIdMap;
import org.softwareheritage.graph.backend.NodeTypesMap;
/**
* Main class storing the compressed graph and node id mappings.
+ * <p>
+ * The compressed graph is stored using the <a href="http://webgraph.di.unimi.it/">WebGraph</a>
+ * ecosystem. Additional mappings are necessary because Software Heritage uses string based <a
+ * href="https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html">persistent
+ * identifiers</a> (PID) while WebGraph uses integers internally. These two mappings (long id ↔
+ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to
+ * PID for users results). However, since graph traversal can be restricted depending on the node
+ * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full
+ * PID lookup.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.AllowedEdges
+ * @see org.softwareheritage.graph.NodeIdMap;
+ * @see org.softwareheritage.graph.NodeTypesMap;
*/
public class Graph {
/** File extension for the SWH PID to long node id map */
public static final String PID_TO_NODE = ".pid2node.csv";
/** File extension for the long node id to SWH PID map */
public static final String NODE_TO_PID = ".node2pid.csv";
/** File extension for the long node id to node typ map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
BVGraph graph;
/** Transposed compressed graph (used for backward traversals) */
BVGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id ↔ SWH PIDs */
NodeIdMap nodeIdMap;
/** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
this.path = path;
this.nodeIdMap = new NodeIdMap(path, getNbNodes());
this.nodeTypesMap = new NodeTypesMap(path);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SwhId} node to long.
*
* @param swhId node specified as a {@link SwhId}
* @return internal long node id
* @see org.softwareheritage.graph.SwhId
*/
public long getNodeId(SwhId swhId) {
return nodeIdMap.getNodeId(swhId);
}
/**
* Converts long id node to {@link SwhId}.
*
* @param nodeId node specified as a long id
* @return external SWH PID
* @see org.softwareheritage.graph.SwhId
*/
public SwhId getSwhId(long nodeId) {
return nodeIdMap.getSwhId(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
public long getNbNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
public long getNbEdges() {
return graph.numArcs();
}
/**
* Returns list of successors of a node.
*
* @param nodeId node specified as a long id
- * @return list of successors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a <a
+ * href="http://fastutil.di.unimi.it/">fastutil</a> LongBigArrays
*/
public long[][] successors(long nodeId) {
return graph.successorBigArray(nodeId);
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns list of predecessors of a node.
*
* @param nodeId node specified as a long id
- * @return list of predecessors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a <a
+ * href="http://fastutil.di.unimi.it/">fastutil</a> LongBigArrays
*/
public long[][] predecessors(long nodeId) {
return graphTransposed.successorBigArray(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return graphTransposed.outdegree(nodeId);
}
/**
* Returns the degree of a node, depending on graph orientation.
*
* @param nodeId node specified as a long id
* @param useTransposed boolean value to use transposed graph
* @return degree of a node
*/
public long degree(long nodeId, boolean useTransposed) {
return (useTransposed) ? indegree(nodeId) : outdegree(nodeId);
}
/**
* Returns the neighbors of a node, depending on graph orientation.
*
* @param nodeId node specified as a long id
* @param useTransposed boolean value to use transposed graph
- * @return list of neighbors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a <a
+ * href="http://fastutil.di.unimi.it/">fastutil</a> LongBigArrays
*/
public long[][] neighbors(long nodeId, boolean useTransposed) {
return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
index b688687..51de885 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
@@ -1,94 +1,98 @@
package org.softwareheritage.graph;
import java.util.Iterator;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Graph;
/**
* Iterator class to go over a node neighbors in the graph.
+ * <p>
+ * Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.AllowedEdges
*/
public class Neighbors implements Iterable<Long> {
/** Graph used to explore neighbors */
Graph graph;
/** Boolean to specify the use of the transposed graph */
boolean useTransposed;
/** Graph edge restriction */
AllowedEdges edges;
/** Source node from which neighbors will be listed */
long srcNodeId;
/**
* Constructor.
*
* @param graph graph used to explore neighbors
* @param useTransposed boolean value to use transposed graph
* @param edges edges allowed to be used in the graph
* @param srcNodeId source node from where to list neighbors
*/
public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) {
this.graph = graph;
this.useTransposed = useTransposed;
this.edges = edges;
this.srcNodeId = srcNodeId;
}
@Override
public Iterator<Long> iterator() {
return new NeighborsIterator();
}
/**
* Inner class for {@link Neighbors} iterator.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class NeighborsIterator implements Iterator<Long> {
long nextNeighborIdx;
long nbNeighbors;
+ // LongBigArrays to support 64-bit indexing.
long[][] neighbors;
public NeighborsIterator() {
this.nextNeighborIdx = -1;
this.nbNeighbors = graph.degree(srcNodeId, useTransposed);
this.neighbors = graph.neighbors(srcNodeId, useTransposed);
}
public boolean hasNext() {
- // No edge restriction case: bypass type checks and skip to next neighbor
+ // Case 1: no edge restriction, bypass type checks and skip to next neighbor
if (edges.restrictedTo == null) {
if (nextNeighborIdx + 1 < nbNeighbors) {
nextNeighborIdx++;
return true;
} else {
return false;
}
}
- // Edge restriction case: look ahead for next neighbor
+ // Case 2: edge restriction, look ahead for next neighbor
for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) {
long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx);
if (edges.isAllowed(srcNodeId, nextNodeId)) {
nextNeighborIdx = lookAheadIdx;
return true;
}
}
return false;
}
public Long next() {
long nextNodeId = LongBigArrays.get(neighbors, nextNeighborIdx);
return nextNodeId;
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Node.java b/java/server/src/main/java/org/softwareheritage/graph/Node.java
index 8c36580..9194b97 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Node.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Node.java
@@ -1,93 +1,95 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* A node in the Software Heritage graph.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Node {
/**
* Software Heritage graph node types, as described in the
* <a href="https://docs.softwareheritage.org/devel/swh-model/data-model.html">data model</a>.
*/
public enum Type {
/** Content node */
CNT,
/** Directory node */
DIR,
/** Origin node */
ORI,
/** Release node */
REL,
/** Revision node */
REV,
/** Snapshot node */
SNP;
/**
* Converts integer to corresponding SWH node type.
*
* @param intType node type represented as an integer
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromInt(int intType) {
switch (intType) {
case 0:
return CNT;
case 1:
return DIR;
case 2:
return ORI;
case 3:
return REL;
case 4:
return REV;
case 5:
return SNP;
}
return null;
}
/**
- * Parses SWH node type from string.
+ * Converts string to corresponding SWH node type.
*
* @param strType node type represented as a string
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromStr(String strType) {
if (!strType.matches("cnt|dir|ori|rel|rev|snp")) {
throw new IllegalArgumentException("Unknown node type: " + strType);
}
return Node.Type.valueOf(strType.toUpperCase());
}
/**
- * Parses SWH node type possible values from formatted string (TODO: link API doc).
+ * Parses SWH node type possible values from formatted string (see the <a
+ * href="https://docs.softwareheritage.org/devel/swh-graph/api.html#terminology">API
+ * syntax</a>).
*
- * @param strFmtType node types represented as a formatted string (TODO: link API doc)
+ * @param strFmtType node types represented as a formatted string
* @return a list containing the {@link Node.Type} values
* @see org.softwareheritage.graph.Node.Type
*/
public static ArrayList<Node.Type> parse(String strFmtType) {
ArrayList<Node.Type> types = new ArrayList<>();
if (strFmtType.equals("*")) {
List<Node.Type> nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
types.add(Node.Type.fromStr(strFmtType));
}
return types;
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
index ad84da7..f77c835 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
@@ -1,123 +1,124 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import com.fasterxml.jackson.annotation.JsonValue;
import org.softwareheritage.graph.SwhId;
/**
* Wrapper class to store a list of {@link SwhId}.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
* @see org.softwareheritage.graph.SwhId
*/
public class SwhPath {
+ /** Internal list of {@link SwhId} */
ArrayList<SwhId> path;
/**
* Constructor.
*/
public SwhPath() {
this.path = new ArrayList<SwhId>();
}
/**
* Constructor.
*
* @param swhIds variable number of string PIDs to initialize this path with
*/
public SwhPath(String ...swhIds) {
this();
for (String swhId : swhIds) {
add(new SwhId(swhId));
}
}
/**
* Constructor.
*
- * @param swhIds variable number of @{link SwhId} to initialize this path with
+ * @param swhIds variable number of {@link SwhId} to initialize this path with
* @see org.softwareheritage.graph.SwhId
*/
public SwhPath(SwhId ...swhIds) {
this();
for (SwhId swhId : swhIds) {
add(swhId);
}
}
/**
* Returns this path as a list of {@link SwhId}.
*
* @return list of {@link SwhId} constituting the path
* @see org.softwareheritage.graph.SwhId
*/
@JsonValue
public ArrayList<SwhId> getPath() {
return path;
}
/**
* Adds a {@link SwhId} to this path.
*
* @param {@link SwhId} to add to this path
* @see org.softwareheritage.graph.SwhId
*/
public void add(SwhId swhId) {
path.add(swhId);
}
/**
* Returns the {@link SwhId} at the specified position in this path.
*
* @param index position of the {@link SwhId} to return
* @return {@link SwhId} at the specified position
* @see org.softwareheritage.graph.SwhId
*/
public SwhId get(int index) {
return path.get(index);
}
/**
* Returns the number of elements in this path.
*
* @return number of elements in this path
*/
public int size() {
return path.size();
}
@Override
public boolean equals(Object otherObj) {
if (otherObj == this) return true;
if (!(otherObj instanceof SwhPath)) return false;
SwhPath other = (SwhPath) otherObj;
if (size() != other.size()) {
return false;
}
for (int i = 0; i < size(); i++) {
SwhId thisSwhId = get(i);
SwhId otherSwhId = other.get(i);
if (!thisSwhId.equals(otherSwhId)) {
return false;
}
}
return true;
}
@Override
public String toString() {
String str = new String();
for (SwhId swhId : path) {
str += swhId + "/";
}
return str;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
index f145657..a26bbad 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
@@ -1,67 +1,70 @@
package org.softwareheritage.graph.algo;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
/**
* Statistics on the compressed graph.
+ * <p>
+ * These statistics are not computed but directly read from <a
+ * href="http://webgraph.di.unimi.it/">WebGraph</a> generated .stats and .properties files.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Stats {
public class Counts {
public long nodes;
public long edges;
}
public class Ratios {
public double compression;
public double bitsPerNode;
public double bitsPerEdge;
public double avgLocality;
}
public class Degree {
public long min;
public long max;
public double avg;
}
public Counts counts;
public Ratios ratios;
public Degree indegree;
public Degree outdegree;
/**
* Constructor.
*
- * @param graphPath full path of compressed graph
+ * @param graphPath path and basename of compressed graph
*/
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
properties.load(new FileInputStream(graphPath + ".stats"));
this.counts = new Counts();
this.ratios = new Ratios();
this.indegree = new Degree();
this.outdegree = new Degree();
this.counts.nodes = Long.parseLong(properties.getProperty("nodes"));
this.counts.edges = Long.parseLong(properties.getProperty("arcs"));
this.ratios.compression = Double.parseDouble(properties.getProperty("compratio"));
this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode"));
this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink"));
this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality"));
this.indegree.min = Long.parseLong(properties.getProperty("minindegree"));
this.indegree.max = Long.parseLong(properties.getProperty("maxindegree"));
this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree"));
this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree"));
this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree"));
this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree"));
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
index cff6fc4..65f5fb3 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -1,309 +1,310 @@
package org.softwareheritage.graph.algo;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import org.softwareheritage.graph.AllowedEdges;
+import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
/**
* Traversal algorithms on the compressed graph.
+ * <p>
+ * Internal implementation of the traversal API endpoints. These methods only input/output internal
+ * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage
+ * PID.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
Graph graph;
/** Boolean to specify the use of the transposed graph */
boolean useTransposed;
/** Graph edge restriction */
AllowedEdges edges;
- /**
- * Bit array storing if we have visited a node
- * @see it.unimi.dsi.bits.LongArrayBitVector
- */
+ /** Bit array storing if we have visited a node */
LongArrayBitVector visited;
- /**
- * Array storing parent node id for each nodes during a traversal
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
- */
+ /** LongBigArray storing parent node id for each nodes during a traversal */
long[][] nodeParent;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing <a
+ * href="https://docs.softwareheritage.org/devel/swh-graph/api.html#terminology">allowed edges</a>
*/
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
this.graph = graph;
this.useTransposed = (direction.equals("backward"));
this.edges = new AllowedEdges(graph, edgesFmt);
long nbNodes = graph.getNbNodes();
this.visited = LongArrayBitVector.ofLength(nbNodes);
this.nodeParent = LongBigArrays.newBigArray(nbNodes);
}
/**
* Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
* @return list of node ids corresponding to the leaves
*/
public ArrayList<Long> leaves(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
neighborsCnt++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
if (neighborsCnt == 0) {
nodeIds.add(currentNodeId);
}
}
return nodeIds;
}
/**
* Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
* @return list of node ids corresponding to the neighbors
*/
public ArrayList<Long> neighbors(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
nodeIds.add(neighborNodeId);
}
return nodeIds;
}
/**
* Performs a graph traversal and returns explored nodes.
*
* @param srcNodeId source node
* @return list of explored node ids
*/
public ArrayList<Long> visitNodes(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
nodeIds.add(currentNodeId);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
}
return nodeIds;
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList<ArrayList<Long>> visitPaths(long srcNodeId) {
ArrayList<ArrayList<Long>> paths = new ArrayList<>();
Stack<Long> currentPath = new Stack<Long>();
visitPathsInternal(srcNodeId, paths, currentPath);
return paths;
}
/**
* Internal recursive function of {@link #visitPaths}.
*
* @param currentNodeId current node
* @param paths list of currently stored paths
* @param currentPath current path as node ids
*/
private void visitPathsInternal(
long currentNodeId, ArrayList<ArrayList<Long>> paths, Stack<Long> currentPath) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
visitPathsInternal(neighborNodeId, paths, currentPath);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList<Long> path = new ArrayList<Long>();
for (long nodeId : currentPath) {
path.add(nodeId);
}
paths.add(path);
}
currentPath.pop();
}
/**
* Performs a graph traversal and returns the first found path from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public <T> ArrayList<Long> walk(long srcNodeId, T dst, String algorithm) {
long dstNodeId = -1;
if (algorithm.equals("dfs")) {
dstNodeId = walkInternalDfs(srcNodeId, dst);
} else if (algorithm.equals("bfs")) {
dstNodeId = walkInternalBfs(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Unable to find destination point: " + dst);
}
ArrayList<Long> nodeIds = backtracking(srcNodeId, dstNodeId);
return nodeIds;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private <T> long walkInternalDfs(long srcNodeId, T dst) {
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private <T> long walkInternalBfs(long srcNodeId, T dst) {
Queue<Long> queue = new LinkedList<Long>();
this.visited.fill(false);
queue.add(srcNodeId);
visited.set(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
queue.add(neighborNodeId);
visited.set(neighborNodeId);
LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private <T> boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList<Long> backtracking(long srcNodeId, long dstNodeId) {
ArrayList<Long> path = new ArrayList<Long>();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = LongBigArrays.get(nodeParent, currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
index be63e96..0644049 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
@@ -1,64 +1,65 @@
package org.softwareheritage.graph.backend;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import it.unimi.dsi.io.ByteBufferInputStream;
/**
* Wrapper class around very big mmap()-ed file.
+ * <p>
+ * Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The <a
+ * href="http://dsiutils.di.unimi.it/">dsiutils</a> ByteBufferInputStream is used to overcome this
+ * Java limit.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class MapFile {
- /**
- * Memory-mapped file buffer
- * @see it.unimi.dsi.io.ByteBufferInputStream
- */
+ /** Memory-mapped file buffer */
ByteBufferInputStream bufferMap;
/** Fixed line length of the mmap()-ed file */
int lineLength;
/**
* Constructor.
*
* @param path file path to mmap()
* @param lineLength fixed length of a line in the file
*/
public MapFile(String path, int lineLength) throws IOException {
this.bufferMap = null;
this.lineLength = lineLength;
try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) {
FileChannel fileChannel = mapFile.getChannel();
bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY);
}
}
/**
* Returns a specific line in the file.
*
* @param lineIndex line number in the file
* @return the line at the specified position
*/
public String readAtLine(long lineIndex) {
byte[] buffer = new byte[lineLength];
long position = lineIndex * (long) lineLength;
bufferMap.position(position);
bufferMap.read(buffer, 0, lineLength);
String line = new String(buffer);
return line.trim();
}
/**
* Closes the mmap()-ed file.
*/
public void close() throws IOException {
bufferMap.close();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
index 2f31677..3d68dce 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -1,111 +1,116 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.MapFile;
+import org.softwareheritage.graph.backend.Setup;
/**
* Mapping between internal long node id and external SWH PID.
+ * <p>
+ * Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then
+ * they are loaded here using mmap().
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.backend.Setup
*/
public class NodeIdMap {
/** Fixed length of full SWH PID */
public static final int SWH_ID_LENGTH = 50;
/** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
- /** Full graph path */
+ /** Graph path and basename */
String graphPath;
/** Number of ids to map */
long nbIds;
/** mmap()-ed PID_TO_NODE file */
MapFile swhToNodeMap;
/** mmap()-ed NODE_TO_PID file */
MapFile nodeToSwhMap;
/**
* Constructor.
*
* @param graphPath full graph path
* @param nbNodes number of nodes in the graph
*/
public NodeIdMap(String graphPath, long nbNodes) throws IOException {
this.graphPath = graphPath;
this.nbIds = nbNodes;
// +1 are for spaces and end of lines
int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1;
int nodeToSwhLineLength = SWH_ID_LENGTH + 1;
this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength);
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength);
}
/**
* Converts SWH PID to corresponding long node id.
*
* @param swhId node represented as a {@link SwhId}
* @return corresponding node as a long id
* @see org.softwareheritage.graph.SwhId
*/
public long getNodeId(SwhId swhId) {
// Each line in PID_TO_NODE is formatted as: swhId nodeId
// The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
long start = 0;
long end = nbIds - 1;
while (start <= end) {
long lineNumber = (start + end) / 2L;
String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" ");
if (parts.length != 2) {
break;
}
String currentSwhId = parts[0];
long currentNodeId = Long.parseLong(parts[1]);
int cmp = currentSwhId.compareTo(swhId.toString());
if (cmp == 0) {
return currentNodeId;
} else if (cmp < 0) {
start = lineNumber + 1;
} else {
end = lineNumber - 1;
}
}
throw new IllegalArgumentException("Unknown SWH id: " + swhId);
}
/**
* Converts a node long id to corresponding SWH PID.
*
* @param nodeId node as a long id
* @return corresponding node as a {@link SwhId}
* @see org.softwareheritage.graph.SwhId
*/
public SwhId getSwhId(long nodeId) {
// Each line in NODE_TO_PID is formatted as: swhId
// The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
// nodeId-th line to get corresponding swhId
if (nodeId < 0 || nodeId >= nbIds) {
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds);
}
String swhId = nodeToSwhMap.readAtLine(nodeId);
return new SwhId(swhId);
}
/**
* Closes the mapping files.
*/
public void close() throws IOException {
swhToNodeMap.close();
nodeToSwhMap.close();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
index a4a094e..1b95bd0 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -1,51 +1,53 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
/**
* Mapping between long node id and SWH node type as described in the <a
* href="https://docs.softwareheritage.org/devel/swh-model/data-model.html">data model</a>.
+ * <p>
+ * The type mapping is pre-computed and dumped on disk in the {@link Setup} class, then it is loaded
+ * in-memory here using <a href="http://fastutil.di.unimi.it/">fastutil</a> LongBigList. To be
+ * space-efficient, the mapping is stored as a bitmap using minimum number of bits per {@link
+ * Node.Type}.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class NodeTypesMap {
- /**
- * Array storing for each node its type
- * @see it.unimi.dsi.fastutil.longs.LongBigList
- */
+ /** Array storing for each node its type */
LongBigList nodeTypesMap;
/**
* Constructor.
*
- * @param graphPath full path of the compressed graph
+ * @param graphPath path and basename of the compressed graph
*/
public NodeTypesMap(String graphPath) throws IOException {
try {
nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("Unknown class object: " + e);
}
}
/**
* Returns node type from a node long id.
*
* @param nodeId node as a long id
* @return corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getType(long nodeId) {
long type = nodeTypesMap.getLong(nodeId);
return Node.Type.fromInt((int) type);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
index 5474abc..a3fa6c7 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -1,120 +1,125 @@
package org.softwareheritage.graph.backend;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.zip.GZIPInputStream;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeTypesMap;
/**
* Pre-processing steps (such as dumping mapping files on disk) before running the graph service.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Setup {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.err.println("Expected parameters: <nodes.csv.gz path> <compressed graph path>");
System.exit(1);
}
String nodesPath = args[0];
String graphPath = args[1];
System.out.println("Pre-computing node id maps...");
long startTime = System.nanoTime();
precomputeNodeIdMap(nodesPath, graphPath);
long endTime = System.nanoTime();
double duration = (double) (endTime - startTime) / 1_000_000_000;
System.out.println("Done in: " + duration + " seconds");
}
/**
* Computes and dumps on disk mapping files.
*
* @param nodesPath path of the compressed csv nodes file
* @param graphPath path of the compressed graph
*/
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException {
// First internal mapping: SWH id (string) -> WebGraph MPH (long)
Object2LongFunction<String> mphMap = null;
try {
mphMap = (Object2LongFunction<String>) BinIO.loadObject(graphPath + ".mph");
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("The .mph file contains unknown class object: " + e);
}
long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
// Second internal mapping: WebGraph MPH (long) -> BFS ordering (long)
long[][] bfsMap = LongBigArrays.newBigArray(nbIds);
long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap);
if (loaded != nbIds) {
throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded);
}
// Dump complete mapping for all nodes: SWH id (string) <=> WebGraph node id (long)
InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath));
FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8"));
LineIterator swhIdIterator = new LineIterator(buffer);
try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE));
Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) {
// nodeToSwhMap needs to write SWH id in order of node id, so use a temporary array
Object[][] nodeToSwhId = ObjectBigArrays.newBigArray(nbIds);
// To effectively run edge restriction during graph traversals, we store node id (long) -> SWH
// type map. This is represented as a bitmap using minimum number of bits per Node.Type.
final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2));
final int nbBitsPerNodeType = log2NbTypes;
LongArrayBitVector nodeTypesBitVector =
LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType);
for (long iNode = 0; iNode < nbIds && swhIdIterator.hasNext(); iNode++) {
String strSwhId = swhIdIterator.next().toString();
long mphId = mphMap.getLong(strSwhId);
long nodeId = LongBigArrays.get(bfsMap, mphId);
String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId);
String line = strSwhId + " " + paddedNodeId + "\n";
swhToNodeMap.write(line);
ObjectBigArrays.set(nodeToSwhId, nodeId, strSwhId);
SwhId swhId = new SwhId(strSwhId);
nodeTypesMap.set(nodeId, swhId.getType().ordinal());
}
BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE);
for (long iNode = 0; iNode < nbIds; iNode++) {
String line = ObjectBigArrays.get(nodeToSwhId, iNode).toString() + "\n";
nodeToSwhMap.write(line);
}
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
index e4829ec..bd45232 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
@@ -1,39 +1,47 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
/**
* Linux git log experiment to benchmark graph traversal.
+ * <p>
+ * The goal is to do the equivalent of a {@code git log} in the Linux kernel by following revisions
+ * nodes.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class LinuxLog {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);
// A linux kernel commit on Sun Dec 31
final SwhId commit = new SwhId("swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35");
final long expectedCount = 723640;
System.out.println("git log " + commit);
System.out.println("Expecting " + expectedCount + " commits");
long startTime = System.nanoTime();
Endpoint endpoint = new Endpoint(graph, "forward", "rev:rev");
long count = endpoint.visitNodes(commit).size();
if (count != expectedCount) {
throw new IllegalArgumentException("Counted " + count + " commits");
}
long endTime = System.nanoTime();
double duration = (double) (endTime - startTime) / 1_000_000_000;
System.out.println("Visit operation done in: " + duration + " seconds");
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 9:41 AM (5 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3275145
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment