diff --git a/api/server/pom.xml b/api/server/pom.xml
index 12e6484..99ff2b1 100644
--- a/api/server/pom.xml
+++ b/api/server/pom.xml
@@ -1,132 +1,141 @@
4.0.0
org.softwareheritage.graph
swh-graph
1.0
swh-graph
https://www.softwareheritage.org/
UTF-8
11
junit
junit
4.11
test
org.hamcrest
hamcrest
2.1
test
io.javalin
javalin
3.0.0
org.slf4j
slf4j-simple
1.7.26
com.fasterxml.jackson.core
jackson-databind
2.9.8
it.unimi.dsi
webgraph-big
3.5.1
it.unimi.dsi
fastutil
8.2.2
maven-clean-plugin
3.1.0
maven-resources-plugin
3.0.2
maven-compiler-plugin
3.8.0
-verbose
-Xlint:all
maven-surefire-plugin
2.22.1
maven-jar-plugin
3.0.2
maven-install-plugin
2.5.2
maven-deploy-plugin
2.8.2
maven-site-plugin
3.7.1
maven-project-info-reports-plugin
3.0.0
maven-assembly-plugin
org.softwareheritage.graph.App
jar-with-dependencies
make-assembly
package
single
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+ 3.1.1
+
+
+
diff --git a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
index 784207d..8c74758 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
@@ -1,58 +1,90 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+/**
+ * Edge restriction based on node types, used when visiting the graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class AllowedEdges {
+ /** Graph on which edge restriction is performed */
Graph graph;
- // First dimension is source node type, second dimension is destination node type
+ /**
+ * 2D boolean matrix storing access rights for all combination of src/dst node types (first
+ * dimension is source, second dimension is destination)
+ */
boolean[][] allowed;
+ /**
+ * Constructor.
+ *
+ * @param graph the graph on which to perform edge restriction
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
int nbNodeTypes = Node.Type.values().length;
this.allowed = new boolean[nbNodeTypes][nbNodeTypes];
// Special values (null, empty, "*")
if (edgesFmt == null || edgesFmt.isEmpty()) {
return;
}
if (edgesFmt.equals("*")) {
for (int i = 0; i < nbNodeTypes; i++) {
for (int j = 0; j < nbNodeTypes; j++) {
allowed[i][j] = true;
}
}
return;
}
// Format: "src1:dst1,src2:dst2,[...]"
+ // TODO: link API doc
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
if (nodeTypes.length != 2) {
throw new IllegalArgumentException("Cannot parse edge type: " + edgeType);
}
ArrayList srcTypes = Node.Type.parse(nodeTypes[0]);
ArrayList dstTypes = Node.Type.parse(nodeTypes[1]);
for (Node.Type srcType : srcTypes) {
for (Node.Type dstType : dstTypes) {
allowed[srcType.ordinal()][dstType.ordinal()] = true;
}
}
}
}
+ /**
+ * Checks if a given edge can be followed during graph traversal.
+ *
+ * @param srcNodeId edge source node
+ * @param dstNodeId edge destination node
+ * @return true if allowed and false otherwise
+ */
public boolean isAllowed(long srcNodeId, long dstNodeId) {
Node.Type srcType = graph.getNodeType(srcNodeId);
Node.Type dstType = graph.getNodeType(dstNodeId);
return isAllowed(srcType, dstType);
}
+ /**
+ * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of
+ * node ids.
+ *
+ * @see AllowedEdges#isAllowed(long, long)
+ */
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
return allowed[srcType.ordinal()][dstType.ordinal()];
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java
index e02a6a5..841e37c 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,124 +1,132 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.algo.Stats;
+/**
+ * Entrypoint of the swh-graph server REST API.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class App {
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);
Stats stats = new Stats(path);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
Javalin app = Javalin.create().start(5009);
app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
app.get("/stats/", ctx -> { ctx.json(stats); });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
app.get("/leaves/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.leaves(src));
});
app.get("/neighbors/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.neighbors(src));
});
// TODO: anonymous class to return both nodes/paths? (waiting on node types map merged/refactor)
/*app.get("/visit/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.NODES_AND_PATHS);
ctx.json(visit);
});*/
app.get("/visit/nodes/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitNodes(src));
});
app.get("/visit/paths/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitPaths(src));
});
app.get("/walk/:src/:dst", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String algorithm = ctx.queryParam("traversal", "dfs");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.walk(src, dstFmt, algorithm));
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
index 5742cad..a851291 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -1,85 +1,166 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
+/**
+ * REST API endpoints wrapper functions.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Endpoint {
+ /** Graph where traversal endpoint is performed */
Graph graph;
+ /** Internal traversal API */
Traversal traversal;
+ /**
+ * Constructor.
+ *
+ * @param graph the graph used for traversal endpoint
+ * @param direction a string (either "forward" or "backward") specifying edge orientation
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
+ /**
+ * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
+ *
+ * @param nodeIds the list of long node ids
+ * @return a list of corresponding SWH PIDs
+ */
private ArrayList convertNodesToSwhIds(ArrayList nodeIds) {
ArrayList swhIds = new ArrayList<>();
for (long nodeId : nodeIds) {
swhIds.add(graph.getSwhId(nodeId));
}
return swhIds;
}
+ /**
+ * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}.
+ *
+ * @param nodeIds the list of long node ids
+ * @return the corresponding {@link SwhPath}
+ * @see org.softwareheritage.graph.SwhPath
+ */
private SwhPath convertNodesToSwhPath(ArrayList nodeIds) {
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
path.add(graph.getSwhId(nodeId));
}
return path;
}
+ /**
+ * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s.
+ *
+ * @param pathsNodeId the list of paths with long node ids
+ * @return a list of corresponding {@link SwhPath}
+ * @see org.softwareheritage.graph.SwhPath
+ */
private ArrayList convertPathsToSwhIds(ArrayList> pathsNodeId) {
ArrayList paths = new ArrayList<>();
for (ArrayList path : pathsNodeId) {
paths.add(convertNodesToSwhPath(path));
}
return paths;
}
+ /**
+ * Leaves endpoint wrapper (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#leaves(long)
+ */
public ArrayList leaves(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList nodeIds = traversal.leaves(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * Neighbors endpoint wrapper (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
+ */
public ArrayList neighbors(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList nodeIds = traversal.neighbors(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * Walk endpoint wrapper (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @param dstFmt destination used in endpoint call (TODO: link to API doc)
+ * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
+ * @return the resulting {@link SwhPath} from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.SwhPath
+ * @see org.softwareheritage.graph.algo.Traversal#walk
+ */
public SwhPath walk(SwhId src, String dstFmt, String algorithm) {
long srcNodeId = graph.getNodeId(src);
ArrayList nodeIds = null;
// Destination is either a SWH ID or a node type
try {
SwhId dstSwhId = new SwhId(dstFmt);
long dstNodeId = graph.getNodeId(dstSwhId);
nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm);
} catch (IllegalArgumentException ignored1) {
try {
Node.Type dstType = Node.Type.fromStr(dstFmt);
nodeIds = traversal.walk(srcNodeId, dstType, algorithm);
} catch (IllegalArgumentException ignored2) { }
}
return convertNodesToSwhPath(nodeIds);
}
+ /**
+ * VisitNodes endpoint wrapper (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
+ */
public ArrayList visitNodes(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList nodeIds = traversal.visitNodes(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * VisitPaths endpoint wrapper (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of {@link SwhPath} from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.SwhPath
+ * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long)
+ */
public ArrayList visitPaths(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList> paths = traversal.visitPaths(srcNodeId);
return convertPathsToSwhIds(paths);
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
index e265f67..dc90a40 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,82 +1,183 @@
package org.softwareheritage.graph;
import java.io.IOException;
import it.unimi.dsi.big.webgraph.BVGraph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeIdMap;
import org.softwareheritage.graph.backend.NodeTypesMap;
+/**
+ * Main class storing the compressed graph and node id mappings.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Graph {
+ /** File extension for the SWH PID to long node id map */
public static final String PID_TO_NODE = ".pid2node.csv";
+ /** File extension for the long node id to SWH PID map */
public static final String NODE_TO_PID = ".node2pid.csv";
+ /** File extension for the long node id to node typ map */
public static final String NODE_TO_TYPE = ".node2type.map";
+ /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
BVGraph graph;
+ /** Transposed compressed graph (used for backward traversals) */
BVGraph graphTransposed;
+ /** Path and basename of the compressed graph */
String path;
+ /** Mapping long id ↔ SWH PIDs */
NodeIdMap nodeIdMap;
+ /** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
+ /**
+ * Constructor.
+ *
+ * @param path path and basename of the compressed graph to load
+ */
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
this.path = path;
this.nodeIdMap = new NodeIdMap(path, getNbNodes());
this.nodeTypesMap = new NodeTypesMap(path);
}
+ /**
+ * Cleans up graph resources after use.
+ */
public void cleanUp() throws IOException {
nodeIdMap.close();
}
+ /**
+ * Returns the graph full path.
+ *
+ * @return graph full path
+ */
public String getPath() {
return path;
}
+ /**
+ * Converts {@link SwhId} node to long.
+ *
+ * @param swhId node specified as a {@link SwhId}
+ * @return internal long node id
+ * @see org.softwareheritage.graph.SwhId
+ */
public long getNodeId(SwhId swhId) {
return nodeIdMap.getNodeId(swhId);
}
+ /**
+ * Converts long id node to {@link SwhId}.
+ *
+ * @param nodeId node specified as a long id
+ * @return external SWH PID
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId getSwhId(long nodeId) {
return nodeIdMap.getSwhId(nodeId);
}
+ /**
+ * Returns node type.
+ *
+ * @param nodeId node specified as a long id
+ * @return corresponding node type
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
+ /**
+ * Returns number of nodes in the graph.
+ *
+ * @return number of nodes in the graph
+ */
public long getNbNodes() {
return graph.numNodes();
}
+ /**
+ * Returns number of edges in the graph.
+ *
+ * @return number of edges in the graph
+ */
public long getNbEdges() {
return graph.numArcs();
}
+ /**
+ * Returns list of successors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return list of successors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] successors(long nodeId) {
return graph.successorBigArray(nodeId);
}
+ /**
+ * Returns the outdegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return outdegree of a node
+ */
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
+ /**
+ * Returns list of predecessors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return list of predecessors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] predecessors(long nodeId) {
return graphTransposed.successorBigArray(nodeId);
}
+ /**
+ * Returns the indegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return indegree of a node
+ */
public long indegree(long nodeId) {
return graphTransposed.outdegree(nodeId);
}
+ /**
+ * Returns the degree of a node, depending on graph orientation.
+ *
+ * @param nodeId node specified as a long id
+ * @param useTransposed boolean value to use transposed graph
+ * @return degree of a node
+ */
public long degree(long nodeId, boolean useTransposed) {
return (useTransposed) ? indegree(nodeId) : outdegree(nodeId);
}
+ /**
+ * Returns the neighbors of a node, depending on graph orientation.
+ *
+ * @param nodeId node specified as a long id
+ * @param useTransposed boolean value to use transposed graph
+ * @return list of neighbors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] neighbors(long nodeId, boolean useTransposed) {
return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
index c31f430..f2d08e3 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
@@ -1,56 +1,84 @@
package org.softwareheritage.graph;
import java.util.Iterator;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Graph;
+/**
+ * Iterator class to go over a node neighbors in the graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Neighbors implements Iterable {
+ /** Graph used to explore neighbors */
Graph graph;
+ /** Boolean to specify the use of the transposed graph */
boolean useTransposed;
+ /** Graph edge restriction */
AllowedEdges edges;
+ /** Source node from which neighbors will be listed */
long srcNodeId;
+ /**
+ * Constructor.
+ *
+ * @param graph graph used to explore neighbors
+ * @param useTransposed boolean value to use transposed graph
+ * @param edges edges allowed to be used in the graph
+ * @param srcNodeId source node from where to list neighbors
+ */
public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) {
this.graph = graph;
this.useTransposed = useTransposed;
this.edges = edges;
this.srcNodeId = srcNodeId;
}
@Override
public Iterator iterator() {
return new NeighborsIterator();
}
+ /**
+ * Inner class for {@link Neighbors} iterator.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NeighborsIterator implements Iterator {
long nextNeighborIdx;
long nbNeighbors;
long[][] neighbors;
public NeighborsIterator() {
this.nextNeighborIdx = -1;
this.nbNeighbors = graph.degree(srcNodeId, useTransposed);
this.neighbors = graph.neighbors(srcNodeId, useTransposed);
}
// Look ahead because with edge restriction not all neighbors are considered
public boolean hasNext() {
for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) {
long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx);
if (edges.isAllowed(srcNodeId, nextNodeId)) {
nextNeighborIdx = lookAheadIdx;
return true;
}
}
return false;
}
public Long next() {
long nextNodeId = LongBigArrays.get(neighbors, nextNeighborIdx);
return nextNodeId;
}
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Node.java b/api/server/src/main/java/org/softwareheritage/graph/Node.java
index 4fe9823..a883737 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/Node.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Node.java
@@ -1,48 +1,86 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+/**
+ * A node in the Software Heritage graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Node {
+ /**
+ * Software Heritage graph node types, as described in the
+ * data model.
+ */
public enum Type {
+ /** Content node */
CNT,
+ /** Directory node */
DIR,
+ /** Release node */
REL,
+ /** Revision node */
REV,
+ /** Snapshot node */
SNP;
+ /**
+ * Converts integer to corresponding SWH node type.
+ *
+ * @param intType node type represented as an integer
+ * @return the corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public static Node.Type fromInt(int intType) {
switch (intType) {
case 0:
return CNT;
case 1:
return DIR;
case 2:
return REL;
case 3:
return REV;
case 4:
return SNP;
}
return null;
}
+ /**
+ * Parses SWH node type from string.
+ *
+ * @param strType node type represented as a string
+ * @return the corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public static Node.Type fromStr(String strType) {
return Node.Type.valueOf(strType.toUpperCase());
}
- public static ArrayList parse(String strType) {
+ /**
+ * Parses SWH node type possible values from formatted string (TODO: link API doc).
+ *
+ * @param strFmtType node types represented as a formatted string (TODO: link API doc)
+ * @return a list containing the {@link Node.Type} values
+ * @see org.softwareheritage.graph.Node.Type
+ */
+ public static ArrayList parse(String strFmtType) {
ArrayList types = new ArrayList<>();
- if (strType.equals("*")) {
+ if (strFmtType.equals("*")) {
List nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
- types.add(Node.Type.fromStr(strType));
+ types.add(Node.Type.fromStr(strFmtType));
}
return types;
}
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
index a5ad7c1..5e70e1b 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
@@ -1,67 +1,101 @@
package org.softwareheritage.graph;
import com.fasterxml.jackson.annotation.JsonValue;
import org.softwareheritage.graph.Node;
+/**
+ * A Software Heritage PID, see persistent
+ * identifier documentation.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class SwhId {
+ /** Fixed hash length of the PID */
public static final int HASH_LENGTH = 40;
+ /** Full PID as a string */
String swhId;
+ /** PID node type */
Node.Type type;
+ /** PID hex-encoded SHA1 hash */
String hash;
- // SWH ID format: 'swh:1:type:hash'
- // https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html
+ /**
+ * Constructor.
+ *
+ * @param swhId full PID as a string
+ */
public SwhId(String swhId) {
this.swhId = swhId;
+ // PID format: 'swh:1:type:hash'
String[] parts = swhId.split(":");
if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) {
throw new IllegalArgumentException("Expected SWH ID format to be 'swh:1:type:hash', got: " + swhId);
}
String type = parts[2];
if (!type.matches("cnt|dir|rel|rev|snp")) {
throw new IllegalArgumentException("Unknown SWH ID type in: " + swhId);
}
this.type = Node.Type.fromStr(type);
this.hash = parts[3];
if (!hash.matches("[0-9a-f]{" + HASH_LENGTH + "}")) {
throw new IllegalArgumentException("Wrong SWH ID hash format in: " + swhId);
}
}
@Override
public boolean equals(Object otherObj) {
if (otherObj == this) return true;
if (!(otherObj instanceof SwhId)) return false;
SwhId other = (SwhId) otherObj;
return swhId.equals(other.getSwhId());
}
@Override
public int hashCode() {
return swhId.hashCode();
}
@Override
public String toString() {
return swhId;
}
+ /**
+ * Returns full PID as a string.
+ *
+ * @return full PID string
+ */
@JsonValue
public String getSwhId() {
return swhId;
}
+ /**
+ * Returns PID node type.
+ *
+ * @return PID corresponding {@link Node.Type}
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getType() {
return type;
}
+ /**
+ * Returns PID hex-encoded SHA1 hash.
+ *
+ * @return PID string hash
+ */
public String getHash() {
return hash;
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
index be9c80b..4d45849 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
@@ -1,76 +1,123 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import com.fasterxml.jackson.annotation.JsonValue;
import org.softwareheritage.graph.SwhId;
+/**
+ * Wrapper class to store a list of {@link SwhId}.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ * @see org.softwareheritage.graph.SwhId
+ */
+
public class SwhPath {
ArrayList path;
+ /**
+ * Constructor.
+ */
public SwhPath() {
this.path = new ArrayList();
}
+ /**
+ * Constructor.
+ *
+ * @param swhIds variable number of string PIDs to initialize this path with
+ */
public SwhPath(String ...swhIds) {
this();
for (String swhId : swhIds) {
add(new SwhId(swhId));
}
}
+ /**
+ * Constructor.
+ *
+ * @param swhIds variable number of @{link SwhId} to initialize this path with
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhPath(SwhId ...swhIds) {
this();
for (SwhId swhId : swhIds) {
add(swhId);
}
}
+ /**
+ * Returns this path as a list of {@link SwhId}.
+ *
+ * @return list of {@link SwhId} constituting the path
+ * @see org.softwareheritage.graph.SwhId
+ */
@JsonValue
public ArrayList getPath() {
return path;
}
+ /**
+ * Adds a {@link SwhId} to this path.
+ *
+ * @param {@link SwhId} to add to this path
+ * @see org.softwareheritage.graph.SwhId
+ */
public void add(SwhId swhId) {
path.add(swhId);
}
+ /**
+ * Returns the {@link SwhId} at the specified position in this path.
+ *
+ * @param index position of the {@link SwhId} to return
+ * @return {@link SwhId} at the specified position
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId get(int index) {
return path.get(index);
}
+ /**
+ * Returns the number of elements in this path.
+ *
+ * @return number of elements in this path
+ */
public int size() {
return path.size();
}
@Override
public boolean equals(Object otherObj) {
if (otherObj == this) return true;
if (!(otherObj instanceof SwhPath)) return false;
SwhPath other = (SwhPath) otherObj;
if (size() != other.size()) {
return false;
}
for (int i = 0; i < size(); i++) {
SwhId thisSwhId = get(i);
SwhId otherSwhId = other.get(i);
if (!thisSwhId.equals(otherSwhId)) {
return false;
}
}
return true;
}
@Override
public String toString() {
String str = new String();
for (SwhId swhId : path) {
str += swhId + "/";
}
return str;
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
index e5304f7..67683c9 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
@@ -1,54 +1,67 @@
package org.softwareheritage.graph.algo;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
+/**
+ * Statistics on the compressed graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Stats {
public class Counts {
public long nodes;
public long edges;
}
public class Ratios {
public double compression;
public double bitsPerNode;
public double bitsPerEdge;
public double avgLocality;
}
public class Degree {
public long min;
public long max;
public double avg;
}
public Counts counts;
public Ratios ratios;
public Degree indegree;
public Degree outdegree;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full path of compressed graph
+ */
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
properties.load(new FileInputStream(graphPath + ".stats"));
this.counts = new Counts();
this.ratios = new Ratios();
this.indegree = new Degree();
this.outdegree = new Degree();
this.counts.nodes = Long.parseLong(properties.getProperty("nodes"));
this.counts.edges = Long.parseLong(properties.getProperty("arcs"));
this.ratios.compression = Double.parseDouble(properties.getProperty("compratio"));
this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode"));
this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink"));
this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality"));
this.indegree.min = Long.parseLong(properties.getProperty("minindegree"));
this.indegree.max = Long.parseLong(properties.getProperty("maxindegree"));
this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree"));
this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree"));
this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree"));
this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree"));
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
index c9c2e04..966710a 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -1,219 +1,309 @@
package org.softwareheritage.graph.algo;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Stack;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
+/**
+ * Traversal algorithms on the compressed graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Traversal {
+ /** Graph used in the traversal */
Graph graph;
+ /** Boolean to specify the use of the transposed graph */
boolean useTransposed;
+ /** Graph edge restriction */
AllowedEdges edges;
- // Big array storing if we have visited a node
+ /**
+ * Bit array storing if we have visited a node
+ * @see it.unimi.dsi.bits.LongArrayBitVector
+ */
LongArrayBitVector visited;
- // Big array storing parents to retrieve the path when backtracking
+ /**
+ * Array storing parent node id for each nodes during a traversal
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
long[][] nodeParent;
+ /**
+ * Constructor.
+ *
+ * @param graph graph used in the traversal
+ * @param direction a string (either "forward" or "backward") specifying edge orientation
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
this.graph = graph;
this.useTransposed = (direction.equals("backward"));
this.edges = new AllowedEdges(graph, edgesFmt);
long nbNodes = graph.getNbNodes();
this.visited = LongArrayBitVector.ofLength(nbNodes);
this.nodeParent = LongBigArrays.newBigArray(nbNodes);
}
+ /**
+ * Returns the leaves of a subgraph rooted at the specified source node.
+ *
+ * @param srcNodeId source node
+ * @return list of node ids corresponding to the leaves
+ */
public ArrayList leaves(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
Stack stack = new Stack();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
neighborsCnt++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
if (neighborsCnt == 0) {
nodeIds.add(currentNodeId);
}
}
return nodeIds;
}
+ /**
+ * Returns node direct neighbors (linked with exactly one edge).
+ *
+ * @param srcNodeId source node
+ * @return list of node ids corresponding to the neighbors
+ */
public ArrayList neighbors(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
nodeIds.add(neighborNodeId);
}
return nodeIds;
}
+ /**
+ * Performs a graph traversal and returns explored nodes.
+ *
+ * @param srcNodeId source node
+ * @return list of explored node ids
+ */
public ArrayList visitNodes(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
Stack stack = new Stack();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
nodeIds.add(currentNodeId);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
}
return nodeIds;
}
+ /**
+ * Performs a graph traversal and returns explored paths.
+ *
+ * @param srcNodeId source node
+ * @return list of explored paths (represented as a list of node ids)
+ */
public ArrayList> visitPaths(long srcNodeId) {
ArrayList> paths = new ArrayList<>();
Stack currentPath = new Stack();
visitPathsInternal(srcNodeId, paths, currentPath);
return paths;
}
+ /**
+ * Internal recursive function of {@link #visitPaths}.
+ *
+ * @param currentNodeId current node
+ * @param paths list of currently stored paths
+ * @param currentPath current path as node ids
+ */
private void visitPathsInternal(
long currentNodeId, ArrayList> paths, Stack currentPath) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
visitPathsInternal(neighborNodeId, paths, currentPath);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList path = new ArrayList();
for (long nodeId : currentPath) {
path.add(nodeId);
}
paths.add(path);
}
currentPath.pop();
}
+ /**
+ * Performs a graph traversal and returns the first found path from source to destination.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return found path as a list of node ids
+ */
public ArrayList walk(long srcNodeId, T dst, String algorithm) {
long dstNodeId = -1;
if (algorithm.equals("dfs")) {
dstNodeId = walkInternalDfs(srcNodeId, dst);
} else if (algorithm.equals("bfs")) {
dstNodeId = walkInternalBfs(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Unable to find destination point: " + dst);
}
ArrayList nodeIds = backtracking(srcNodeId, dstNodeId);
return nodeIds;
}
+ /**
+ * Internal DFS function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return final destination node or -1 if no path found
+ */
private long walkInternalDfs(long srcNodeId, T dst) {
Stack stack = new Stack();
this.visited.fill(false);
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId);
}
}
}
return -1;
}
+ /**
+ * Internal BFS function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return final destination node or -1 if no path found
+ */
private long walkInternalBfs(long srcNodeId, T dst) {
Queue queue = new LinkedList();
this.visited.fill(false);
queue.add(srcNodeId);
visited.set(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
queue.add(neighborNodeId);
visited.set(neighborNodeId);
LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId);
}
}
}
return -1;
}
+ /**
+ * Internal function of {@link #walk} to check if a node corresponds to the destination.
+ *
+ * @param nodeId current node
+ * @param dst destination (either a node or a node type)
+ * @return true if the node is a destination, or false otherwise
+ */
private boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
+ /**
+ * Internal backtracking function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dstNodeId destination node
+ * @return the found path, as a list of node ids
+ */
private ArrayList backtracking(long srcNodeId, long dstNodeId) {
ArrayList path = new ArrayList();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = LongBigArrays.get(nodeParent, currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
index de3f8ad..46549ff 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
@@ -1,36 +1,64 @@
package org.softwareheritage.graph.backend;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import it.unimi.dsi.io.ByteBufferInputStream;
+/**
+ * Wrapper class around very big mmap()-ed file.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class MapFile {
+ /**
+ * Memory-mapped file buffer
+ * @see it.unimi.dsi.io.ByteBufferInputStream
+ */
ByteBufferInputStream bufferMap;
+ /** Fixed line length of the mmap()-ed file */
int lineLength;
+ /**
+ * Constructor.
+ *
+ * @param path file path to mmap()
+ * @param lineLength fixed length of a line in the file
+ */
public MapFile(String path, int lineLength) throws IOException {
this.bufferMap = null;
this.lineLength = lineLength;
try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) {
FileChannel fileChannel = mapFile.getChannel();
bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY);
}
}
+ /**
+ * Returns a specific line in the file.
+ *
+ * @param lineIndex line number in the file
+ * @return the line at the specified position
+ */
public String readAtLine(long lineIndex) {
byte[] buffer = new byte[lineLength];
long position = lineIndex * (long) lineLength;
bufferMap.position(position);
bufferMap.read(buffer, 0, lineLength);
String line = new String(buffer);
return line.trim();
}
+ /**
+ * Closes the mmap()-ed file.
+ */
public void close() throws IOException {
bufferMap.close();
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
index b510281..73c0d8f 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -1,76 +1,111 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.MapFile;
+/**
+ * Mapping between internal long node id and external SWH PID.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NodeIdMap {
+ /** Fixed length of full SWH PID */
public static final int SWH_ID_LENGTH = 50;
+ /** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
+ /** Full graph path */
String graphPath;
+ /** Number of ids to map */
long nbIds;
+ /** mmap()-ed PID_TO_NODE file */
MapFile swhToNodeMap;
+ /** mmap()-ed NODE_TO_PID file */
MapFile nodeToSwhMap;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full graph path
+ * @param nbNodes number of nodes in the graph
+ */
public NodeIdMap(String graphPath, long nbNodes) throws IOException {
this.graphPath = graphPath;
this.nbIds = nbNodes;
// +1 are for spaces and end of lines
int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1;
int nodeToSwhLineLength = SWH_ID_LENGTH + 1;
this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength);
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength);
}
- // SWH id (string) -> WebGraph node id (long)
- // Each line in PID_TO_NODE is formatted as: swhId nodeId
- // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
+ /**
+ * Converts SWH PID to corresponding long node id.
+ *
+ * @param swhId node represented as a {@link SwhId}
+ * @return corresponding node as a long id
+ * @see org.softwareheritage.graph.SwhId
+ */
public long getNodeId(SwhId swhId) {
+ // Each line in PID_TO_NODE is formatted as: swhId nodeId
+ // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
long start = 0;
long end = nbIds - 1;
while (start <= end) {
long lineNumber = (start + end) / 2L;
String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" ");
if (parts.length != 2) {
break;
}
String currentSwhId = parts[0];
long currentNodeId = Long.parseLong(parts[1]);
int cmp = currentSwhId.compareTo(swhId.toString());
if (cmp == 0) {
return currentNodeId;
} else if (cmp < 0) {
start = lineNumber + 1;
} else {
end = lineNumber - 1;
}
}
throw new IllegalArgumentException("Unknown SWH id: " + swhId);
}
- // WebGraph node id (long) -> SWH id (string)
- // Each line in NODE_TO_PID is formatted as: swhId
- // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
- // nodeId-th line to get corresponding swhId
+ /**
+ * Converts a node long id to corresponding SWH PID.
+ *
+ * @param nodeId node as a long id
+ * @return corresponding node as a {@link SwhId}
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId getSwhId(long nodeId) {
+ // Each line in NODE_TO_PID is formatted as: swhId
+ // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
+ // nodeId-th line to get corresponding swhId
if (nodeId < 0 || nodeId >= nbIds) {
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds);
}
String swhId = nodeToSwhMap.readAtLine(nodeId);
return new SwhId(swhId);
}
+ /**
+ * Closes the mapping files.
+ */
public void close() throws IOException {
swhToNodeMap.close();
nodeToSwhMap.close();
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
index 143fa65..2a80a67 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -1,26 +1,51 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+/**
+ * Mapping between long node id and SWH node type as described in the data model.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NodeTypesMap {
+ /**
+ * Array storing for each node its type
+ * @see it.unimi.dsi.fastutil.longs.LongBigList
+ */
LongBigList nodeTypesMap;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full path of the compressed graph
+ */
public NodeTypesMap(String graphPath) throws IOException {
try {
nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("Unknown class object: " + e);
}
}
+ /**
+ * Returns node type from a node long id.
+ *
+ * @param nodeId node as a long id
+ * @return corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getType(long nodeId) {
long type = nodeTypesMap.getLong(nodeId);
return Node.Type.fromInt((int) type);
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
index 3ef285f..72ada80 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -1,106 +1,120 @@
package org.softwareheritage.graph.backend;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.zip.GZIPInputStream;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeTypesMap;
+/**
+ * Pre-processing steps (such as dumping mapping files on disk) before running the graph service.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Setup {
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.err.println("Expected parameters: ");
System.exit(1);
}
String nodesPath = args[0];
String graphPath = args[1];
System.out.println("Pre-computing node id maps...");
long startTime = System.nanoTime();
precomputeNodeIdMap(nodesPath, graphPath);
long endTime = System.nanoTime();
double duration = (double) (endTime - startTime) / 1_000_000_000;
System.out.println("Done in: " + duration + " seconds");
}
+ /**
+ * Computes and dumps on disk mapping files.
+ *
+ * @param nodesPath path of the compressed csv nodes file
+ * @param graphPath path of the compressed graph
+ */
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException {
// First internal mapping: SWH id (string) -> WebGraph MPH (long)
Object2LongFunction mphMap = null;
try {
mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph");
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("The .mph file contains unknown class object: " + e);
}
long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
// Second internal mapping: WebGraph MPH (long) -> BFS ordering (long)
long[][] bfsMap = LongBigArrays.newBigArray(nbIds);
long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap);
if (loaded != nbIds) {
throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded);
}
// Dump complete mapping for all nodes: SWH id (string) <=> WebGraph node id (long)
InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath));
FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8"));
LineIterator swhIdIterator = new LineIterator(buffer);
try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE));
Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) {
// nodeToSwhMap needs to write SWH id in order of node id, so use a temporary array
Object[][] nodeToSwhId = ObjectBigArrays.newBigArray(nbIds);
// To effectively run edge restriction during graph traversals, we store node id (long) -> SWH
// type map. This is represented as a bitmap using minimum number of bits per Node.Type.
final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2));
final int nbBitsPerNodeType = log2NbTypes;
LongArrayBitVector nodeTypesBitVector =
LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType);
for (long iNode = 0; iNode < nbIds && swhIdIterator.hasNext(); iNode++) {
String strSwhId = swhIdIterator.next().toString();
long mphId = mphMap.getLong(strSwhId);
long nodeId = LongBigArrays.get(bfsMap, mphId);
String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId);
String line = strSwhId + " " + paddedNodeId + "\n";
swhToNodeMap.write(line);
ObjectBigArrays.set(nodeToSwhId, nodeId, strSwhId);
SwhId swhId = new SwhId(strSwhId);
nodeTypesMap.set(nodeId, swhId.getType().ordinal());
}
BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE);
for (long iNode = 0; iNode < nbIds; iNode++) {
String line = ObjectBigArrays.get(nodeToSwhId, iNode).toString() + "\n";
nodeToSwhMap.write(line);
}
}
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
index f6e9eae..88faec9 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
@@ -1,31 +1,39 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
+/**
+ * Linux git log experiment to benchmark graph traversal.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class LinuxLog {
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);
// A linux kernel commit on Sun Dec 31
final SwhId commit = new SwhId("swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35");
final long expectedCount = 723640;
System.out.println("git log " + commit);
System.out.println("Expecting " + expectedCount + " commits");
long startTime = System.nanoTime();
Endpoint endpoint = new Endpoint(graph, "forward", "rev:rev");
long count = endpoint.visitNodes(commit).size();
if (count != expectedCount) {
throw new IllegalArgumentException("Counted " + count + " commits");
}
long endTime = System.nanoTime();
double duration = (double) (endTime - startTime) / 1_000_000_000;
System.out.println("Visit operation done in: " + duration + " seconds");
}
}