diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
index 7d71619..1d63774 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
@@ -1,91 +1,91 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
/**
* Edge restriction based on node types, used when visiting the graph.
*
* Software Heritage
* graph contains multiple node types (contents, directories, revisions, ...) and restricting
* the traversal to specific node types is necessary for many querying operations: use cases.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class AllowedEdges {
/** Graph on which edge restriction is performed */
Graph graph;
/**
* 2D boolean matrix storing access rights for all combination of src/dst node types (first
* dimension is source, second dimension is destination), when edge restriction is not enforced
* this array is set to null for early bypass.
*/
public boolean[][] restrictedTo;
/**
* Constructor.
*
* @param graph the graph on which to perform edge restriction
* @param edgesFmt a formatted string describing allowed edges
*/
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
int nbNodeTypes = Node.Type.values().length;
this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes];
// Special values (null, empty, "*")
if (edgesFmt == null || edgesFmt.isEmpty()) {
return;
}
if (edgesFmt.equals("*")) {
// Allows for quick bypass (with simple null check) when no edge restriction
restrictedTo = null;
return;
}
// Format: "src1:dst1,src2:dst2,[...]"
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
if (nodeTypes.length != 2) {
throw new IllegalArgumentException("Cannot parse edge type: " + edgeType);
}
ArrayList srcTypes = Node.Type.parse(nodeTypes[0]);
ArrayList dstTypes = Node.Type.parse(nodeTypes[1]);
for (Node.Type srcType : srcTypes) {
for (Node.Type dstType : dstTypes) {
restrictedTo[srcType.ordinal()][dstType.ordinal()] = true;
}
}
}
}
/**
* Checks if a given edge can be followed during graph traversal.
*
* @param srcNodeId edge source node
* @param dstNodeId edge destination node
* @return true if allowed and false otherwise
*/
public boolean isAllowed(long srcNodeId, long dstNodeId) {
Node.Type srcType = graph.getNodeType(srcNodeId);
Node.Type dstType = graph.getNodeType(dstNodeId);
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
/**
* Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of
* node ids.
*
* @see AllowedEdges#isAllowed(long, long)
*/
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java
index c0821b7..dabca46 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,195 +1,195 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.algo.Stats;
/**
* Web framework of the swh-graph server REST API.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class App {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
SimpleJSAP jsap = new SimpleJSAP(App.class.getName(),
"Server to load and query a compressed graph representation of Software Heritage archive.",
new Parameter[] {
new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port",
"Binding port of the server."),
new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the compressed graph."),
new Switch("timings", 't', "timings", "Show timings in API result metadata."),
});
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
String graphPath = config.getString("graphPath");
int port = config.getInt("port");
boolean showTimings = config.getBoolean("timings");
startServer(graphPath, port, showTimings);
}
/**
* Loads compressed graph and starts the web server to query it.
*
* @param graphPath basename of the compressed graph
* @param port binding port of the server
* @param showTimings true if timings should be in results metadata, false otherwise
*/
private static void startServer(String graphPath, int port, boolean showTimings)
throws IOException {
Graph graph = new Graph(graphPath);
Stats stats = new Stats(graphPath);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
Javalin app = Javalin.create().start(port);
app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
app.get("/stats/", ctx -> { ctx.json(stats); });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
app.get("/leaves/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/neighbors/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/visit/nodes/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/visit/paths/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/walk/:src/:dst", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String algorithm = ctx.queryParam("traversal", "dfs");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
/**
* Checks query strings names provided to the REST API.
*
* @param ctx Javalin HTTP request context
* @param allowedFmt a regular expression describing allowed query strings names
* @throws IllegalArgumentException unknown query string provided
*/
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
/**
* Formats endpoint result into final JSON for the REST API.
*
* Removes unwanted information if necessary, such as timings (to prevent use of side channels
* attacks).
*
* @param output endpoint operation output which needs formatting
* @param showTimings true if timings should be in results metadata, false otherwise
* @return final Object with desired JSON format
*/
private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) {
if (showTimings) {
return output;
} else {
Map metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed);
Map outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings);
return outputNoTimings;
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
index 1a83e80..7e66d62 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -1,311 +1,311 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
import org.softwareheritage.graph.benchmark.utils.Timing;
/**
* REST API endpoints wrapper functions.
*
* Graph operations are segmented between high-level class (this one) and the low-level class
* ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing
* all the input/output node ids conversions and logging timings.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
* @see org.softwareheritage.graph.algo.Traversal
*/
public class Endpoint {
/**
* Wrapper class to unify traversal methods input signatures.
*/
public static class Input {
/** Source node of endpoint call specified as a {@link SwhPID} */
public SwhPID src;
/**
* Destination formatted string as described in the API
*/
public String dstFmt;
/** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */
public String algorithm;
public Input(SwhPID src) {
this.src = src;
}
public Input(SwhPID src, String dstFmt, String algorithm) {
this.src = src;
this.dstFmt = dstFmt;
this.algorithm = algorithm;
}
}
/**
* Wrapper class to return both the endpoint result and metadata (such as timings).
*/
public static class Output {
/** The result content itself */
public T result;
/** Various metadata about the result */
public Meta meta;
public Output() {
this.result = null;
this.meta = new Meta();
}
/**
* Endpoint result metadata.
*/
public class Meta {
/** Operations timings */
public Timings timings;
/** Number of edges accessed during traversal */
public long nbEdgesAccessed;
public Meta() {
this.timings = new Timings();
this.nbEdgesAccessed = 0;
}
/**
* Wrapper class for JSON output format.
*/
public class Timings {
/** Time in seconds to do the traversal */
public double traversal;
/** Time in seconds to convert input SWH PID to node id */
public double pid2node;
/** Time in seconds to convert output node ids to SWH PIDs */
public double node2pid;
}
}
}
/** Graph where traversal endpoint is performed */
Graph graph;
/** Internal traversal API */
Traversal traversal;
/**
* Constructor.
*
* @param graph the graph used for traversal endpoint
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed edges
*/
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
/**
* Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
*
* @param nodeIds the list of long node ids
* @return a list of corresponding SWH PIDs
*/
private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) {
ArrayList swhPIDs = new ArrayList<>();
for (long nodeId : nodeIds) {
swhPIDs.add(graph.getSwhPID(nodeId));
}
return swhPIDs;
}
/**
* Converts a list of (internal) long node ids to the corresponding {@link SwhPath}.
*
* @param nodeIds the list of long node ids
* @return the corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private SwhPath convertNodesToSwhPath(ArrayList nodeIds) {
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
path.add(graph.getSwhPID(nodeId));
}
return path;
}
/**
* Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s.
*
* @param pathsNodeId the list of paths with long node ids
* @return a list of corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) {
ArrayList paths = new ArrayList<>();
for (ArrayList path : pathsNodeId) {
paths.add(convertNodesToSwhPath(path));
}
return paths;
}
/**
* Leaves endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.algo.Traversal#leaves(long)
*/
public Output leaves(Input input) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList nodeIds = traversal.leaves(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* Neighbors endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
*/
public Output neighbors(Input input) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList nodeIds = traversal.neighbors(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* Walk endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting {@link SwhPath} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#walk
*/
public Output walk(Input input) {
Output output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
ArrayList nodeIds = new ArrayList();
// Destination is either a SWH PID or a node type
try {
SwhPID dstSwhPID = new SwhPID(input.dstFmt);
long dstNodeId = graph.getNodeId(dstSwhPID);
startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm);
output.meta.timings.traversal = Timing.stop(startTime);
} catch (IllegalArgumentException ignored1) {
try {
Node.Type dstType = Node.Type.fromStr(input.dstFmt);
startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm);
output.meta.timings.traversal = Timing.stop(startTime);
} catch (IllegalArgumentException ignored2) {
}
}
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPath(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* VisitNodes endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
*/
public Output visitNodes(Input input) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList nodeIds = traversal.visitNodes(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* VisitPaths endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPath} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#visitPaths(long)
*/
public Output visitPaths(Input input) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList> paths = traversal.visitPaths(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertPathsToSwhPIDs(paths);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
index 7b6187e..d69e1c2 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,194 +1,194 @@
package org.softwareheritage.graph;
import java.io.IOException;
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.backend.NodeIdMap;
import org.softwareheritage.graph.backend.NodeTypesMap;
/**
* Main class storing the compressed graph and node id mappings.
*
* The compressed graph is stored using the WebGraph
* ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent
* identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔
* PID) are used for the input (users refer to the graph using PID) and the output (convert back to
* PID for users results). However, since graph traversal can be restricted depending on the node
* type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full
* PID lookup.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
* @see org.softwareheritage.graph.AllowedEdges
* @see org.softwareheritage.graph.NodeIdMap;
* @see org.softwareheritage.graph.NodeTypesMap;
*/
public class Graph {
/** File extension for the SWH PID to long node id map */
public static final String PID_TO_NODE = ".pid2node.csv";
/** File extension for the long node id to SWH PID map */
public static final String NODE_TO_PID = ".node2pid.csv";
/** File extension for the long node id to node typ map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
BVGraph graph;
/** Transposed compressed graph (used for backward traversals) */
BVGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id ↔ SWH PIDs */
NodeIdMap nodeIdMap;
/** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
this.path = path;
this.nodeIdMap = new NodeIdMap(path, getNbNodes());
this.nodeTypesMap = new NodeTypesMap(path);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SwhPID} node to long.
*
* @param swhPID node specified as a {@link SwhPID}
* @return internal long node id
* @see org.softwareheritage.graph.SwhPID
*/
public long getNodeId(SwhPID swhPID) {
return nodeIdMap.getNodeId(swhPID);
}
/**
* Converts long id node to {@link SwhPID}.
*
* @param nodeId node specified as a long id
* @return external SWH PID
* @see org.softwareheritage.graph.SwhPID
*/
public SwhPID getSwhPID(long nodeId) {
return nodeIdMap.getSwhPID(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
public long getNbNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
public long getNbEdges() {
return graph.numArcs();
}
/**
* Returns lazy iterator of successors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator
*/
public LazyLongIterator successors(long nodeId) {
return graph.successors(nodeId);
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns lazy iterator of predecessors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator
*/
public LazyLongIterator predecessors(long nodeId) {
return graphTransposed.successors(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return graphTransposed.outdegree(nodeId);
}
/**
* Returns the degree of a node, depending on graph orientation.
*
* @param nodeId node specified as a long id
* @param useTransposed boolean value to use transposed graph
* @return degree of a node
*/
public long degree(long nodeId, boolean useTransposed) {
return (useTransposed) ? indegree(nodeId) : outdegree(nodeId);
}
/**
* Returns the neighbors of a node (as a lazy iterator), depending on graph orientation.
*
* @param nodeId node specified as a long id
* @param useTransposed boolean value to use transposed graph
* @return lazy iterator of neighbors of the node, specified as a WebGraph LazyLongIterator
*/
public LazyLongIterator neighbors(long nodeId, boolean useTransposed) {
return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
index bdae373..1b13c88 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
@@ -1,84 +1,84 @@
package org.softwareheritage.graph;
import java.util.Iterator;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Graph;
/**
* Iterator class to go over a node neighbors in the graph.
*
* Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
* @see org.softwareheritage.graph.AllowedEdges
*/
public class Neighbors implements Iterable {
/** Graph used to explore neighbors */
Graph graph;
/** Boolean to specify the use of the transposed graph */
boolean useTransposed;
/** Graph edge restriction */
AllowedEdges edges;
/** Source node from which neighbors will be listed */
long srcNodeId;
/**
* Constructor.
*
* @param graph graph used to explore neighbors
* @param useTransposed boolean value to use transposed graph
* @param edges edges allowed to be used in the graph
* @param srcNodeId source node from where to list neighbors
*/
public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) {
this.graph = graph;
this.useTransposed = useTransposed;
this.edges = edges;
this.srcNodeId = srcNodeId;
}
@Override
public Iterator iterator() {
return new NeighborsIterator();
}
/**
* Inner class for {@link Neighbors} iterator.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class NeighborsIterator implements Iterator {
LazyLongIterator neighbors;
long nextNeighborId;
public NeighborsIterator() {
this.neighbors = graph.neighbors(srcNodeId, useTransposed);
this.nextNeighborId = -1;
}
public boolean hasNext() {
// Case 1: no edge restriction, bypass type checks and skip to next neighbor
if (edges.restrictedTo == null) {
nextNeighborId = neighbors.nextLong();
return (nextNeighborId != -1);
}
// Case 2: edge restriction, look ahead for next neighbor
while ((nextNeighborId = neighbors.nextLong()) != -1) {
if (edges.isAllowed(srcNodeId, nextNeighborId)) {
return true;
}
}
return false;
}
public Long next() {
return nextNeighborId;
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Node.java b/java/server/src/main/java/org/softwareheritage/graph/Node.java
index 7acc2bf..04fd073 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Node.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Node.java
@@ -1,93 +1,93 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* A node in the Software Heritage graph.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Node {
/**
* Software Heritage graph node types, as described in the
* data model.
*/
public enum Type {
/** Content node */
CNT,
/** Directory node */
DIR,
/** Origin node */
ORI,
/** Release node */
REL,
/** Revision node */
REV,
/** Snapshot node */
SNP;
/**
* Converts integer to corresponding SWH node type.
*
* @param intType node type represented as an integer
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromInt(int intType) {
switch (intType) {
case 0:
return CNT;
case 1:
return DIR;
case 2:
return ORI;
case 3:
return REL;
case 4:
return REV;
case 5:
return SNP;
}
return null;
}
/**
* Converts string to corresponding SWH node type.
*
* @param strType node type represented as a string
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromStr(String strType) {
if (!strType.matches("cnt|dir|ori|rel|rev|snp")) {
throw new IllegalArgumentException("Unknown node type: " + strType);
}
return Node.Type.valueOf(strType.toUpperCase());
}
/**
* Parses SWH node type possible values from formatted string (see the API
* syntax).
*
* @param strFmtType node types represented as a formatted string
* @return a list containing the {@link Node.Type} values
* @see org.softwareheritage.graph.Node.Type
*/
public static ArrayList parse(String strFmtType) {
ArrayList types = new ArrayList<>();
if (strFmtType.equals("*")) {
List nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
types.add(Node.Type.fromStr(strFmtType));
}
return types;
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java
index 8790246..34f44e1 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java
@@ -1,98 +1,98 @@
package org.softwareheritage.graph;
import com.fasterxml.jackson.annotation.JsonValue;
import org.softwareheritage.graph.Node;
/**
* A Software Heritage PID, see persistent
* identifier documentation.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class SwhPID {
/** Fixed hash length of the PID */
public static final int HASH_LENGTH = 40;
/** Full PID as a string */
String swhPID;
/** PID node type */
Node.Type type;
/** PID hex-encoded SHA1 hash */
String hash;
/**
* Constructor.
*
* @param swhPID full PID as a string
*/
public SwhPID(String swhPID) {
this.swhPID = swhPID;
// PID format: 'swh:1:type:hash'
String[] parts = swhPID.split(":");
if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) {
throw new IllegalArgumentException(
"Expected SWH PID format to be 'swh:1:type:hash', got: " + swhPID);
}
this.type = Node.Type.fromStr(parts[2]);
this.hash = parts[3];
if (!hash.matches("[0-9a-f]{" + HASH_LENGTH + "}")) {
throw new IllegalArgumentException("Wrong SWH PID hash format in: " + swhPID);
}
}
@Override
public boolean equals(Object otherObj) {
if (otherObj == this)
return true;
if (!(otherObj instanceof SwhPID))
return false;
SwhPID other = (SwhPID) otherObj;
return swhPID.equals(other.getSwhPID());
}
@Override
public int hashCode() {
return swhPID.hashCode();
}
@Override
public String toString() {
return swhPID;
}
/**
* Returns full PID as a string.
*
* @return full PID string
*/
@JsonValue
public String getSwhPID() {
return swhPID;
}
/**
* Returns PID node type.
*
* @return PID corresponding {@link Node.Type}
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getType() {
return type;
}
/**
* Returns PID hex-encoded SHA1 hash.
*
* @return PID string hash
*/
public String getHash() {
return hash;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
index c2df815..49925d7 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java
@@ -1,124 +1,124 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import com.fasterxml.jackson.annotation.JsonValue;
import org.softwareheritage.graph.SwhPID;
/**
* Wrapper class to store a list of {@link SwhPID}.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
* @see org.softwareheritage.graph.SwhPID
*/
public class SwhPath {
/** Internal list of {@link SwhPID} */
ArrayList path;
/**
* Constructor.
*/
public SwhPath() {
this.path = new ArrayList();
}
/**
* Constructor.
*
* @param swhPIDs variable number of string PIDs to initialize this path with
*/
public SwhPath(String... swhPIDs) {
this();
for (String swhPID : swhPIDs) {
add(new SwhPID(swhPID));
}
}
/**
* Constructor.
*
* @param swhPIDs variable number of {@link SwhPID} to initialize this path with
* @see org.softwareheritage.graph.SwhPID
*/
public SwhPath(SwhPID... swhPIDs) {
this();
for (SwhPID swhPID : swhPIDs) {
add(swhPID);
}
}
/**
* Returns this path as a list of {@link SwhPID}.
*
* @return list of {@link SwhPID} constituting the path
* @see org.softwareheritage.graph.SwhPID
*/
@JsonValue
public ArrayList getPath() {
return path;
}
/**
* Adds a {@link SwhPID} to this path.
*
* @param {@link SwhPID} to add to this path
* @see org.softwareheritage.graph.SwhPID
*/
public void add(SwhPID swhPID) {
path.add(swhPID);
}
/**
* Returns the {@link SwhPID} at the specified position in this path.
*
* @param index position of the {@link SwhPID} to return
* @return {@link SwhPID} at the specified position
* @see org.softwareheritage.graph.SwhPID
*/
public SwhPID get(int index) {
return path.get(index);
}
/**
* Returns the number of elements in this path.
*
* @return number of elements in this path
*/
public int size() {
return path.size();
}
@Override
public boolean equals(Object otherObj) {
if (otherObj == this)
return true;
if (!(otherObj instanceof SwhPath))
return false;
SwhPath other = (SwhPath) otherObj;
if (size() != other.size()) {
return false;
}
for (int i = 0; i < size(); i++) {
SwhPID thisSwhPID = get(i);
SwhPID otherSwhPID = other.get(i);
if (!thisSwhPID.equals(otherSwhPID)) {
return false;
}
}
return true;
}
@Override
public String toString() {
String str = new String();
for (SwhPID swhPID : path) {
str += swhPID + "/";
}
return str;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
index 1099b6f..8a0e708 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
@@ -1,68 +1,68 @@
package org.softwareheritage.graph.algo;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
/**
* Statistics on the compressed graph.
*
* These statistics are not computed but directly read from WebGraph generated .stats and .properties files.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Stats {
public class Counts {
public long nodes;
public long edges;
}
public class Ratios {
public double compression;
public double bitsPerNode;
public double bitsPerEdge;
public double avgLocality;
}
public class Degree {
public long min;
public long max;
public double avg;
}
public Counts counts;
public Ratios ratios;
public Degree indegree;
public Degree outdegree;
/**
* Constructor.
*
* @param graphPath path and basename of compressed graph
*/
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
properties.load(new FileInputStream(graphPath + ".stats"));
this.counts = new Counts();
this.ratios = new Ratios();
this.indegree = new Degree();
this.outdegree = new Degree();
this.counts.nodes = Long.parseLong(properties.getProperty("nodes"));
this.counts.edges = Long.parseLong(properties.getProperty("arcs"));
this.ratios.compression = Double.parseDouble(properties.getProperty("compratio"));
this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode"));
this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink"));
this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality"));
this.indegree.min = Long.parseLong(properties.getProperty("minindegree"));
this.indegree.max = Long.parseLong(properties.getProperty("maxindegree"));
this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree"));
this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree"));
this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree"));
this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree"));
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
index c923724..fe0f56f 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -1,328 +1,328 @@
package org.softwareheritage.graph.algo;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.Stack;
import it.unimi.dsi.bits.LongArrayBitVector;
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
/**
* Traversal algorithms on the compressed graph.
*
* Internal implementation of the traversal API endpoints. These methods only input/output internal
* long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage
* PID.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
* @see org.softwareheritage.graph.Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
Graph graph;
/** Boolean to specify the use of the transposed graph */
boolean useTransposed;
/** Graph edge restriction */
AllowedEdges edges;
/** Bit array storing if we have visited a node */
LongArrayBitVector visited;
/** Hash map storing parent node id for each nodes during a traversal */
Map parentNode;
/** Number of edges accessed during traversal */
long nbEdgesAccessed;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed edges
*/
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
this.graph = graph;
this.useTransposed = (direction.equals("backward"));
this.edges = new AllowedEdges(graph, edgesFmt);
long nbNodes = graph.getNbNodes();
this.visited = LongArrayBitVector.ofLength(nbNodes);
this.parentNode = new HashMap<>();
this.nbEdgesAccessed = 0;
}
/**
* Returns number of accessed edges during traversal.
*
* @return number of edges accessed in last traversal
*/
public long getNbEdgesAccessed() {
return nbEdgesAccessed;
}
/**
* Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
* @return list of node ids corresponding to the leaves
*/
public ArrayList leaves(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
Stack stack = new Stack();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
neighborsCnt++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
if (neighborsCnt == 0) {
nodeIds.add(currentNodeId);
}
}
return nodeIds;
}
/**
* Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
* @return list of node ids corresponding to the neighbors
*/
public ArrayList neighbors(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
this.nbEdgesAccessed = graph.degree(srcNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
nodeIds.add(neighborNodeId);
}
return nodeIds;
}
/**
* Performs a graph traversal and returns explored nodes.
*
* @param srcNodeId source node
* @return list of explored node ids
*/
public ArrayList visitNodes(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
Stack stack = new Stack();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
nodeIds.add(currentNodeId);
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
}
return nodeIds;
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList> visitPaths(long srcNodeId) {
ArrayList> paths = new ArrayList<>();
Stack currentPath = new Stack();
this.nbEdgesAccessed = 0;
visitPathsInternal(srcNodeId, paths, currentPath);
return paths;
}
/**
* Internal recursive function of {@link #visitPaths}.
*
* @param currentNodeId current node
* @param paths list of currently stored paths
* @param currentPath current path as node ids
*/
private void visitPathsInternal(
long currentNodeId, ArrayList> paths, Stack currentPath) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
visitPathsInternal(neighborNodeId, paths, currentPath);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList path = new ArrayList();
for (long nodeId : currentPath) {
path.add(nodeId);
}
paths.add(path);
}
currentPath.pop();
}
/**
* Performs a graph traversal and returns the first found path from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public ArrayList walk(long srcNodeId, T dst, String algorithm) {
long dstNodeId = -1;
if (algorithm.equals("dfs")) {
dstNodeId = walkInternalDfs(srcNodeId, dst);
} else if (algorithm.equals("bfs")) {
dstNodeId = walkInternalBfs(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Unable to find destination point: " + dst);
}
ArrayList nodeIds = backtracking(srcNodeId, dstNodeId);
return nodeIds;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalDfs(long srcNodeId, T dst) {
Stack stack = new Stack();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalBfs(long srcNodeId, T dst) {
Queue queue = new LinkedList();
this.nbEdgesAccessed = 0;
queue.add(srcNodeId);
visited.set(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
queue.add(neighborNodeId);
visited.set(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList backtracking(long srcNodeId, long dstNodeId) {
ArrayList path = new ArrayList();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = parentNode.get(currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
index 8b3f60c..b63ae87 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
@@ -1,63 +1,63 @@
package org.softwareheritage.graph.backend;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import it.unimi.dsi.io.ByteBufferInputStream;
/**
* Wrapper class around very big mmap()-ed file.
*
* Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this
* Java limit.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class MapFile {
/** Memory-mapped file buffer */
ByteBufferInputStream bufferMap;
/** Fixed line length of the mmap()-ed file */
int lineLength;
/**
* Constructor.
*
* @param path file path to mmap()
* @param lineLength fixed length of a line in the file
*/
public MapFile(String path, int lineLength) throws IOException {
this.bufferMap = null;
this.lineLength = lineLength;
try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) {
FileChannel fileChannel = mapFile.getChannel();
bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY);
}
}
/**
* Returns a specific line in the file.
*
* @param lineIndex line number in the file
* @return the line at the specified position
*/
public String readAtLine(long lineIndex) {
byte[] buffer = new byte[lineLength];
long position = lineIndex * (long) lineLength;
bufferMap.position(position);
bufferMap.read(buffer, 0, lineLength);
String line = new String(buffer);
return line.trim();
}
/**
* Closes the mmap()-ed file.
*/
public void close() throws IOException {
bufferMap.close();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
index b148f28..0ac98b0 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -1,115 +1,115 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.backend.MapFile;
import org.softwareheritage.graph.backend.Setup;
/**
* Mapping between internal long node id and external SWH PID.
*
* Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then
* they are loaded here using mmap().
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
* @see org.softwareheritage.graph.backend.Setup
*/
public class NodeIdMap {
/** Fixed length of full SWH PID */
public static final int SWH_ID_LENGTH = 50;
/** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
/** Graph path and basename */
String graphPath;
/** Number of ids to map */
long nbIds;
/** mmap()-ed PID_TO_NODE file */
MapFile swhToNodeMap;
/** mmap()-ed NODE_TO_PID file */
MapFile nodeToSwhMap;
/**
* Constructor.
*
* @param graphPath full graph path
* @param nbNodes number of nodes in the graph
*/
public NodeIdMap(String graphPath, long nbNodes) throws IOException {
this.graphPath = graphPath;
this.nbIds = nbNodes;
// +1 are for spaces and end of lines
int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1;
int nodeToSwhLineLength = SWH_ID_LENGTH + 1;
this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength);
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength);
}
/**
* Converts SWH PID to corresponding long node id.
*
* @param swhPID node represented as a {@link SwhPID}
* @return corresponding node as a long id
* @see org.softwareheritage.graph.SwhPID
*/
public long getNodeId(SwhPID swhPID) {
// Each line in PID_TO_NODE is formatted as: swhPID nodeId
// The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding
// nodeId
long start = 0;
long end = nbIds - 1;
while (start <= end) {
long lineNumber = (start + end) / 2L;
String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" ");
if (parts.length != 2) {
break;
}
String currentSwhPID = parts[0];
long currentNodeId = Long.parseLong(parts[1]);
int cmp = currentSwhPID.compareTo(swhPID.toString());
if (cmp == 0) {
return currentNodeId;
} else if (cmp < 0) {
start = lineNumber + 1;
} else {
end = lineNumber - 1;
}
}
throw new IllegalArgumentException("Unknown SWH PID: " + swhPID);
}
/**
* Converts a node long id to corresponding SWH PID.
*
* @param nodeId node as a long id
* @return corresponding node as a {@link SwhPID}
* @see org.softwareheritage.graph.SwhPID
*/
public SwhPID getSwhPID(long nodeId) {
// Each line in NODE_TO_PID is formatted as: swhPID
// The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the
// nodeId-th line to get corresponding swhPID
if (nodeId < 0 || nodeId >= nbIds) {
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds);
}
String swhPID = nodeToSwhMap.readAtLine(nodeId);
return new SwhPID(swhPID);
}
/**
* Closes the mapping files.
*/
public void close() throws IOException {
swhToNodeMap.close();
nodeToSwhMap.close();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
index a516c8d..cf2be75 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -1,51 +1,51 @@
package org.softwareheritage.graph.backend;
import java.io.IOException;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
/**
* Mapping between long node id and SWH node type as described in the data model.
*
* The type mapping is pre-computed and dumped on disk in the {@link Setup} class, then it is loaded
* in-memory here using fastutil LongBigList. To be
* space-efficient, the mapping is stored as a bitmap using minimum number of bits per {@link
* Node.Type}.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class NodeTypesMap {
/** Array storing for each node its type */
LongBigList nodeTypesMap;
/**
* Constructor.
*
* @param graphPath path and basename of the compressed graph
*/
public NodeTypesMap(String graphPath) throws IOException {
try {
nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("Unknown class object: " + e);
}
}
/**
* Returns node type from a node long id.
*
* @param nodeId node as a long id
* @return corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getType(long nodeId) {
long type = nodeTypesMap.getLong(nodeId);
return Node.Type.fromInt((int) type);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
index 225de4f..16f3092 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -1,123 +1,123 @@
package org.softwareheritage.graph.backend;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.zip.GZIPInputStream;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.backend.NodeTypesMap;
/**
* Pre-processing steps (such as dumping mapping files on disk) before running the graph service.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Setup {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.err.println("Expected parameters: ");
System.exit(1);
}
String nodesPath = args[0];
String graphPath = args[1];
System.out.println("Pre-computing node id maps...");
long startTime = System.nanoTime();
precomputeNodeIdMap(nodesPath, graphPath);
long endTime = System.nanoTime();
double duration = (endTime - startTime) / 1_000_000_000;
System.out.println("Done in: " + duration + " seconds");
}
/**
* Computes and dumps on disk mapping files.
*
* @param nodesPath path of the compressed csv nodes file
* @param graphPath path of the compressed graph
*/
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException {
// First internal mapping: SWH PID (string) -> WebGraph MPH (long)
Object2LongFunction mphMap = null;
try {
mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph");
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("The .mph file contains unknown class object: " + e);
}
long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
// Second internal mapping: WebGraph MPH (long) -> BFS ordering (long)
long[][] bfsMap = LongBigArrays.newBigArray(nbIds);
long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap);
if (loaded != nbIds) {
throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded);
}
// Dump complete mapping for all nodes: SWH PID (string) <=> WebGraph node id (long)
InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath));
FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8"));
LineIterator swhPIDIterator = new LineIterator(buffer);
try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE));
Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) {
// nodeToSwhMap needs to write SWH PID in order of node id, so use a temporary array
Object[][] nodeToSwhPID = ObjectBigArrays.newBigArray(nbIds);
// To effectively run edge restriction during graph traversals, we store node id (long) -> SWH
// type map. This is represented as a bitmap using minimum number of bits per Node.Type.
final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2));
final int nbBitsPerNodeType = log2NbTypes;
LongArrayBitVector nodeTypesBitVector =
LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType);
for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) {
String strSwhPID = swhPIDIterator.next().toString();
long mphId = mphMap.getLong(strSwhPID);
long nodeId = LongBigArrays.get(bfsMap, mphId);
String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId);
String line = strSwhPID + " " + paddedNodeId + "\n";
swhToNodeMap.write(line);
ObjectBigArrays.set(nodeToSwhPID, nodeId, strSwhPID);
SwhPID swhPID = new SwhPID(strSwhPID);
nodeTypesMap.set(nodeId, swhPID.getType().ordinal());
}
BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE);
for (long iNode = 0; iNode < nbIds; iNode++) {
String line = ObjectBigArrays.get(nodeToSwhPID, iNode).toString() + "\n";
nodeToSwhMap.write(line);
}
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
index a5ae040..dae2af1 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
@@ -1,47 +1,47 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import java.util.ArrayList;
import com.martiansoftware.jsap.JSAPException;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.benchmark.Benchmark;
import org.softwareheritage.graph.benchmark.utils.Statistics;
import org.softwareheritage.graph.benchmark.utils.Timing;
/**
* Benchmark to time edge access time.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class AccessEdge {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
ArrayList timings = new ArrayList<>();
for (long nodeId : nodeIds) {
long startTime = Timing.start();
LazyLongIterator neighbors = graph.successors(nodeId);
long firstNeighbor = neighbors.nextLong();
double duration = Timing.stop(startTime);
timings.add(duration);
}
System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):");
Statistics stats = new Statistics(timings);
stats.printAll();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
index 70fc9e5..0eb6c23 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
@@ -1,170 +1,170 @@
package org.softwareheritage.graph.benchmark;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.StringJoiner;
import java.util.function.Function;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.benchmark.utils.Random;
import org.softwareheritage.graph.benchmark.utils.Statistics;
/**
* Benchmark common utility functions.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Benchmark {
/**
* Input arguments.
*/
public class Args {
/** Basename of the compressed graph */
public String graphPath;
/** Number of random nodes to use for the benchmark */
public int nbNodes;
/** File name for CSV format benchmark log */
public String logFile;
/** Random generator */
public Random random;
}
/** Command line arguments */
public Args args;
/** CSV separator for log file */
final String CSV_SEPARATOR = ";";
/**
* Constructor.
*/
public Benchmark() {
this.args = new Args();
}
/**
* Parses benchmark command line arguments.
*
* @param args command line arguments
*/
public void parseCommandLineArgs(String[] args) throws JSAPException {
SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(),
"Benchmark tool for Software Heritage use-cases scenarios.",
new Parameter[] {
new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the compressed graph."),
new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
"nb-nodes", "Number of random nodes used to do the benchmark."),
new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l',
"log-file", "File name to output CSV format benchmark log."),
new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's',
"seed", "Random generator seed."),
});
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
this.args.graphPath = config.getString("graphPath");
this.args.nbNodes = config.getInt("nbNodes");
this.args.logFile = config.getString("logFile");
this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random();
}
/**
* Creates CSV file for log output.
*/
public void createCSVLogFile() throws IOException {
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) {
StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR);
csvHeader.add("use case name")
.add("SWH PID")
.add("number of edges accessed")
.add("traversal timing")
.add("pid2node timing")
.add("node2pid timing");
csvLog.write(csvHeader.toString() + "\n");
}
}
/**
* Times a specific endpoint and outputs individual datapoints along with aggregated statistics.
*
* @param useCaseName benchmark use-case name
* @param graph compressed graph used in the benchmark
* @param nodeIds node ids to use as starting point for the endpoint traversal
* @param operation endpoint function to benchmark
* @param dstFmt destination formatted string as described in the API
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
Function operation, String dstFmt, String algorithm)
throws IOException {
ArrayList timings = new ArrayList<>();
ArrayList timingsNormalized = new ArrayList<>();
ArrayList nbEdgesAccessed = new ArrayList<>();
final boolean append = true;
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) {
for (long nodeId : nodeIds) {
SwhPID swhPID = graph.getSwhPID(nodeId);
Endpoint.Output output = (dstFmt == null)
? operation.apply(new Endpoint.Input(swhPID))
: operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm));
StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR);
csvLine.add(useCaseName)
.add(swhPID.toString())
.add(Long.toString(output.meta.nbEdgesAccessed))
.add(Double.toString(output.meta.timings.traversal))
.add(Double.toString(output.meta.timings.pid2node))
.add(Double.toString(output.meta.timings.node2pid));
csvLog.write(csvLine.toString() + "\n");
timings.add(output.meta.timings.traversal);
nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed);
if (output.meta.nbEdgesAccessed != 0) {
timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
}
}
}
System.out.println("\n" + useCaseName + " use-case:");
System.out.println("timings:");
Statistics stats = new Statistics(timings);
stats.printAll();
System.out.println("timings normalized:");
Statistics statsNormalized = new Statistics(timingsNormalized);
statsNormalized.printAll();
System.out.println("nb edges accessed:");
Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed);
statsNbEdgesAccessed.printAll();
}
/**
* Same as {@link timeEndpoint} but without destination or algorithm specified to endpoint call.
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
Function operation) throws IOException {
timeEndpoint(useCaseName, graph, nodeIds, operation, null, null);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
index 5939e1c..188685f 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
@@ -1,46 +1,46 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.benchmark.Benchmark;
/**
* Benchmark Software Heritage browsing
* use-cases scenarios.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Browsing {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] dirNodeIds =
bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
long[] revNodeIds =
bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir");
Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors);
bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths);
bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
index 9fa4f7d..613c2b5 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
@@ -1,53 +1,53 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.benchmark.Benchmark;
/**
* Benchmark Software Heritage provenance
* use-cases scenarios.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Provenance {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev");
Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint(
"commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
bench.timeEndpoint(
"commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
bench.timeEndpoint(
"complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
bench.timeEndpoint(
"origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
bench.timeEndpoint(
"origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
bench.timeEndpoint(
"complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
index c840ac9..3e43a2a 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
@@ -1,39 +1,39 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.benchmark.Benchmark;
/**
* Benchmark Software Heritage vault
* use-case scenario.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Vault {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint endpoint = new Endpoint(graph, "forward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes);
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
index bab2fb8..abcec76 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
@@ -1,67 +1,67 @@
package org.softwareheritage.graph.benchmark.utils;
import java.util.PrimitiveIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
/**
* Random related utility class.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Random {
/** Internal pseudorandom generator */
java.util.Random random;
/**
* Constructor.
*/
public Random() {
this.random = new java.util.Random();
}
/**
* Constructor.
*
* @param seed random generator seed
*/
public Random(long seed) {
this.random = new java.util.Random(seed);
}
/**
* Generates random node ids.
*
* @param graph graph used to pick node ids
* @param nbNodes number of node ids to generate
* @return an array of random node ids
*/
public long[] generateNodeIds(Graph graph, int nbNodes) {
return random.longs(nbNodes, 0, graph.getNbNodes()).toArray();
}
/**
* Generates random node ids with a specific type.
*
* @param graph graph used to pick node ids
* @param nbNodes number of node ids to generate
* @param expectedType specific node type to pick
* @return an array of random node ids
*/
public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) {
PrimitiveIterator.OfLong nodes = random.longs(0, graph.getNbNodes()).iterator();
long[] nodeIds = new long[nbNodes];
long nextId;
for (int i = 0; i < nbNodes; i++) {
do {
nextId = nodes.nextLong();
} while (graph.getNodeType(nextId) != expectedType);
nodeIds[i] = nextId;
}
return nodeIds;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java
index 71ac8d3..96bdfd0 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Statistics.java
@@ -1,104 +1,104 @@
package org.softwareheritage.graph.benchmark.utils;
import java.util.ArrayList;
import java.util.Collections;
/**
* Compute various statistics on a list of values.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Statistics {
/** Input values */
ArrayList values;
/**
* Constructor.
*
* @param values input values
*/
public Statistics(ArrayList values) {
this.values = values;
}
/**
* Returns the minimum value.
*
* @return minimum value
*/
public double getMin() {
double min = Double.POSITIVE_INFINITY;
for (double v : values) {
min = Math.min(min, v);
}
return min;
}
/**
* Returns the maximum value.
*
* @return maximum value
*/
public double getMax() {
double max = Double.NEGATIVE_INFINITY;
for (double v : values) {
max = Math.max(max, v);
}
return max;
}
/**
* Computes the average.
*
* @return average value
*/
public double getAverage() {
double sum = 0;
for (double v : values) {
sum += v;
}
return sum / (double) values.size();
}
/**
* Returns the median value.
*
* @return median value
*/
public double getMedian() {
Collections.sort(values);
int length = values.size();
if (length % 2 == 0) {
return (values.get(length / 2) + values.get(length / 2 - 1)) / 2;
} else {
return values.get(length / 2);
}
}
/**
* Computes the standard deviation.
*
* @return standard deviation value
*/
public double getStandardDeviation() {
double average = getAverage();
double variance = 0;
for (double v : values) {
variance += (v - average) * (v - average);
}
variance /= (double) values.size();
return Math.sqrt(variance);
}
/**
* Computes and prints all statistical values.
*/
public void printAll() {
System.out.println("min value: " + getMin());
System.out.println("max value: " + getMax());
System.out.println("average: " + getAverage());
System.out.println("median: " + getMedian());
System.out.println("standard deviation: " + getStandardDeviation());
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java
index f5ed8e1..de5de6c 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/utils/Timing.java
@@ -1,30 +1,30 @@
package org.softwareheritage.graph.benchmark.utils;
/**
* Time measurement utility class.
*
- * @author Thibault Allançon
+ * @author The Software Heritage developers
*/
public class Timing {
/**
* Returns measurement starting timestamp.
*
* @return timestamp used for time measurement
*/
public static long start() {
return System.nanoTime();
}
/**
* Ends timing measurement and returns total duration in seconds.
*
* @param startTime measurement starting timestamp
* @return time in seconds elapsed since starting point
*/
public static double stop(long startTime) {
long endTime = System.nanoTime();
double duration = (double) (endTime - startTime) / 1_000_000_000;
return duration;
}
}