diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java --- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java +++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java @@ -7,6 +7,11 @@ /** * Edge restriction based on node types, used when visiting the graph. + *
+ * Software Heritage
+ * graph contains multiple node types (contents, directories, revisions, ...) and restricting
+ * the traversal to specific node types is necessary for many querying operations: use cases.
*
* @author Thibault Allançon
* @version 0.0.1
@@ -27,7 +32,8 @@
* Constructor.
*
* @param graph the graph on which to perform edge restriction
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing allowed edges
*/
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
@@ -45,7 +51,6 @@
}
// Format: "src1:dst1,src2:dst2,[...]"
- // TODO: link API doc
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java
--- a/java/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/App.java
@@ -23,7 +23,7 @@
import org.softwareheritage.graph.algo.Stats;
/**
- * Entrypoint of the swh-graph server REST API.
+ * Web framework of the swh-graph server REST API.
*
* @author Thibault Allançon
* @version 0.0.1
@@ -31,6 +31,11 @@
*/
public class App {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException, JSAPException {
SimpleJSAP jsap = new SimpleJSAP(
App.class.getName(),
@@ -54,6 +59,12 @@
startServer(graphPath, port);
}
+ /**
+ * Loads compressed graph and starts the web server to query it.
+ *
+ * @param graphPath basename of the compressed graph
+ * @param port binding port of the server
+ */
private static void startServer(String graphPath, int port) throws IOException {
Graph graph = new Graph(graphPath);
Stats stats = new Stats(graphPath);
@@ -140,6 +151,13 @@
});
}
+ /**
+ * Checks query strings names provided to the REST API.
+ *
+ * @param ctx Javalin HTTP request context
+ * @param allowedFmt a regular expression describing allowed query strings names
+ * @throws IllegalArgumentException unknown query string provided
+ */
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map
+ * Graph operations are segmented between high-level class (this one) and the low-level class
+ * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing
+ * all the input/output node ids conversions and logging timings.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.algo.Traversal
*/
public class Endpoint {
@@ -33,7 +38,8 @@
*
* @param graph the graph used for traversal endpoint
* @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing allowed edges
*/
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
@@ -94,10 +100,10 @@
}
/**
- * Leaves endpoint wrapper (performs input/output node ids conversions).
+ * Leaves endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#leaves(long)
*/
@@ -113,10 +119,10 @@
}
/**
- * Neighbors endpoint wrapper (performs input/output node ids conversions).
+ * Neighbors endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
*/
@@ -132,10 +138,11 @@
}
/**
- * Walk endpoint wrapper (performs input/output node ids conversions).
+ * Walk endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @param dstFmt destination used in endpoint call (TODO: link to API doc)
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @param dstFmt destination formatted string as described in the API
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
* @return the resulting {@link SwhPath} from endpoint call
* @see org.softwareheritage.graph.SwhId
@@ -170,10 +177,10 @@
}
/**
- * VisitNodes endpoint wrapper (performs input/output node ids conversions).
+ * VisitNodes endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
- * @return the resulting list of SwhId from endpoint call
+ * @param src source node of endpoint call specified as a {@link SwhId}
+ * @return the resulting list of {@link SwhId} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
*/
@@ -189,9 +196,9 @@
}
/**
- * VisitPaths endpoint wrapper (performs input/output node ids conversions).
+ * VisitPaths endpoint wrapper.
*
- * @param src source node of endpoint call specified as a SwhId
+ * @param src source node of endpoint call specified as a {@link SwhId}
* @return the resulting list of {@link SwhPath} from endpoint call
* @see org.softwareheritage.graph.SwhId
* @see org.softwareheritage.graph.SwhPath
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
--- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -11,10 +11,22 @@
/**
* Main class storing the compressed graph and node id mappings.
+ *
+ * The compressed graph is stored using the WebGraph
+ * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent
+ * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔
+ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to
+ * PID for users results). However, since graph traversal can be restricted depending on the node
+ * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full
+ * PID lookup.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.AllowedEdges
+ * @see org.softwareheritage.graph.NodeIdMap;
+ * @see org.softwareheritage.graph.NodeTypesMap;
*/
public class Graph {
@@ -120,8 +132,8 @@
* Returns list of successors of a node.
*
* @param nodeId node specified as a long id
- * @return list of successors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a fastutil LongBigArrays
*/
public long[][] successors(long nodeId) {
return graph.successorBigArray(nodeId);
@@ -141,8 +153,8 @@
* Returns list of predecessors of a node.
*
* @param nodeId node specified as a long id
- * @return list of predecessors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a fastutil LongBigArrays
*/
public long[][] predecessors(long nodeId) {
return graphTransposed.successorBigArray(nodeId);
@@ -174,8 +186,8 @@
*
* @param nodeId node specified as a long id
* @param useTransposed boolean value to use transposed graph
- * @return list of neighbors of the node, specified as a {@link LongBigArrays}
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ * @return list of successors of the node, specified as a fastutil LongBigArrays
*/
public long[][] neighbors(long nodeId, boolean useTransposed) {
return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
--- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java
@@ -9,10 +9,13 @@
/**
* Iterator class to go over a node neighbors in the graph.
+ *
+ * Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.AllowedEdges
*/
public class Neighbors implements Iterable
+ * These statistics are not computed but directly read from WebGraph generated .stats and .properties files.
*
* @author Thibault Allançon
* @version 0.0.1
@@ -39,7 +42,7 @@
/**
* Constructor.
*
- * @param graphPath full path of compressed graph
+ * @param graphPath path and basename of compressed graph
*/
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -10,16 +10,22 @@
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import org.softwareheritage.graph.AllowedEdges;
+import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
/**
* Traversal algorithms on the compressed graph.
+ *
+ * Internal implementation of the traversal API endpoints. These methods only input/output internal
+ * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage
+ * PID.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.Endpoint
*/
public class Traversal {
@@ -30,15 +36,9 @@
/** Graph edge restriction */
AllowedEdges edges;
- /**
- * Bit array storing if we have visited a node
- * @see it.unimi.dsi.bits.LongArrayBitVector
- */
+ /** Bit array storing if we have visited a node */
LongArrayBitVector visited;
- /**
- * Array storing parent node id for each nodes during a traversal
- * @see it.unimi.dsi.fastutil.longs.LongBigArrays
- */
+ /** LongBigArray storing parent node id for each nodes during a traversal */
long[][] nodeParent;
/**
@@ -46,7 +46,8 @@
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ * @param edgesFmt a formatted string describing allowed edges
*/
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
@@ -9,6 +9,10 @@
/**
* Wrapper class around very big mmap()-ed file.
+ *
+ * Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this
+ * Java limit.
*
* @author Thibault Allançon
* @version 0.0.1
@@ -16,10 +20,7 @@
*/
public class MapFile {
- /**
- * Memory-mapped file buffer
- * @see it.unimi.dsi.io.ByteBufferInputStream
- */
+ /** Memory-mapped file buffer */
ByteBufferInputStream bufferMap;
/** Fixed line length of the mmap()-ed file */
int lineLength;
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -5,13 +5,18 @@
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.MapFile;
+import org.softwareheritage.graph.backend.Setup;
/**
* Mapping between internal long node id and external SWH PID.
+ *
+ * Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then
+ * they are loaded here using mmap().
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
+ * @see org.softwareheritage.graph.backend.Setup
*/
public class NodeIdMap {
@@ -20,7 +25,7 @@
/** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
- /** Full graph path */
+ /** Graph path and basename */
String graphPath;
/** Number of ids to map */
long nbIds;
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -11,6 +11,11 @@
/**
* Mapping between long node id and SWH node type as described in the data model.
+ *
+ * The type mapping is pre-computed and dumped on disk in the {@link Setup} class, then it is loaded
+ * in-memory here using fastutil LongBigList. To be
+ * space-efficient, the mapping is stored as a bitmap using minimum number of bits per {@link
+ * Node.Type}.
*
* @author Thibault Allançon
* @version 0.0.1
@@ -18,16 +23,13 @@
*/
public class NodeTypesMap {
- /**
- * Array storing for each node its type
- * @see it.unimi.dsi.fastutil.longs.LongBigList
- */
+ /** Array storing for each node its type */
LongBigList nodeTypesMap;
/**
* Constructor.
*
- * @param graphPath full path of the compressed graph
+ * @param graphPath path and basename of the compressed graph
*/
public NodeTypesMap(String graphPath) throws IOException {
try {
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -33,6 +33,11 @@
*/
public class Setup {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.err.println("Expected parameters:
+ * The goal is to do the equivalent of a {@code git log} in the Linux kernel by following revisions
+ * nodes.
*
* @author Thibault Allançon
* @version 0.0.1
@@ -15,6 +18,11 @@
*/
public class LinuxLog {
+ /**
+ * Main entrypoint.
+ *
+ * @param args command line arguments
+ */
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);