diff --git a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java --- a/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java +++ b/java/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java @@ -7,6 +7,11 @@ /** * Edge restriction based on node types, used when visiting the graph. + *

+ * Software Heritage + * graph contains multiple node types (contents, directories, revisions, ...) and restricting + * the traversal to specific node types is necessary for many querying operations: use cases. * * @author Thibault Allançon * @version 0.0.1 @@ -27,7 +32,8 @@ * Constructor. * * @param graph the graph on which to perform edge restriction - * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) + * @param edgesFmt a formatted string describing allowed edges */ public AllowedEdges(Graph graph, String edgesFmt) { this.graph = graph; @@ -45,7 +51,6 @@ } // Format: "src1:dst1,src2:dst2,[...]" - // TODO: link API doc String[] edgeTypes = edgesFmt.split(","); for (String edgeType : edgeTypes) { String[] nodeTypes = edgeType.split(":"); diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java --- a/java/server/src/main/java/org/softwareheritage/graph/App.java +++ b/java/server/src/main/java/org/softwareheritage/graph/App.java @@ -23,7 +23,7 @@ import org.softwareheritage.graph.algo.Stats; /** - * Entrypoint of the swh-graph server REST API. + * Web framework of the swh-graph server REST API. * * @author Thibault Allançon * @version 0.0.1 @@ -31,6 +31,11 @@ */ public class App { + /** + * Main entrypoint. + * + * @param args command line arguments + */ public static void main(String[] args) throws IOException, JSAPException { SimpleJSAP jsap = new SimpleJSAP( App.class.getName(), @@ -54,6 +59,12 @@ startServer(graphPath, port); } + /** + * Loads compressed graph and starts the web server to query it. + * + * @param graphPath basename of the compressed graph + * @param port binding port of the server + */ private static void startServer(String graphPath, int port) throws IOException { Graph graph = new Graph(graphPath); Stats stats = new Stats(graphPath); @@ -140,6 +151,13 @@ }); } + /** + * Checks query strings names provided to the REST API. + * + * @param ctx Javalin HTTP request context + * @param allowedFmt a regular expression describing allowed query strings names + * @throws IllegalArgumentException unknown query string provided + */ private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -13,10 +13,15 @@ /** * REST API endpoints wrapper functions. + *

+ * Graph operations are segmented between high-level class (this one) and the low-level class + * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing + * all the input/output node ids conversions and logging timings. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 + * @see org.softwareheritage.graph.algo.Traversal */ public class Endpoint { @@ -33,7 +38,8 @@ * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation - * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) + * @param edgesFmt a formatted string describing allowed edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; @@ -94,10 +100,10 @@ } /** - * Leaves endpoint wrapper (performs input/output node ids conversions). + * Leaves endpoint wrapper. * - * @param src source node of endpoint call specified as a SwhId - * @return the resulting list of SwhId from endpoint call + * @param src source node of endpoint call specified as a {@link SwhId} + * @return the resulting list of {@link SwhId} from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.algo.Traversal#leaves(long) */ @@ -113,10 +119,10 @@ } /** - * Neighbors endpoint wrapper (performs input/output node ids conversions). + * Neighbors endpoint wrapper. * - * @param src source node of endpoint call specified as a SwhId - * @return the resulting list of SwhId from endpoint call + * @param src source node of endpoint call specified as a {@link SwhId} + * @return the resulting list of {@link SwhId} from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) */ @@ -132,10 +138,11 @@ } /** - * Walk endpoint wrapper (performs input/output node ids conversions). + * Walk endpoint wrapper. * - * @param src source node of endpoint call specified as a SwhId - * @param dstFmt destination used in endpoint call (TODO: link to API doc) + * @param src source node of endpoint call specified as a {@link SwhId} + * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") * @return the resulting {@link SwhPath} from endpoint call * @see org.softwareheritage.graph.SwhId @@ -170,10 +177,10 @@ } /** - * VisitNodes endpoint wrapper (performs input/output node ids conversions). + * VisitNodes endpoint wrapper. * - * @param src source node of endpoint call specified as a SwhId - * @return the resulting list of SwhId from endpoint call + * @param src source node of endpoint call specified as a {@link SwhId} + * @return the resulting list of {@link SwhId} from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) */ @@ -189,9 +196,9 @@ } /** - * VisitPaths endpoint wrapper (performs input/output node ids conversions). + * VisitPaths endpoint wrapper. * - * @param src source node of endpoint call specified as a SwhId + * @param src source node of endpoint call specified as a {@link SwhId} * @return the resulting list of {@link SwhPath} from endpoint call * @see org.softwareheritage.graph.SwhId * @see org.softwareheritage.graph.SwhPath diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java --- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -11,10 +11,22 @@ /** * Main class storing the compressed graph and node id mappings. + *

+ * The compressed graph is stored using the WebGraph + * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent + * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔ + * PID) are used for the input (users refer to the graph using PID) and the output (convert back to + * PID for users results). However, since graph traversal can be restricted depending on the node + * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full + * PID lookup. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 + * @see org.softwareheritage.graph.AllowedEdges + * @see org.softwareheritage.graph.NodeIdMap; + * @see org.softwareheritage.graph.NodeTypesMap; */ public class Graph { @@ -120,8 +132,8 @@ * Returns list of successors of a node. * * @param nodeId node specified as a long id - * @return list of successors of the node, specified as a {@link LongBigArrays} - * @see it.unimi.dsi.fastutil.longs.LongBigArrays + * @return list of successors of the node, specified as a fastutil LongBigArrays */ public long[][] successors(long nodeId) { return graph.successorBigArray(nodeId); @@ -141,8 +153,8 @@ * Returns list of predecessors of a node. * * @param nodeId node specified as a long id - * @return list of predecessors of the node, specified as a {@link LongBigArrays} - * @see it.unimi.dsi.fastutil.longs.LongBigArrays + * @return list of successors of the node, specified as a fastutil LongBigArrays */ public long[][] predecessors(long nodeId) { return graphTransposed.successorBigArray(nodeId); @@ -174,8 +186,8 @@ * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph - * @return list of neighbors of the node, specified as a {@link LongBigArrays} - * @see it.unimi.dsi.fastutil.longs.LongBigArrays + * @return list of successors of the node, specified as a fastutil LongBigArrays */ public long[][] neighbors(long nodeId, boolean useTransposed) { return (useTransposed) ? predecessors(nodeId) : successors(nodeId); diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java --- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -9,10 +9,13 @@ /** * Iterator class to go over a node neighbors in the graph. + *

+ * Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 + * @see org.softwareheritage.graph.AllowedEdges */ public class Neighbors implements Iterable { @@ -56,6 +59,7 @@ public class NeighborsIterator implements Iterator { long nextNeighborIdx; long nbNeighbors; + // LongBigArrays to support 64-bit indexing. long[][] neighbors; public NeighborsIterator() { @@ -65,7 +69,7 @@ } public boolean hasNext() { - // No edge restriction case: bypass type checks and skip to next neighbor + // Case 1: no edge restriction, bypass type checks and skip to next neighbor if (edges.restrictedTo == null) { if (nextNeighborIdx + 1 < nbNeighbors) { nextNeighborIdx++; @@ -75,7 +79,7 @@ } } - // Edge restriction case: look ahead for next neighbor + // Case 2: edge restriction, look ahead for next neighbor for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) { long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx); if (edges.isAllowed(srcNodeId, nextNodeId)) { diff --git a/java/server/src/main/java/org/softwareheritage/graph/Node.java b/java/server/src/main/java/org/softwareheritage/graph/Node.java --- a/java/server/src/main/java/org/softwareheritage/graph/Node.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Node.java @@ -57,7 +57,7 @@ } /** - * Parses SWH node type from string. + * Converts string to corresponding SWH node type. * * @param strType node type represented as a string * @return the corresponding {@link Node.Type} value @@ -71,9 +71,11 @@ } /** - * Parses SWH node type possible values from formatted string (TODO: link API doc). + * Parses SWH node type possible values from formatted string (see the API + * syntax). * - * @param strFmtType node types represented as a formatted string (TODO: link API doc) + * @param strFmtType node types represented as a formatted string * @return a list containing the {@link Node.Type} values * @see org.softwareheritage.graph.Node.Type */ diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java @@ -16,6 +16,7 @@ */ public class SwhPath { + /** Internal list of {@link SwhId} */ ArrayList path; /** @@ -40,7 +41,7 @@ /** * Constructor. * - * @param swhIds variable number of @{link SwhId} to initialize this path with + * @param swhIds variable number of {@link SwhId} to initialize this path with * @see org.softwareheritage.graph.SwhId */ public SwhPath(SwhId ...swhIds) { diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Stats.java @@ -6,6 +6,9 @@ /** * Statistics on the compressed graph. + *

+ * These statistics are not computed but directly read from WebGraph generated .stats and .properties files. * * @author Thibault Allançon * @version 0.0.1 @@ -39,7 +42,7 @@ /** * Constructor. * - * @param graphPath full path of compressed graph + * @param graphPath path and basename of compressed graph */ public Stats(String graphPath) throws IOException { Properties properties = new Properties(); diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -10,16 +10,22 @@ import it.unimi.dsi.fastutil.longs.LongBigArrays; import org.softwareheritage.graph.AllowedEdges; +import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; /** * Traversal algorithms on the compressed graph. + *

+ * Internal implementation of the traversal API endpoints. These methods only input/output internal + * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage + * PID. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 + * @see org.softwareheritage.graph.Endpoint */ public class Traversal { @@ -30,15 +36,9 @@ /** Graph edge restriction */ AllowedEdges edges; - /** - * Bit array storing if we have visited a node - * @see it.unimi.dsi.bits.LongArrayBitVector - */ + /** Bit array storing if we have visited a node */ LongArrayBitVector visited; - /** - * Array storing parent node id for each nodes during a traversal - * @see it.unimi.dsi.fastutil.longs.LongBigArrays - */ + /** LongBigArray storing parent node id for each nodes during a traversal */ long[][] nodeParent; /** @@ -46,7 +46,8 @@ * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation - * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) + * @param edgesFmt a formatted string describing allowed edges */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java --- a/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java @@ -9,6 +9,10 @@ /** * Wrapper class around very big mmap()-ed file. + *

+ * Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this + * Java limit. * * @author Thibault Allançon * @version 0.0.1 @@ -16,10 +20,7 @@ */ public class MapFile { - /** - * Memory-mapped file buffer - * @see it.unimi.dsi.io.ByteBufferInputStream - */ + /** Memory-mapped file buffer */ ByteBufferInputStream bufferMap; /** Fixed line length of the mmap()-ed file */ int lineLength; diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java @@ -5,13 +5,18 @@ import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.MapFile; +import org.softwareheritage.graph.backend.Setup; /** * Mapping between internal long node id and external SWH PID. + *

+ * Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then + * they are loaded here using mmap(). * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 + * @see org.softwareheritage.graph.backend.Setup */ public class NodeIdMap { @@ -20,7 +25,7 @@ /** Fixed length of long node id */ public static final int NODE_ID_LENGTH = 20; - /** Full graph path */ + /** Graph path and basename */ String graphPath; /** Number of ids to map */ long nbIds; diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java @@ -11,6 +11,11 @@ /** * Mapping between long node id and SWH node type as described in the data model. + *

+ * The type mapping is pre-computed and dumped on disk in the {@link Setup} class, then it is loaded + * in-memory here using fastutil LongBigList. To be + * space-efficient, the mapping is stored as a bitmap using minimum number of bits per {@link + * Node.Type}. * * @author Thibault Allançon * @version 0.0.1 @@ -18,16 +23,13 @@ */ public class NodeTypesMap { - /** - * Array storing for each node its type - * @see it.unimi.dsi.fastutil.longs.LongBigList - */ + /** Array storing for each node its type */ LongBigList nodeTypesMap; /** * Constructor. * - * @param graphPath full path of the compressed graph + * @param graphPath path and basename of the compressed graph */ public NodeTypesMap(String graphPath) throws IOException { try { diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java --- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java @@ -33,6 +33,11 @@ */ public class Setup { + /** + * Main entrypoint. + * + * @param args command line arguments + */ public static void main(String[] args) throws IOException { if (args.length != 2) { System.err.println("Expected parameters: "); diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java @@ -8,6 +8,9 @@ /** * Linux git log experiment to benchmark graph traversal. + *

+ * The goal is to do the equivalent of a {@code git log} in the Linux kernel by following revisions + * nodes. * * @author Thibault Allançon * @version 0.0.1 @@ -15,6 +18,11 @@ */ public class LinuxLog { + /** + * Main entrypoint. + * + * @param args command line arguments + */ public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path);