diff --git a/api/server/pom.xml b/api/server/pom.xml --- a/api/server/pom.xml +++ b/api/server/pom.xml @@ -129,4 +129,13 @@ + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.1.1 + + + diff --git a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java --- a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java +++ b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java @@ -5,11 +5,29 @@ import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; +/** + * Edge restriction based on node types, used when visiting the graph. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class AllowedEdges { + /** Graph on which edge restriction is performed */ Graph graph; - // First dimension is source node type, second dimension is destination node type + /** + * 2D boolean matrix storing access rights for all combination of src/dst node types (first + * dimension is source, second dimension is destination) + */ boolean[][] allowed; + /** + * Constructor. + * + * @param graph the graph on which to perform edge restriction + * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) + */ public AllowedEdges(Graph graph, String edgesFmt) { this.graph = graph; @@ -29,6 +47,7 @@ } // Format: "src1:dst1,src2:dst2,[...]" + // TODO: link API doc String[] edgeTypes = edgesFmt.split(","); for (String edgeType : edgeTypes) { String[] nodeTypes = edgeType.split(":"); @@ -46,12 +65,25 @@ } } + /** + * Checks if a given edge can be followed during graph traversal. + * + * @param srcNodeId edge source node + * @param dstNodeId edge destination node + * @return true if allowed and false otherwise + */ public boolean isAllowed(long srcNodeId, long dstNodeId) { Node.Type srcType = graph.getNodeType(srcNodeId); Node.Type dstType = graph.getNodeType(dstNodeId); return isAllowed(srcType, dstType); } + /** + * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of + * node ids. + * + * @see AllowedEdges#isAllowed(long, long) + */ public boolean isAllowed(Node.Type srcType, Node.Type dstType) { return allowed[srcType.ordinal()][dstType.ordinal()]; } diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java --- a/api/server/src/main/java/org/softwareheritage/graph/App.java +++ b/api/server/src/main/java/org/softwareheritage/graph/App.java @@ -15,6 +15,14 @@ import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.algo.Stats; +/** + * Entrypoint of the swh-graph server REST API. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class App { public static void main(String[] args) throws IOException { String path = args[0]; diff --git a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java --- a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -7,15 +7,38 @@ import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; +/** + * REST API endpoints wrapper functions. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class Endpoint { + /** Graph where traversal endpoint is performed */ Graph graph; + /** Internal traversal API */ Traversal traversal; + /** + * Constructor. + * + * @param graph the graph used for traversal endpoint + * @param direction a string (either "forward" or "backward") specifying edge orientation + * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) + */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } + /** + * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. + * + * @param nodeIds the list of long node ids + * @return a list of corresponding SWH PIDs + */ private ArrayList convertNodesToSwhIds(ArrayList nodeIds) { ArrayList swhIds = new ArrayList<>(); for (long nodeId : nodeIds) { @@ -24,6 +47,13 @@ return swhIds; } + /** + * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. + * + * @param nodeIds the list of long node ids + * @return the corresponding {@link SwhPath} + * @see org.softwareheritage.graph.SwhPath + */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { @@ -32,6 +62,13 @@ return path; } + /** + * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. + * + * @param pathsNodeId the list of paths with long node ids + * @return a list of corresponding {@link SwhPath} + * @see org.softwareheritage.graph.SwhPath + */ private ArrayList convertPathsToSwhIds(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { @@ -40,18 +77,45 @@ return paths; } + /** + * Leaves endpoint wrapper (performs input/output node ids conversions). + * + * @param src source node of endpoint call specified as a SwhId + * @return the resulting list of SwhId from endpoint call + * @see org.softwareheritage.graph.SwhId + * @see org.softwareheritage.graph.algo.Traversal#leaves(long) + */ public ArrayList leaves(SwhId src) { long srcNodeId = graph.getNodeId(src); ArrayList nodeIds = traversal.leaves(srcNodeId); return convertNodesToSwhIds(nodeIds); } + /** + * Neighbors endpoint wrapper (performs input/output node ids conversions). + * + * @param src source node of endpoint call specified as a SwhId + * @return the resulting list of SwhId from endpoint call + * @see org.softwareheritage.graph.SwhId + * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) + */ public ArrayList neighbors(SwhId src) { long srcNodeId = graph.getNodeId(src); ArrayList nodeIds = traversal.neighbors(srcNodeId); return convertNodesToSwhIds(nodeIds); } + /** + * Walk endpoint wrapper (performs input/output node ids conversions). + * + * @param src source node of endpoint call specified as a SwhId + * @param dstFmt destination used in endpoint call (TODO: link to API doc) + * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") + * @return the resulting {@link SwhPath} from endpoint call + * @see org.softwareheritage.graph.SwhId + * @see org.softwareheritage.graph.SwhPath + * @see org.softwareheritage.graph.algo.Traversal#walk + */ public SwhPath walk(SwhId src, String dstFmt, String algorithm) { long srcNodeId = graph.getNodeId(src); ArrayList nodeIds = null; @@ -71,12 +135,29 @@ return convertNodesToSwhPath(nodeIds); } + /** + * VisitNodes endpoint wrapper (performs input/output node ids conversions). + * + * @param src source node of endpoint call specified as a SwhId + * @return the resulting list of SwhId from endpoint call + * @see org.softwareheritage.graph.SwhId + * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) + */ public ArrayList visitNodes(SwhId src) { long srcNodeId = graph.getNodeId(src); ArrayList nodeIds = traversal.visitNodes(srcNodeId); return convertNodesToSwhIds(nodeIds); } + /** + * VisitPaths endpoint wrapper (performs input/output node ids conversions). + * + * @param src source node of endpoint call specified as a SwhId + * @return the resulting list of {@link SwhPath} from endpoint call + * @see org.softwareheritage.graph.SwhId + * @see org.softwareheritage.graph.SwhPath + * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) + */ public ArrayList visitPaths(SwhId src) { long srcNodeId = graph.getNodeId(src); ArrayList> paths = traversal.visitPaths(srcNodeId); diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java --- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -9,17 +9,38 @@ import org.softwareheritage.graph.backend.NodeIdMap; import org.softwareheritage.graph.backend.NodeTypesMap; +/** + * Main class storing the compressed graph and node id mappings. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class Graph { + /** File extension for the SWH PID to long node id map */ public static final String PID_TO_NODE = ".pid2node.csv"; + /** File extension for the long node id to SWH PID map */ public static final String NODE_TO_PID = ".node2pid.csv"; + /** File extension for the long node id to node typ map */ public static final String NODE_TO_TYPE = ".node2type.map"; + /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ BVGraph graph; + /** Transposed compressed graph (used for backward traversals) */ BVGraph graphTransposed; + /** Path and basename of the compressed graph */ String path; + /** Mapping long id ↔ SWH PIDs */ NodeIdMap nodeIdMap; + /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; + /** + * Constructor. + * + * @param path path and basename of the compressed graph to load + */ public Graph(String path) throws IOException { this.graph = BVGraph.load(path); this.graphTransposed = BVGraph.load(path + "-transposed"); @@ -28,54 +49,134 @@ this.nodeTypesMap = new NodeTypesMap(path); } + /** + * Cleans up graph resources after use. + */ public void cleanUp() throws IOException { nodeIdMap.close(); } + /** + * Returns the graph full path. + * + * @return graph full path + */ public String getPath() { return path; } + /** + * Converts {@link SwhId} node to long. + * + * @param swhId node specified as a {@link SwhId} + * @return internal long node id + * @see org.softwareheritage.graph.SwhId + */ public long getNodeId(SwhId swhId) { return nodeIdMap.getNodeId(swhId); } + /** + * Converts long id node to {@link SwhId}. + * + * @param nodeId node specified as a long id + * @return external SWH PID + * @see org.softwareheritage.graph.SwhId + */ public SwhId getSwhId(long nodeId) { return nodeIdMap.getSwhId(nodeId); } + /** + * Returns node type. + * + * @param nodeId node specified as a long id + * @return corresponding node type + * @see org.softwareheritage.graph.Node.Type + */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } + /** + * Returns number of nodes in the graph. + * + * @return number of nodes in the graph + */ public long getNbNodes() { return graph.numNodes(); } + /** + * Returns number of edges in the graph. + * + * @return number of edges in the graph + */ public long getNbEdges() { return graph.numArcs(); } + /** + * Returns list of successors of a node. + * + * @param nodeId node specified as a long id + * @return list of successors of the node, specified as a {@link LongBigArrays} + * @see it.unimi.dsi.fastutil.longs.LongBigArrays + */ public long[][] successors(long nodeId) { return graph.successorBigArray(nodeId); } + /** + * Returns the outdegree of a node. + * + * @param nodeId node specified as a long id + * @return outdegree of a node + */ public long outdegree(long nodeId) { return graph.outdegree(nodeId); } + /** + * Returns list of predecessors of a node. + * + * @param nodeId node specified as a long id + * @return list of predecessors of the node, specified as a {@link LongBigArrays} + * @see it.unimi.dsi.fastutil.longs.LongBigArrays + */ public long[][] predecessors(long nodeId) { return graphTransposed.successorBigArray(nodeId); } + /** + * Returns the indegree of a node. + * + * @param nodeId node specified as a long id + * @return indegree of a node + */ public long indegree(long nodeId) { return graphTransposed.outdegree(nodeId); } + /** + * Returns the degree of a node, depending on graph orientation. + * + * @param nodeId node specified as a long id + * @param useTransposed boolean value to use transposed graph + * @return degree of a node + */ public long degree(long nodeId, boolean useTransposed) { return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); } + /** + * Returns the neighbors of a node, depending on graph orientation. + * + * @param nodeId node specified as a long id + * @param useTransposed boolean value to use transposed graph + * @return list of neighbors of the node, specified as a {@link LongBigArrays} + * @see it.unimi.dsi.fastutil.longs.LongBigArrays + */ public long[][] neighbors(long nodeId, boolean useTransposed) { return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } diff --git a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java --- a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -7,12 +7,32 @@ import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; +/** + * Iterator class to go over a node neighbors in the graph. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class Neighbors implements Iterable { + /** Graph used to explore neighbors */ Graph graph; + /** Boolean to specify the use of the transposed graph */ boolean useTransposed; + /** Graph edge restriction */ AllowedEdges edges; + /** Source node from which neighbors will be listed */ long srcNodeId; + /** + * Constructor. + * + * @param graph graph used to explore neighbors + * @param useTransposed boolean value to use transposed graph + * @param edges edges allowed to be used in the graph + * @param srcNodeId source node from where to list neighbors + */ public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) { this.graph = graph; this.useTransposed = useTransposed; @@ -25,6 +45,14 @@ return new NeighborsIterator(); } + /** + * Inner class for {@link Neighbors} iterator. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class NeighborsIterator implements Iterator { long nextNeighborIdx; long nbNeighbors; diff --git a/api/server/src/main/java/org/softwareheritage/graph/Node.java b/api/server/src/main/java/org/softwareheritage/graph/Node.java --- a/api/server/src/main/java/org/softwareheritage/graph/Node.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Node.java @@ -4,14 +4,38 @@ import java.util.Arrays; import java.util.List; +/** + * A node in the Software Heritage graph. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class Node { + /** + * Software Heritage graph node types, as described in the + * data model. + */ public enum Type { + /** Content node */ CNT, + /** Directory node */ DIR, + /** Release node */ REL, + /** Revision node */ REV, + /** Snapshot node */ SNP; + /** + * Converts integer to corresponding SWH node type. + * + * @param intType node type represented as an integer + * @return the corresponding {@link Node.Type} value + * @see org.softwareheritage.graph.Node.Type + */ public static Node.Type fromInt(int intType) { switch (intType) { case 0: @@ -28,18 +52,32 @@ return null; } + /** + * Parses SWH node type from string. + * + * @param strType node type represented as a string + * @return the corresponding {@link Node.Type} value + * @see org.softwareheritage.graph.Node.Type + */ public static Node.Type fromStr(String strType) { return Node.Type.valueOf(strType.toUpperCase()); } - public static ArrayList parse(String strType) { + /** + * Parses SWH node type possible values from formatted string (TODO: link API doc). + * + * @param strFmtType node types represented as a formatted string (TODO: link API doc) + * @return a list containing the {@link Node.Type} values + * @see org.softwareheritage.graph.Node.Type + */ + public static ArrayList parse(String strFmtType) { ArrayList types = new ArrayList<>(); - if (strType.equals("*")) { + if (strFmtType.equals("*")) { List nodeTypes = Arrays.asList(Node.Type.values()); types.addAll(nodeTypes); } else { - types.add(Node.Type.fromStr(strType)); + types.add(Node.Type.fromStr(strFmtType)); } return types; diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java --- a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java +++ b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java @@ -4,18 +4,36 @@ import org.softwareheritage.graph.Node; +/** + * A Software Heritage PID, see persistent + * identifier documentation. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class SwhId { + /** Fixed hash length of the PID */ public static final int HASH_LENGTH = 40; + /** Full PID as a string */ String swhId; + /** PID node type */ Node.Type type; + /** PID hex-encoded SHA1 hash */ String hash; - // SWH ID format: 'swh:1:type:hash' - // https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html + /** + * Constructor. + * + * @param swhId full PID as a string + */ public SwhId(String swhId) { this.swhId = swhId; + // PID format: 'swh:1:type:hash' String[] parts = swhId.split(":"); if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) { throw new IllegalArgumentException("Expected SWH ID format to be 'swh:1:type:hash', got: " + swhId); @@ -52,15 +70,31 @@ return swhId; } + /** + * Returns full PID as a string. + * + * @return full PID string + */ @JsonValue public String getSwhId() { return swhId; } + /** + * Returns PID node type. + * + * @return PID corresponding {@link Node.Type} + * @see org.softwareheritage.graph.Node.Type + */ public Node.Type getType() { return type; } + /** + * Returns PID hex-encoded SHA1 hash. + * + * @return PID string hash + */ public String getHash() { return hash; } diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java --- a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java +++ b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java @@ -6,13 +6,30 @@ import org.softwareheritage.graph.SwhId; +/** + * Wrapper class to store a list of {@link SwhId}. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + * @see org.softwareheritage.graph.SwhId + */ + public class SwhPath { ArrayList path; + /** + * Constructor. + */ public SwhPath() { this.path = new ArrayList(); } + /** + * Constructor. + * + * @param swhIds variable number of string PIDs to initialize this path with + */ public SwhPath(String ...swhIds) { this(); for (String swhId : swhIds) { @@ -20,6 +37,12 @@ } } + /** + * Constructor. + * + * @param swhIds variable number of @{link SwhId} to initialize this path with + * @see org.softwareheritage.graph.SwhId + */ public SwhPath(SwhId ...swhIds) { this(); for (SwhId swhId : swhIds) { @@ -27,19 +50,43 @@ } } + /** + * Returns this path as a list of {@link SwhId}. + * + * @return list of {@link SwhId} constituting the path + * @see org.softwareheritage.graph.SwhId + */ @JsonValue public ArrayList getPath() { return path; } + /** + * Adds a {@link SwhId} to this path. + * + * @param {@link SwhId} to add to this path + * @see org.softwareheritage.graph.SwhId + */ public void add(SwhId swhId) { path.add(swhId); } + /** + * Returns the {@link SwhId} at the specified position in this path. + * + * @param index position of the {@link SwhId} to return + * @return {@link SwhId} at the specified position + * @see org.softwareheritage.graph.SwhId + */ public SwhId get(int index) { return path.get(index); } + /** + * Returns the number of elements in this path. + * + * @return number of elements in this path + */ public int size() { return path.size(); } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java @@ -4,6 +4,14 @@ import java.io.IOException; import java.util.Properties; +/** + * Statistics on the compressed graph. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class Stats { public class Counts { public long nodes; @@ -28,6 +36,11 @@ public Degree indegree; public Degree outdegree; + /** + * Constructor. + * + * @param graphPath full path of compressed graph + */ public Stats(String graphPath) throws IOException { Properties properties = new Properties(); properties.load(new FileInputStream(graphPath + ".properties")); diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -14,16 +14,40 @@ import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; +/** + * Traversal algorithms on the compressed graph. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class Traversal { + /** Graph used in the traversal */ Graph graph; + /** Boolean to specify the use of the transposed graph */ boolean useTransposed; + /** Graph edge restriction */ AllowedEdges edges; - // Big array storing if we have visited a node + /** + * Bit array storing if we have visited a node + * @see it.unimi.dsi.bits.LongArrayBitVector + */ LongArrayBitVector visited; - // Big array storing parents to retrieve the path when backtracking + /** + * Array storing parent node id for each nodes during a traversal + * @see it.unimi.dsi.fastutil.longs.LongBigArrays + */ long[][] nodeParent; + /** + * Constructor. + * + * @param graph graph used in the traversal + * @param direction a string (either "forward" or "backward") specifying edge orientation + * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc) + */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); @@ -38,6 +62,12 @@ this.nodeParent = LongBigArrays.newBigArray(nbNodes); } + /** + * Returns the leaves of a subgraph rooted at the specified source node. + * + * @param srcNodeId source node + * @return list of node ids corresponding to the leaves + */ public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); @@ -66,6 +96,12 @@ return nodeIds; } + /** + * Returns node direct neighbors (linked with exactly one edge). + * + * @param srcNodeId source node + * @return list of node ids corresponding to the neighbors + */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList(); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { @@ -74,6 +110,12 @@ return nodeIds; } + /** + * Performs a graph traversal and returns explored nodes. + * + * @param srcNodeId source node + * @return list of explored node ids + */ public ArrayList visitNodes(long srcNodeId) { ArrayList nodeIds = new ArrayList(); Stack stack = new Stack(); @@ -97,6 +139,12 @@ return nodeIds; } + /** + * Performs a graph traversal and returns explored paths. + * + * @param srcNodeId source node + * @return list of explored paths (represented as a list of node ids) + */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); Stack currentPath = new Stack(); @@ -104,6 +152,13 @@ return paths; } + /** + * Internal recursive function of {@link #visitPaths}. + * + * @param currentNodeId current node + * @param paths list of currently stored paths + * @param currentPath current path as node ids + */ private void visitPathsInternal( long currentNodeId, ArrayList> paths, Stack currentPath) { currentPath.push(currentNodeId); @@ -125,6 +180,13 @@ currentPath.pop(); } + /** + * Performs a graph traversal and returns the first found path from source to destination. + * + * @param srcNodeId source node + * @param dst destination (either a node or a node type) + * @return found path as a list of node ids + */ public ArrayList walk(long srcNodeId, T dst, String algorithm) { long dstNodeId = -1; if (algorithm.equals("dfs")) { @@ -143,6 +205,13 @@ return nodeIds; } + /** + * Internal DFS function of {@link #walk}. + * + * @param srcNodeId source node + * @param dst destination (either a node or a node type) + * @return final destination node or -1 if no path found + */ private long walkInternalDfs(long srcNodeId, T dst) { Stack stack = new Stack(); this.visited.fill(false); @@ -168,6 +237,13 @@ return -1; } + /** + * Internal BFS function of {@link #walk}. + * + * @param srcNodeId source node + * @param dst destination (either a node or a node type) + * @return final destination node or -1 if no path found + */ private long walkInternalBfs(long srcNodeId, T dst) { Queue queue = new LinkedList(); this.visited.fill(false); @@ -193,6 +269,13 @@ return -1; } + /** + * Internal function of {@link #walk} to check if a node corresponds to the destination. + * + * @param nodeId current node + * @param dst destination (either a node or a node type) + * @return true if the node is a destination, or false otherwise + */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; @@ -205,6 +288,13 @@ } } + /** + * Internal backtracking function of {@link #walk}. + * + * @param srcNodeId source node + * @param dstNodeId destination node + * @return the found path, as a list of node ids + */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList(); long currentNodeId = dstNodeId; diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java --- a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java +++ b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java @@ -7,10 +7,29 @@ import it.unimi.dsi.io.ByteBufferInputStream; +/** + * Wrapper class around very big mmap()-ed file. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class MapFile { + /** + * Memory-mapped file buffer + * @see it.unimi.dsi.io.ByteBufferInputStream + */ ByteBufferInputStream bufferMap; + /** Fixed line length of the mmap()-ed file */ int lineLength; + /** + * Constructor. + * + * @param path file path to mmap() + * @param lineLength fixed length of a line in the file + */ public MapFile(String path, int lineLength) throws IOException { this.bufferMap = null; this.lineLength = lineLength; @@ -21,6 +40,12 @@ } } + /** + * Returns a specific line in the file. + * + * @param lineIndex line number in the file + * @return the line at the specified position + */ public String readAtLine(long lineIndex) { byte[] buffer = new byte[lineLength]; long position = lineIndex * (long) lineLength; @@ -30,6 +55,9 @@ return line.trim(); } + /** + * Closes the mmap()-ed file. + */ public void close() throws IOException { bufferMap.close(); } diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java --- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java +++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java @@ -6,15 +6,35 @@ import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.MapFile; +/** + * Mapping between internal long node id and external SWH PID. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class NodeIdMap { + /** Fixed length of full SWH PID */ public static final int SWH_ID_LENGTH = 50; + /** Fixed length of long node id */ public static final int NODE_ID_LENGTH = 20; + /** Full graph path */ String graphPath; + /** Number of ids to map */ long nbIds; + /** mmap()-ed PID_TO_NODE file */ MapFile swhToNodeMap; + /** mmap()-ed NODE_TO_PID file */ MapFile nodeToSwhMap; + /** + * Constructor. + * + * @param graphPath full graph path + * @param nbNodes number of nodes in the graph + */ public NodeIdMap(String graphPath, long nbNodes) throws IOException { this.graphPath = graphPath; this.nbIds = nbNodes; @@ -26,10 +46,16 @@ this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength); } - // SWH id (string) -> WebGraph node id (long) - // Each line in PID_TO_NODE is formatted as: swhId nodeId - // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId + /** + * Converts SWH PID to corresponding long node id. + * + * @param swhId node represented as a {@link SwhId} + * @return corresponding node as a long id + * @see org.softwareheritage.graph.SwhId + */ public long getNodeId(SwhId swhId) { + // Each line in PID_TO_NODE is formatted as: swhId nodeId + // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId long start = 0; long end = nbIds - 1; @@ -56,11 +82,17 @@ throw new IllegalArgumentException("Unknown SWH id: " + swhId); } - // WebGraph node id (long) -> SWH id (string) - // Each line in NODE_TO_PID is formatted as: swhId - // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the - // nodeId-th line to get corresponding swhId + /** + * Converts a node long id to corresponding SWH PID. + * + * @param nodeId node as a long id + * @return corresponding node as a {@link SwhId} + * @see org.softwareheritage.graph.SwhId + */ public SwhId getSwhId(long nodeId) { + // Each line in NODE_TO_PID is formatted as: swhId + // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the + // nodeId-th line to get corresponding swhId if (nodeId < 0 || nodeId >= nbIds) { throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); } @@ -69,6 +101,9 @@ return new SwhId(swhId); } + /** + * Closes the mapping files. + */ public void close() throws IOException { swhToNodeMap.close(); nodeToSwhMap.close(); diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java --- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java +++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java @@ -8,9 +8,27 @@ import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; +/** + * Mapping between long node id and SWH node type as described in the data model. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class NodeTypesMap { + /** + * Array storing for each node its type + * @see it.unimi.dsi.fastutil.longs.LongBigList + */ LongBigList nodeTypesMap; + /** + * Constructor. + * + * @param graphPath full path of the compressed graph + */ public NodeTypesMap(String graphPath) throws IOException { try { nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE); @@ -19,6 +37,13 @@ } } + /** + * Returns node type from a node long id. + * + * @param nodeId node as a long id + * @return corresponding {@link Node.Type} value + * @see org.softwareheritage.graph.Node.Type + */ public Node.Type getType(long nodeId) { long type = nodeTypesMap.getLong(nodeId); return Node.Type.fromInt((int) type); diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java --- a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java +++ b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java @@ -24,6 +24,14 @@ import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.NodeTypesMap; +/** + * Pre-processing steps (such as dumping mapping files on disk) before running the graph service. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class Setup { public static void main(String[] args) throws IOException { if (args.length != 2) { @@ -42,6 +50,12 @@ System.out.println("Done in: " + duration + " seconds"); } + /** + * Computes and dumps on disk mapping files. + * + * @param nodesPath path of the compressed csv nodes file + * @param graphPath path of the compressed graph + */ // Suppress warning for Object2LongFunction cast @SuppressWarnings("unchecked") static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException { diff --git a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java --- a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java +++ b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java @@ -6,6 +6,14 @@ import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; +/** + * Linux git log experiment to benchmark graph traversal. + * + * @author Thibault Allançon + * @version 1.0 + * @since 1.0 + */ + public class LinuxLog { public static void main(String[] args) throws IOException { String path = args[0];