Page MenuHomeSoftware Heritage

D1741.id5851.diff
No OneTemporary

D1741.id5851.diff

diff --git a/api/server/pom.xml b/api/server/pom.xml
--- a/api/server/pom.xml
+++ b/api/server/pom.xml
@@ -129,4 +129,13 @@
</plugins>
</pluginManagement>
</build>
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>3.1.1</version>
+ </plugin>
+ </plugins>
+ </reporting>
</project>
diff --git a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
--- a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
@@ -5,11 +5,29 @@
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+/**
+ * Edge restriction based on node types, used when visiting the graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class AllowedEdges {
+ /** Graph on which edge restriction is performed */
Graph graph;
- // First dimension is source node type, second dimension is destination node type
+ /**
+ * 2D boolean matrix storing access rights for all combination of src/dst node types (first
+ * dimension is source, second dimension is destination)
+ */
boolean[][] allowed;
+ /**
+ * Constructor.
+ *
+ * @param graph the graph on which to perform edge restriction
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
@@ -29,6 +47,7 @@
}
// Format: "src1:dst1,src2:dst2,[...]"
+ // TODO: link API doc
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
@@ -46,12 +65,25 @@
}
}
+ /**
+ * Checks if allowed to use the edge from the source node to the destination node.
+ *
+ * @param srcNodeId edge source node
+ * @param dstNodeId edge destination node
+ * @return true if allowed and false otherwise
+ */
public boolean isAllowed(long srcNodeId, long dstNodeId) {
Node.Type srcType = graph.getNodeType(srcNodeId);
Node.Type dstType = graph.getNodeType(dstNodeId);
return isAllowed(srcType, dstType);
}
+ /**
+ * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of
+ * node ids.
+ *
+ * @see AllowedEdges#isAllowed(long, long)
+ */
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
return allowed[srcType.ordinal()][dstType.ordinal()];
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java
--- a/api/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/App.java
@@ -15,6 +15,14 @@
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.algo.Stats;
+/**
+ * Entrypoint of the swh-graph server REST API.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class App {
public static void main(String[] args) throws IOException {
String path = args[0];
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -7,15 +7,38 @@
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
+/**
+ * REST API endpoints wrapper functions.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Endpoint {
+ /** Graph where traversal endpoint is performed */
Graph graph;
+ /** Internal traversal API */
Traversal traversal;
+ /**
+ * Constructor.
+ *
+ * @param graph the graph used for traversal endpoint
+ * @param direction a string (either "forward" or "backward") specifying edge orientation
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
+ /**
+ * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
+ *
+ * @param nodeIds the list of long node ids
+ * @return a list of corresponding SWH PIDs
+ */
private ArrayList<SwhId> convertNodesToSwhIds(ArrayList<Long> nodeIds) {
ArrayList<SwhId> swhIds = new ArrayList<>();
for (long nodeId : nodeIds) {
@@ -24,6 +47,13 @@
return swhIds;
}
+ /**
+ * Converts a list of (internal) long node ids to the corresponding SwhPath.
+ *
+ * @param nodeIds the list of long node ids
+ * @return the corresponding SwhPath
+ * @see org.softwareheritage.graph.SwhPath
+ */
private SwhPath convertNodesToSwhPath(ArrayList<Long> nodeIds) {
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
@@ -32,6 +62,13 @@
return path;
}
+ /**
+ * Converts a list of paths using (internal) long node ids to a list of corresponding SwhPath.
+ *
+ * @param pathsNodeId the list of paths with long node ids
+ * @return a list of corresponding SwhPath
+ * @see org.softwareheritage.graph.SwhPath
+ */
private ArrayList<SwhPath> convertPathsToSwhIds(ArrayList<ArrayList<Long>> pathsNodeId) {
ArrayList<SwhPath> paths = new ArrayList<>();
for (ArrayList<Long> path : pathsNodeId) {
@@ -40,18 +77,45 @@
return paths;
}
+ /**
+ * Leaves endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#leaves(long)
+ */
public ArrayList<SwhId> leaves(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = traversal.leaves(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * Neighbors endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
+ */
public ArrayList<SwhId> neighbors(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = traversal.neighbors(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * Walk endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @param dstFmt destination used in endpoint call (TODO: link to API doc)
+ * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
+ * @return the resulting SwhPath from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.SwhPath
+ * @see org.softwareheritage.graph.algo.Traversal#walk
+ */
public SwhPath walk(SwhId src, String dstFmt, String algorithm) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = null;
@@ -71,12 +135,29 @@
return convertNodesToSwhPath(nodeIds);
}
+ /**
+ * VisitNodes endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
+ */
public ArrayList<SwhId> visitNodes(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = traversal.visitNodes(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * VisitPaths endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhPath from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.SwhPath
+ * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long)
+ */
public ArrayList<SwhPath> visitPaths(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<ArrayList<Long>> paths = traversal.visitPaths(srcNodeId);
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -9,17 +9,38 @@
import org.softwareheritage.graph.backend.NodeIdMap;
import org.softwareheritage.graph.backend.NodeTypesMap;
+/**
+ * Main class storing the compressed graph and node id mappings.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Graph {
+ /** File extension for the SWH PID to long node id map */
public static final String PID_TO_NODE = ".pid2node.csv";
+ /** File extension for the long node id to SWH PID map */
public static final String NODE_TO_PID = ".node2pid.csv";
+ /** File extension for the long node id to node typ map */
public static final String NODE_TO_TYPE = ".node2type.map";
+ /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
BVGraph graph;
+ /** Transposed compressed graph (used for backward traversals) */
BVGraph graphTransposed;
+ /** Full path of the graph */
String path;
+ /** Mapping for internal long id and external SWH PIDs */
NodeIdMap nodeIdMap;
+ /** Mapping for internal long id and node types */
NodeTypesMap nodeTypesMap;
+ /**
+ * Constructor.
+ *
+ * @param path path of the compressed graph to load
+ */
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
@@ -28,54 +49,134 @@
this.nodeTypesMap = new NodeTypesMap(path);
}
+ /**
+ * Cleans up graph resources after use.
+ */
public void cleanUp() throws IOException {
nodeIdMap.close();
}
+ /**
+ * Returns the graph full path.
+ *
+ * @return graph full path
+ */
public String getPath() {
return path;
}
+ /**
+ * Converts node id as {@link SwhId} to long.
+ *
+ * @param swhId node specified as a {@link SwhId}
+ * @return internal long node id
+ * @see org.softwareheritage.graph.SwhId
+ */
public long getNodeId(SwhId swhId) {
return nodeIdMap.getNodeId(swhId);
}
+ /**
+ * Converts node id as long to {@link SwhId}.
+ *
+ * @param nodeId node specified as a long id
+ * @return external SWH PID
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId getSwhId(long nodeId) {
return nodeIdMap.getSwhId(nodeId);
}
+ /**
+ * Returns node type.
+ *
+ * @param nodeId node specified as a long id
+ * @return corresponding node type
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
+ /**
+ * Returns number of nodes in the graph.
+ *
+ * @return number of nodes in the graph
+ */
public long getNbNodes() {
return graph.numNodes();
}
+ /**
+ * Returns number of edges in the graph.
+ *
+ * @return number of edges in the graph
+ */
public long getNbEdges() {
return graph.numArcs();
}
+ /**
+ * Returns list of successors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return list of successors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] successors(long nodeId) {
return graph.successorBigArray(nodeId);
}
+ /**
+ * Returns the outdegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return outdegree of a node
+ */
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
+ /**
+ * Returns list of predecessors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return list of predecessors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] predecessors(long nodeId) {
return graphTransposed.successorBigArray(nodeId);
}
+ /**
+ * Returns the indegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return indegree of a node
+ */
public long indegree(long nodeId) {
return graphTransposed.outdegree(nodeId);
}
+ /**
+ * Returns the degree of a node, depending on graph orientation.
+ *
+ * @param nodeId node specified as a long id
+ * @param useTransposed boolean value to use transposed graph
+ * @return degree of a node
+ */
public long degree(long nodeId, boolean useTransposed) {
return (useTransposed) ? indegree(nodeId) : outdegree(nodeId);
}
+ /**
+ * Returns the neighbors of a node, depending on graph orientation.
+ *
+ * @param nodeId node specified as a long id
+ * @param useTransposed boolean value to use transposed graph
+ * @return list of neighbors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] neighbors(long nodeId, boolean useTransposed) {
return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
@@ -7,12 +7,32 @@
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Graph;
+/**
+ * Iterator class to go over a node neighbors in the graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Neighbors implements Iterable<Long> {
+ /** Graph used to explore neighbors */
Graph graph;
+ /** Boolean to specify the use of the transposed graph */
boolean useTransposed;
+ /** Graph edge restriction */
AllowedEdges edges;
+ /** Source node from which neighbors will be listed */
long srcNodeId;
+ /**
+ * Constructor.
+ *
+ * @param graph graph used to explore neighbors
+ * @param useTransposed boolean value to use transposed graph
+ * @param edges edges allowed to be used in the graph
+ * @param srcNodeId source node from where to list neighbors
+ */
public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) {
this.graph = graph;
this.useTransposed = useTransposed;
@@ -25,6 +45,14 @@
return new NeighborsIterator();
}
+ /**
+ * Inner class for {@link Neighbors} iterator.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NeighborsIterator implements Iterator<Long> {
long nextNeighborIdx;
long nbNeighbors;
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Node.java b/api/server/src/main/java/org/softwareheritage/graph/Node.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Node.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Node.java
@@ -4,14 +4,38 @@
import java.util.Arrays;
import java.util.List;
+/**
+ * A node in the Software Heritage graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Node {
+ /**
+ * Software Heritage graph node types, as described in the
+ * <a href="https://docs.softwareheritage.org/devel/swh-model/data-model.html">data model</a>.
+ */
public enum Type {
+ /** Content node */
CNT,
+ /** Directory node */
DIR,
+ /** Release node */
REL,
+ /** Revision node */
REV,
+ /** Snapshot node */
SNP;
+ /**
+ * Converts integer to corresponding SWH node type.
+ *
+ * @param intType node type represented as an integer
+ * @return the corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public static Node.Type fromInt(int intType) {
switch (intType) {
case 0:
@@ -28,18 +52,32 @@
return null;
}
+ /**
+ * Converts string to corresponding SWH node type.
+ *
+ * @param strType node type represented as a string
+ * @return the corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public static Node.Type fromStr(String strType) {
return Node.Type.valueOf(strType.toUpperCase());
}
- public static ArrayList<Node.Type> parse(String strType) {
+ /**
+ * Converts formatted string (TODO: link API doc) to SWH node type possible values.
+ *
+ * @param strFmtType node types represented as a formatted string (TODO: link API doc)
+ * @return a list containing the {@link Node.Type} values
+ * @see org.softwareheritage.graph.Node.Type
+ */
+ public static ArrayList<Node.Type> parse(String strFmtType) {
ArrayList<Node.Type> types = new ArrayList<>();
- if (strType.equals("*")) {
+ if (strFmtType.equals("*")) {
List<Node.Type> nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
- types.add(Node.Type.fromStr(strType));
+ types.add(Node.Type.fromStr(strFmtType));
}
return types;
diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
--- a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
@@ -4,18 +4,36 @@
import org.softwareheritage.graph.Node;
+/**
+ * A Software Heritage PID, see <a
+ * href="https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers">persistent
+ * identifier documentation</a>.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class SwhId {
+ /** Fixed hash length of the PID */
public static final int HASH_LENGTH = 40;
+ /** Full PID as a string */
String swhId;
+ /** PID node type */
Node.Type type;
+ /** PID hex-encoded SHA1 hash */
String hash;
- // SWH ID format: 'swh:1:type:hash'
- // https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html
+ /**
+ * Constructor.
+ *
+ * @param swhId full PID as a string
+ */
public SwhId(String swhId) {
this.swhId = swhId;
+ // PID format: 'swh:1:type:hash'
String[] parts = swhId.split(":");
if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) {
throw new IllegalArgumentException("Expected SWH ID format to be 'swh:1:type:hash', got: " + swhId);
@@ -52,15 +70,31 @@
return swhId;
}
+ /**
+ * Returns full PID as a string.
+ *
+ * @return full PID string
+ */
@JsonValue
public String getSwhId() {
return swhId;
}
+ /**
+ * Returns PID node type.
+ *
+ * @return PID corresponding {@link Node.Type}
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getType() {
return type;
}
+ /**
+ * Returns PID hex-encoded SHA1 hash.
+ *
+ * @return PID string hash
+ */
public String getHash() {
return hash;
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
--- a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
@@ -6,13 +6,30 @@
import org.softwareheritage.graph.SwhId;
+/**
+ * Wrapper class to store a list of {@link SwhId}.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ * @see org.softwareheritage.graph.SwhId
+ */
+
public class SwhPath {
ArrayList<SwhId> path;
+ /**
+ * Constructor.
+ */
public SwhPath() {
this.path = new ArrayList<SwhId>();
}
+ /**
+ * Constructor.
+ *
+ * @param swhIds variable number of string PIDs to initialize this path with
+ */
public SwhPath(String ...swhIds) {
this();
for (String swhId : swhIds) {
@@ -20,6 +37,12 @@
}
}
+ /**
+ * Constructor.
+ *
+ * @param swhIds variable number of @{link SwhId} to initialize this path with
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhPath(SwhId ...swhIds) {
this();
for (SwhId swhId : swhIds) {
@@ -27,19 +50,43 @@
}
}
+ /**
+ * Returns this path as a list of {@link SwhId}.
+ *
+ * @return list of {@link SwhId} constituting the path
+ * @see org.softwareheritage.graph.SwhId
+ */
@JsonValue
public ArrayList<SwhId> getPath() {
return path;
}
+ /**
+ * Adds a {@link SwhId} to this path.
+ *
+ * @param {@link SwhId} to add to this path
+ * @see org.softwareheritage.graph.SwhId
+ */
public void add(SwhId swhId) {
path.add(swhId);
}
+ /**
+ * Returns the {@link SwhId} at the specified position in this path.
+ *
+ * @param index position of the {@link SwhId} to return
+ * @return {@link SwhId} at the specified position
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId get(int index) {
return path.get(index);
}
+ /**
+ * Returns the number of elements in this path.
+ *
+ * @return number of elements in this path
+ */
public int size() {
return path.size();
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
--- a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
@@ -4,6 +4,14 @@
import java.io.IOException;
import java.util.Properties;
+/**
+ * Statistics on the compressed graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Stats {
public class Counts {
public long nodes;
@@ -28,6 +36,11 @@
public Degree indegree;
public Degree outdegree;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full path of compressed graph
+ */
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
--- a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -14,16 +14,40 @@
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
+/**
+ * Traversal algorithms on the compressed graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Traversal {
+ /** Graph used in the traversal */
Graph graph;
+ /** Boolean to specify the use of the transposed graph */
boolean useTransposed;
+ /** Graph edge restriction */
AllowedEdges edges;
- // Big array storing if we have visited a node
+ /**
+ * Bit array storing if we have visited a node
+ * @see it.unimi.dsi.bits.LongArrayBitVector
+ */
LongArrayBitVector visited;
- // Big array storing parents to retrieve the path when backtracking
+ /**
+ * Array storing parent node id for each nodes during a traversal
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
long[][] nodeParent;
+ /**
+ * Constructor.
+ *
+ * @param graph graph used in the traversal
+ * @param direction a string (either "forward" or "backward") specifying edge orientation
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
@@ -38,6 +62,12 @@
this.nodeParent = LongBigArrays.newBigArray(nbNodes);
}
+ /**
+ * Returns the leaves of a subgraph rooted at the specified source node.
+ *
+ * @param srcNodeId source node
+ * @return list of node ids corresponding to the leaves
+ */
public ArrayList<Long> leaves(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
@@ -66,6 +96,12 @@
return nodeIds;
}
+ /**
+ * Returns node direct neighbors (linked with exactly one edge).
+ *
+ * @param srcNodeId source node
+ * @return list of node ids corresponding to the neighbors
+ */
public ArrayList<Long> neighbors(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
@@ -74,6 +110,12 @@
return nodeIds;
}
+ /**
+ * Performs a graph traversal and returns explored nodes.
+ *
+ * @param srcNodeId source node
+ * @return list of explored node ids
+ */
public ArrayList<Long> visitNodes(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
@@ -97,6 +139,12 @@
return nodeIds;
}
+ /**
+ * Performs a graph traversal and returns explored paths.
+ *
+ * @param srcNodeId source node
+ * @return list of explored paths (represented as a list of node ids)
+ */
public ArrayList<ArrayList<Long>> visitPaths(long srcNodeId) {
ArrayList<ArrayList<Long>> paths = new ArrayList<>();
Stack<Long> currentPath = new Stack<Long>();
@@ -104,6 +152,13 @@
return paths;
}
+ /**
+ * Internal recursive function of {@link #visitPaths}.
+ *
+ * @param currentNodeId current node
+ * @param paths list of currently stored paths
+ * @param currentPath current path as node ids
+ */
private void visitPathsInternal(
long currentNodeId, ArrayList<ArrayList<Long>> paths, Stack<Long> currentPath) {
currentPath.push(currentNodeId);
@@ -125,6 +180,13 @@
currentPath.pop();
}
+ /**
+ * Performs a graph traversal and returns the first found path from source to destination.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return found path as a list of node ids
+ */
public <T> ArrayList<Long> walk(long srcNodeId, T dst, String algorithm) {
long dstNodeId = -1;
if (algorithm.equals("dfs")) {
@@ -143,6 +205,13 @@
return nodeIds;
}
+ /**
+ * Internal DFS function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return final destination node or -1 if no path found
+ */
private <T> long walkInternalDfs(long srcNodeId, T dst) {
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
@@ -168,6 +237,13 @@
return -1;
}
+ /**
+ * Internal BFS function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return final destination node or -1 if no path found
+ */
private <T> long walkInternalBfs(long srcNodeId, T dst) {
Queue<Long> queue = new LinkedList<Long>();
this.visited.fill(false);
@@ -193,6 +269,13 @@
return -1;
}
+ /**
+ * Internal function of {@link #walk} to check if a node corresponds to the destination.
+ *
+ * @param nodeId current node
+ * @param dst destination (either a node or a node type)
+ * @return true if the node is a destination, or false otherwise
+ */
private <T> boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
@@ -205,6 +288,13 @@
}
}
+ /**
+ * Internal backtracking function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dstNodeId destination node
+ * @return the found path, as a list of node ids
+ */
private ArrayList<Long> backtracking(long srcNodeId, long dstNodeId) {
ArrayList<Long> path = new ArrayList<Long>();
long currentNodeId = dstNodeId;
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
@@ -7,10 +7,29 @@
import it.unimi.dsi.io.ByteBufferInputStream;
+/**
+ * Wrapper class around very big mmap()-ed file.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class MapFile {
+ /**
+ * Memory-mapped file buffer
+ * @see it.unimi.dsi.io.ByteBufferInputStream
+ */
ByteBufferInputStream bufferMap;
+ /** Fixed line length of the mmap()-ed file */
int lineLength;
+ /**
+ * Constructor.
+ *
+ * @param path file path to mmap()
+ * @param lineLength fixed length of a line in the file
+ */
public MapFile(String path, int lineLength) throws IOException {
this.bufferMap = null;
this.lineLength = lineLength;
@@ -21,6 +40,12 @@
}
}
+ /**
+ * Returns a specific line in the file.
+ *
+ * @param lineIndex line number in the file
+ * @return the line at the specified position
+ */
public String readAtLine(long lineIndex) {
byte[] buffer = new byte[lineLength];
long position = lineIndex * (long) lineLength;
@@ -30,6 +55,9 @@
return line.trim();
}
+ /**
+ * Closes the mmap()-ed file.
+ */
public void close() throws IOException {
bufferMap.close();
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -6,15 +6,35 @@
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.MapFile;
+/**
+ * Mapping between internal long node id and external SWH PID.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NodeIdMap {
+ /** Fixed length of full SWH PID */
public static final int SWH_ID_LENGTH = 50;
+ /** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
+ /** Full graph path */
String graphPath;
+ /** Number of ids to map */
long nbIds;
+ /** mmap()-ed PID_TO_NODE file */
MapFile swhToNodeMap;
+ /** mmap()-ed NODE_TO_PID file */
MapFile nodeToSwhMap;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full graph path
+ * @param nbNodes number of nodes in the graph
+ */
public NodeIdMap(String graphPath, long nbNodes) throws IOException {
this.graphPath = graphPath;
this.nbIds = nbNodes;
@@ -26,10 +46,16 @@
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength);
}
- // SWH id (string) -> WebGraph node id (long)
- // Each line in PID_TO_NODE is formatted as: swhId nodeId
- // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
+ /**
+ * Converts SWH PID to corresponding long node id.
+ *
+ * @param swhId node represented as a {@link SwhId}
+ * @return corresponding node as a long id
+ * @see org.softwareheritage.graph.SwhId
+ */
public long getNodeId(SwhId swhId) {
+ // Each line in PID_TO_NODE is formatted as: swhId nodeId
+ // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
long start = 0;
long end = nbIds - 1;
@@ -56,11 +82,17 @@
throw new IllegalArgumentException("Unknown SWH id: " + swhId);
}
- // WebGraph node id (long) -> SWH id (string)
- // Each line in NODE_TO_PID is formatted as: swhId
- // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
- // nodeId-th line to get corresponding swhId
+ /**
+ * Converts a node long id to corresponding SWH PID.
+ *
+ * @param nodeId node as a long id
+ * @return corresponding node as a {@link SwhId}
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId getSwhId(long nodeId) {
+ // Each line in NODE_TO_PID is formatted as: swhId
+ // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
+ // nodeId-th line to get corresponding swhId
if (nodeId < 0 || nodeId >= nbIds) {
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds);
}
@@ -69,6 +101,9 @@
return new SwhId(swhId);
}
+ /**
+ * Closes the mapping files.
+ */
public void close() throws IOException {
swhToNodeMap.close();
nodeToSwhMap.close();
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -8,9 +8,27 @@
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+/**
+ * Mapping between long node id and SWH node type as described in the <a
+ * href="https://docs.softwareheritage.org/devel/swh-model/data-model.html">data model</a>.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NodeTypesMap {
+ /**
+ * Array storing for each node its type
+ * @see it.unimi.dsi.fastutil.longs.LongBigList
+ */
LongBigList nodeTypesMap;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full path of the compressed graph
+ */
public NodeTypesMap(String graphPath) throws IOException {
try {
nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
@@ -19,6 +37,13 @@
}
}
+ /**
+ * Returns node type from a node long id.
+ *
+ * @param nodeId node as a long id
+ * @return corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getType(long nodeId) {
long type = nodeTypesMap.getLong(nodeId);
return Node.Type.fromInt((int) type);
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -24,6 +24,14 @@
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeTypesMap;
+/**
+ * Pre-processing steps (such as dumping mapping files on disk) before running the graph service.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Setup {
public static void main(String[] args) throws IOException {
if (args.length != 2) {
@@ -42,6 +50,12 @@
System.out.println("Done in: " + duration + " seconds");
}
+ /**
+ * Computes and dumps on disk mapping files.
+ *
+ * @param nodesPath path of the compressed csv nodes file
+ * @param graphPath path of the compressed graph
+ */
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException {
diff --git a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
--- a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
@@ -6,6 +6,14 @@
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
+/**
+ * Linux git log experiment to benchmark graph traversal.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class LinuxLog {
public static void main(String[] args) throws IOException {
String path = args[0];

File Metadata

Mime Type
text/plain
Expires
Wed, Dec 18, 11:34 AM (9 h, 47 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219109

Event Timeline