Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123285
D1741.id5851.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
36 KB
Subscribers
None
D1741.id5851.diff
View Options
diff --git a/api/server/pom.xml b/api/server/pom.xml
--- a/api/server/pom.xml
+++ b/api/server/pom.xml
@@ -129,4 +129,13 @@
</plugins>
</pluginManagement>
</build>
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>3.1.1</version>
+ </plugin>
+ </plugins>
+ </reporting>
</project>
diff --git a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
--- a/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/AllowedEdges.java
@@ -5,11 +5,29 @@
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+/**
+ * Edge restriction based on node types, used when visiting the graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class AllowedEdges {
+ /** Graph on which edge restriction is performed */
Graph graph;
- // First dimension is source node type, second dimension is destination node type
+ /**
+ * 2D boolean matrix storing access rights for all combination of src/dst node types (first
+ * dimension is source, second dimension is destination)
+ */
boolean[][] allowed;
+ /**
+ * Constructor.
+ *
+ * @param graph the graph on which to perform edge restriction
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
@@ -29,6 +47,7 @@
}
// Format: "src1:dst1,src2:dst2,[...]"
+ // TODO: link API doc
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
@@ -46,12 +65,25 @@
}
}
+ /**
+ * Checks if allowed to use the edge from the source node to the destination node.
+ *
+ * @param srcNodeId edge source node
+ * @param dstNodeId edge destination node
+ * @return true if allowed and false otherwise
+ */
public boolean isAllowed(long srcNodeId, long dstNodeId) {
Node.Type srcType = graph.getNodeType(srcNodeId);
Node.Type dstType = graph.getNodeType(dstNodeId);
return isAllowed(srcType, dstType);
}
+ /**
+ * Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of
+ * node ids.
+ *
+ * @see AllowedEdges#isAllowed(long, long)
+ */
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
return allowed[srcType.ordinal()][dstType.ordinal()];
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java
--- a/api/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/App.java
@@ -15,6 +15,14 @@
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.algo.Stats;
+/**
+ * Entrypoint of the swh-graph server REST API.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class App {
public static void main(String[] args) throws IOException {
String path = args[0];
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -7,15 +7,38 @@
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
+/**
+ * REST API endpoints wrapper functions.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Endpoint {
+ /** Graph where traversal endpoint is performed */
Graph graph;
+ /** Internal traversal API */
Traversal traversal;
+ /**
+ * Constructor.
+ *
+ * @param graph the graph used for traversal endpoint
+ * @param direction a string (either "forward" or "backward") specifying edge orientation
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
+ /**
+ * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
+ *
+ * @param nodeIds the list of long node ids
+ * @return a list of corresponding SWH PIDs
+ */
private ArrayList<SwhId> convertNodesToSwhIds(ArrayList<Long> nodeIds) {
ArrayList<SwhId> swhIds = new ArrayList<>();
for (long nodeId : nodeIds) {
@@ -24,6 +47,13 @@
return swhIds;
}
+ /**
+ * Converts a list of (internal) long node ids to the corresponding SwhPath.
+ *
+ * @param nodeIds the list of long node ids
+ * @return the corresponding SwhPath
+ * @see org.softwareheritage.graph.SwhPath
+ */
private SwhPath convertNodesToSwhPath(ArrayList<Long> nodeIds) {
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
@@ -32,6 +62,13 @@
return path;
}
+ /**
+ * Converts a list of paths using (internal) long node ids to a list of corresponding SwhPath.
+ *
+ * @param pathsNodeId the list of paths with long node ids
+ * @return a list of corresponding SwhPath
+ * @see org.softwareheritage.graph.SwhPath
+ */
private ArrayList<SwhPath> convertPathsToSwhIds(ArrayList<ArrayList<Long>> pathsNodeId) {
ArrayList<SwhPath> paths = new ArrayList<>();
for (ArrayList<Long> path : pathsNodeId) {
@@ -40,18 +77,45 @@
return paths;
}
+ /**
+ * Leaves endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#leaves(long)
+ */
public ArrayList<SwhId> leaves(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = traversal.leaves(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * Neighbors endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
+ */
public ArrayList<SwhId> neighbors(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = traversal.neighbors(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * Walk endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @param dstFmt destination used in endpoint call (TODO: link to API doc)
+ * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
+ * @return the resulting SwhPath from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.SwhPath
+ * @see org.softwareheritage.graph.algo.Traversal#walk
+ */
public SwhPath walk(SwhId src, String dstFmt, String algorithm) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = null;
@@ -71,12 +135,29 @@
return convertNodesToSwhPath(nodeIds);
}
+ /**
+ * VisitNodes endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhId from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
+ */
public ArrayList<SwhId> visitNodes(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<Long> nodeIds = traversal.visitNodes(srcNodeId);
return convertNodesToSwhIds(nodeIds);
}
+ /**
+ * VisitPaths endpoint wrapper function (performs input/output node ids conversions).
+ *
+ * @param src source node of endpoint call specified as a SwhId
+ * @return the resulting list of SwhPath from endpoint call
+ * @see org.softwareheritage.graph.SwhId
+ * @see org.softwareheritage.graph.SwhPath
+ * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long)
+ */
public ArrayList<SwhPath> visitPaths(SwhId src) {
long srcNodeId = graph.getNodeId(src);
ArrayList<ArrayList<Long>> paths = traversal.visitPaths(srcNodeId);
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -9,17 +9,38 @@
import org.softwareheritage.graph.backend.NodeIdMap;
import org.softwareheritage.graph.backend.NodeTypesMap;
+/**
+ * Main class storing the compressed graph and node id mappings.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Graph {
+ /** File extension for the SWH PID to long node id map */
public static final String PID_TO_NODE = ".pid2node.csv";
+ /** File extension for the long node id to SWH PID map */
public static final String NODE_TO_PID = ".node2pid.csv";
+ /** File extension for the long node id to node typ map */
public static final String NODE_TO_TYPE = ".node2type.map";
+ /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
BVGraph graph;
+ /** Transposed compressed graph (used for backward traversals) */
BVGraph graphTransposed;
+ /** Full path of the graph */
String path;
+ /** Mapping for internal long id and external SWH PIDs */
NodeIdMap nodeIdMap;
+ /** Mapping for internal long id and node types */
NodeTypesMap nodeTypesMap;
+ /**
+ * Constructor.
+ *
+ * @param path path of the compressed graph to load
+ */
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
@@ -28,54 +49,134 @@
this.nodeTypesMap = new NodeTypesMap(path);
}
+ /**
+ * Cleans up graph resources after use.
+ */
public void cleanUp() throws IOException {
nodeIdMap.close();
}
+ /**
+ * Returns the graph full path.
+ *
+ * @return graph full path
+ */
public String getPath() {
return path;
}
+ /**
+ * Converts node id as {@link SwhId} to long.
+ *
+ * @param swhId node specified as a {@link SwhId}
+ * @return internal long node id
+ * @see org.softwareheritage.graph.SwhId
+ */
public long getNodeId(SwhId swhId) {
return nodeIdMap.getNodeId(swhId);
}
+ /**
+ * Converts node id as long to {@link SwhId}.
+ *
+ * @param nodeId node specified as a long id
+ * @return external SWH PID
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId getSwhId(long nodeId) {
return nodeIdMap.getSwhId(nodeId);
}
+ /**
+ * Returns node type.
+ *
+ * @param nodeId node specified as a long id
+ * @return corresponding node type
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
+ /**
+ * Returns number of nodes in the graph.
+ *
+ * @return number of nodes in the graph
+ */
public long getNbNodes() {
return graph.numNodes();
}
+ /**
+ * Returns number of edges in the graph.
+ *
+ * @return number of edges in the graph
+ */
public long getNbEdges() {
return graph.numArcs();
}
+ /**
+ * Returns list of successors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return list of successors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] successors(long nodeId) {
return graph.successorBigArray(nodeId);
}
+ /**
+ * Returns the outdegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return outdegree of a node
+ */
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
+ /**
+ * Returns list of predecessors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return list of predecessors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] predecessors(long nodeId) {
return graphTransposed.successorBigArray(nodeId);
}
+ /**
+ * Returns the indegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return indegree of a node
+ */
public long indegree(long nodeId) {
return graphTransposed.outdegree(nodeId);
}
+ /**
+ * Returns the degree of a node, depending on graph orientation.
+ *
+ * @param nodeId node specified as a long id
+ * @param useTransposed boolean value to use transposed graph
+ * @return degree of a node
+ */
public long degree(long nodeId, boolean useTransposed) {
return (useTransposed) ? indegree(nodeId) : outdegree(nodeId);
}
+ /**
+ * Returns the neighbors of a node, depending on graph orientation.
+ *
+ * @param nodeId node specified as a long id
+ * @param useTransposed boolean value to use transposed graph
+ * @return list of neighbors of the node, specified as a {@link LongBigArrays}
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
public long[][] neighbors(long nodeId, boolean useTransposed) {
return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java
@@ -7,12 +7,32 @@
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Graph;
+/**
+ * Iterator class to go over a node neighbors in the graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Neighbors implements Iterable<Long> {
+ /** Graph used to explore neighbors */
Graph graph;
+ /** Boolean to specify the use of the transposed graph */
boolean useTransposed;
+ /** Graph edge restriction */
AllowedEdges edges;
+ /** Source node from which neighbors will be listed */
long srcNodeId;
+ /**
+ * Constructor.
+ *
+ * @param graph graph used to explore neighbors
+ * @param useTransposed boolean value to use transposed graph
+ * @param edges edges allowed to be used in the graph
+ * @param srcNodeId source node from where to list neighbors
+ */
public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) {
this.graph = graph;
this.useTransposed = useTransposed;
@@ -25,6 +45,14 @@
return new NeighborsIterator();
}
+ /**
+ * Inner class for {@link Neighbors} iterator.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NeighborsIterator implements Iterator<Long> {
long nextNeighborIdx;
long nbNeighbors;
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Node.java b/api/server/src/main/java/org/softwareheritage/graph/Node.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Node.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Node.java
@@ -4,14 +4,38 @@
import java.util.Arrays;
import java.util.List;
+/**
+ * A node in the Software Heritage graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Node {
+ /**
+ * Software Heritage graph node types, as described in the
+ * <a href="https://docs.softwareheritage.org/devel/swh-model/data-model.html">data model</a>.
+ */
public enum Type {
+ /** Content node */
CNT,
+ /** Directory node */
DIR,
+ /** Release node */
REL,
+ /** Revision node */
REV,
+ /** Snapshot node */
SNP;
+ /**
+ * Converts integer to corresponding SWH node type.
+ *
+ * @param intType node type represented as an integer
+ * @return the corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public static Node.Type fromInt(int intType) {
switch (intType) {
case 0:
@@ -28,18 +52,32 @@
return null;
}
+ /**
+ * Converts string to corresponding SWH node type.
+ *
+ * @param strType node type represented as a string
+ * @return the corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public static Node.Type fromStr(String strType) {
return Node.Type.valueOf(strType.toUpperCase());
}
- public static ArrayList<Node.Type> parse(String strType) {
+ /**
+ * Converts formatted string (TODO: link API doc) to SWH node type possible values.
+ *
+ * @param strFmtType node types represented as a formatted string (TODO: link API doc)
+ * @return a list containing the {@link Node.Type} values
+ * @see org.softwareheritage.graph.Node.Type
+ */
+ public static ArrayList<Node.Type> parse(String strFmtType) {
ArrayList<Node.Type> types = new ArrayList<>();
- if (strType.equals("*")) {
+ if (strFmtType.equals("*")) {
List<Node.Type> nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
- types.add(Node.Type.fromStr(strType));
+ types.add(Node.Type.fromStr(strFmtType));
}
return types;
diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
--- a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java
@@ -4,18 +4,36 @@
import org.softwareheritage.graph.Node;
+/**
+ * A Software Heritage PID, see <a
+ * href="https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers">persistent
+ * identifier documentation</a>.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class SwhId {
+ /** Fixed hash length of the PID */
public static final int HASH_LENGTH = 40;
+ /** Full PID as a string */
String swhId;
+ /** PID node type */
Node.Type type;
+ /** PID hex-encoded SHA1 hash */
String hash;
- // SWH ID format: 'swh:1:type:hash'
- // https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html
+ /**
+ * Constructor.
+ *
+ * @param swhId full PID as a string
+ */
public SwhId(String swhId) {
this.swhId = swhId;
+ // PID format: 'swh:1:type:hash'
String[] parts = swhId.split(":");
if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) {
throw new IllegalArgumentException("Expected SWH ID format to be 'swh:1:type:hash', got: " + swhId);
@@ -52,15 +70,31 @@
return swhId;
}
+ /**
+ * Returns full PID as a string.
+ *
+ * @return full PID string
+ */
@JsonValue
public String getSwhId() {
return swhId;
}
+ /**
+ * Returns PID node type.
+ *
+ * @return PID corresponding {@link Node.Type}
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getType() {
return type;
}
+ /**
+ * Returns PID hex-encoded SHA1 hash.
+ *
+ * @return PID string hash
+ */
public String getHash() {
return hash;
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
--- a/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/SwhPath.java
@@ -6,13 +6,30 @@
import org.softwareheritage.graph.SwhId;
+/**
+ * Wrapper class to store a list of {@link SwhId}.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ * @see org.softwareheritage.graph.SwhId
+ */
+
public class SwhPath {
ArrayList<SwhId> path;
+ /**
+ * Constructor.
+ */
public SwhPath() {
this.path = new ArrayList<SwhId>();
}
+ /**
+ * Constructor.
+ *
+ * @param swhIds variable number of string PIDs to initialize this path with
+ */
public SwhPath(String ...swhIds) {
this();
for (String swhId : swhIds) {
@@ -20,6 +37,12 @@
}
}
+ /**
+ * Constructor.
+ *
+ * @param swhIds variable number of @{link SwhId} to initialize this path with
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhPath(SwhId ...swhIds) {
this();
for (SwhId swhId : swhIds) {
@@ -27,19 +50,43 @@
}
}
+ /**
+ * Returns this path as a list of {@link SwhId}.
+ *
+ * @return list of {@link SwhId} constituting the path
+ * @see org.softwareheritage.graph.SwhId
+ */
@JsonValue
public ArrayList<SwhId> getPath() {
return path;
}
+ /**
+ * Adds a {@link SwhId} to this path.
+ *
+ * @param {@link SwhId} to add to this path
+ * @see org.softwareheritage.graph.SwhId
+ */
public void add(SwhId swhId) {
path.add(swhId);
}
+ /**
+ * Returns the {@link SwhId} at the specified position in this path.
+ *
+ * @param index position of the {@link SwhId} to return
+ * @return {@link SwhId} at the specified position
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId get(int index) {
return path.get(index);
}
+ /**
+ * Returns the number of elements in this path.
+ *
+ * @return number of elements in this path
+ */
public int size() {
return path.size();
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
--- a/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Stats.java
@@ -4,6 +4,14 @@
import java.io.IOException;
import java.util.Properties;
+/**
+ * Statistics on the compressed graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Stats {
public class Counts {
public long nodes;
@@ -28,6 +36,11 @@
public Degree indegree;
public Degree outdegree;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full path of compressed graph
+ */
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
--- a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -14,16 +14,40 @@
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
+/**
+ * Traversal algorithms on the compressed graph.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Traversal {
+ /** Graph used in the traversal */
Graph graph;
+ /** Boolean to specify the use of the transposed graph */
boolean useTransposed;
+ /** Graph edge restriction */
AllowedEdges edges;
- // Big array storing if we have visited a node
+ /**
+ * Bit array storing if we have visited a node
+ * @see it.unimi.dsi.bits.LongArrayBitVector
+ */
LongArrayBitVector visited;
- // Big array storing parents to retrieve the path when backtracking
+ /**
+ * Array storing parent node id for each nodes during a traversal
+ * @see it.unimi.dsi.fastutil.longs.LongBigArrays
+ */
long[][] nodeParent;
+ /**
+ * Constructor.
+ *
+ * @param graph graph used in the traversal
+ * @param direction a string (either "forward" or "backward") specifying edge orientation
+ * @param edgesFmt a formatted string describing allowed edges (TODO: link API doc)
+ */
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
@@ -38,6 +62,12 @@
this.nodeParent = LongBigArrays.newBigArray(nbNodes);
}
+ /**
+ * Returns the leaves of a subgraph rooted at the specified source node.
+ *
+ * @param srcNodeId source node
+ * @return list of node ids corresponding to the leaves
+ */
public ArrayList<Long> leaves(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
@@ -66,6 +96,12 @@
return nodeIds;
}
+ /**
+ * Returns node direct neighbors (linked with exactly one edge).
+ *
+ * @param srcNodeId source node
+ * @return list of node ids corresponding to the neighbors
+ */
public ArrayList<Long> neighbors(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
@@ -74,6 +110,12 @@
return nodeIds;
}
+ /**
+ * Performs a graph traversal and returns explored nodes.
+ *
+ * @param srcNodeId source node
+ * @return list of explored node ids
+ */
public ArrayList<Long> visitNodes(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<Long>();
Stack<Long> stack = new Stack<Long>();
@@ -97,6 +139,12 @@
return nodeIds;
}
+ /**
+ * Performs a graph traversal and returns explored paths.
+ *
+ * @param srcNodeId source node
+ * @return list of explored paths (represented as a list of node ids)
+ */
public ArrayList<ArrayList<Long>> visitPaths(long srcNodeId) {
ArrayList<ArrayList<Long>> paths = new ArrayList<>();
Stack<Long> currentPath = new Stack<Long>();
@@ -104,6 +152,13 @@
return paths;
}
+ /**
+ * Internal recursive function of {@link #visitPaths}.
+ *
+ * @param currentNodeId current node
+ * @param paths list of currently stored paths
+ * @param currentPath current path as node ids
+ */
private void visitPathsInternal(
long currentNodeId, ArrayList<ArrayList<Long>> paths, Stack<Long> currentPath) {
currentPath.push(currentNodeId);
@@ -125,6 +180,13 @@
currentPath.pop();
}
+ /**
+ * Performs a graph traversal and returns the first found path from source to destination.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return found path as a list of node ids
+ */
public <T> ArrayList<Long> walk(long srcNodeId, T dst, String algorithm) {
long dstNodeId = -1;
if (algorithm.equals("dfs")) {
@@ -143,6 +205,13 @@
return nodeIds;
}
+ /**
+ * Internal DFS function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return final destination node or -1 if no path found
+ */
private <T> long walkInternalDfs(long srcNodeId, T dst) {
Stack<Long> stack = new Stack<Long>();
this.visited.fill(false);
@@ -168,6 +237,13 @@
return -1;
}
+ /**
+ * Internal BFS function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dst destination (either a node or a node type)
+ * @return final destination node or -1 if no path found
+ */
private <T> long walkInternalBfs(long srcNodeId, T dst) {
Queue<Long> queue = new LinkedList<Long>();
this.visited.fill(false);
@@ -193,6 +269,13 @@
return -1;
}
+ /**
+ * Internal function of {@link #walk} to check if a node corresponds to the destination.
+ *
+ * @param nodeId current node
+ * @param dst destination (either a node or a node type)
+ * @return true if the node is a destination, or false otherwise
+ */
private <T> boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
@@ -205,6 +288,13 @@
}
}
+ /**
+ * Internal backtracking function of {@link #walk}.
+ *
+ * @param srcNodeId source node
+ * @param dstNodeId destination node
+ * @return the found path, as a list of node ids
+ */
private ArrayList<Long> backtracking(long srcNodeId, long dstNodeId) {
ArrayList<Long> path = new ArrayList<Long>();
long currentNodeId = dstNodeId;
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/MapFile.java
@@ -7,10 +7,29 @@
import it.unimi.dsi.io.ByteBufferInputStream;
+/**
+ * Wrapper class around very big mmap()-ed file.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class MapFile {
+ /**
+ * Memory-mapped file buffer
+ * @see it.unimi.dsi.io.ByteBufferInputStream
+ */
ByteBufferInputStream bufferMap;
+ /** Fixed line length of the mmap()-ed file */
int lineLength;
+ /**
+ * Constructor.
+ *
+ * @param path file path to mmap()
+ * @param lineLength fixed length of a line in the file
+ */
public MapFile(String path, int lineLength) throws IOException {
this.bufferMap = null;
this.lineLength = lineLength;
@@ -21,6 +40,12 @@
}
}
+ /**
+ * Returns a specific line in the file.
+ *
+ * @param lineIndex line number in the file
+ * @return the line at the specified position
+ */
public String readAtLine(long lineIndex) {
byte[] buffer = new byte[lineLength];
long position = lineIndex * (long) lineLength;
@@ -30,6 +55,9 @@
return line.trim();
}
+ /**
+ * Closes the mmap()-ed file.
+ */
public void close() throws IOException {
bufferMap.close();
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -6,15 +6,35 @@
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.MapFile;
+/**
+ * Mapping between internal long node id and external SWH PID.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NodeIdMap {
+ /** Fixed length of full SWH PID */
public static final int SWH_ID_LENGTH = 50;
+ /** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
+ /** Full graph path */
String graphPath;
+ /** Number of ids to map */
long nbIds;
+ /** mmap()-ed PID_TO_NODE file */
MapFile swhToNodeMap;
+ /** mmap()-ed NODE_TO_PID file */
MapFile nodeToSwhMap;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full graph path
+ * @param nbNodes number of nodes in the graph
+ */
public NodeIdMap(String graphPath, long nbNodes) throws IOException {
this.graphPath = graphPath;
this.nbIds = nbNodes;
@@ -26,10 +46,16 @@
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength);
}
- // SWH id (string) -> WebGraph node id (long)
- // Each line in PID_TO_NODE is formatted as: swhId nodeId
- // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
+ /**
+ * Converts SWH PID to corresponding long node id.
+ *
+ * @param swhId node represented as a {@link SwhId}
+ * @return corresponding node as a long id
+ * @see org.softwareheritage.graph.SwhId
+ */
public long getNodeId(SwhId swhId) {
+ // Each line in PID_TO_NODE is formatted as: swhId nodeId
+ // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
long start = 0;
long end = nbIds - 1;
@@ -56,11 +82,17 @@
throw new IllegalArgumentException("Unknown SWH id: " + swhId);
}
- // WebGraph node id (long) -> SWH id (string)
- // Each line in NODE_TO_PID is formatted as: swhId
- // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
- // nodeId-th line to get corresponding swhId
+ /**
+ * Converts a node long id to corresponding SWH PID.
+ *
+ * @param nodeId node as a long id
+ * @return corresponding node as a {@link SwhId}
+ * @see org.softwareheritage.graph.SwhId
+ */
public SwhId getSwhId(long nodeId) {
+ // Each line in NODE_TO_PID is formatted as: swhId
+ // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
+ // nodeId-th line to get corresponding swhId
if (nodeId < 0 || nodeId >= nbIds) {
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds);
}
@@ -69,6 +101,9 @@
return new SwhId(swhId);
}
+ /**
+ * Closes the mapping files.
+ */
public void close() throws IOException {
swhToNodeMap.close();
nodeToSwhMap.close();
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -8,9 +8,27 @@
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+/**
+ * Mapping between long node id and SWH node type as described in the <a
+ * href="https://docs.softwareheritage.org/devel/swh-model/data-model.html">data model</a>.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class NodeTypesMap {
+ /**
+ * Array storing for each node its type
+ * @see it.unimi.dsi.fastutil.longs.LongBigList
+ */
LongBigList nodeTypesMap;
+ /**
+ * Constructor.
+ *
+ * @param graphPath full path of the compressed graph
+ */
public NodeTypesMap(String graphPath) throws IOException {
try {
nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
@@ -19,6 +37,13 @@
}
}
+ /**
+ * Returns node type from a node long id.
+ *
+ * @param nodeId node as a long id
+ * @return corresponding {@link Node.Type} value
+ * @see org.softwareheritage.graph.Node.Type
+ */
public Node.Type getType(long nodeId) {
long type = nodeTypesMap.getLong(nodeId);
return Node.Type.fromInt((int) type);
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -24,6 +24,14 @@
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeTypesMap;
+/**
+ * Pre-processing steps (such as dumping mapping files on disk) before running the graph service.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class Setup {
public static void main(String[] args) throws IOException {
if (args.length != 2) {
@@ -42,6 +50,12 @@
System.out.println("Done in: " + duration + " seconds");
}
+ /**
+ * Computes and dumps on disk mapping files.
+ *
+ * @param nodesPath path of the compressed csv nodes file
+ * @param graphPath path of the compressed graph
+ */
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException {
diff --git a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
--- a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
@@ -6,6 +6,14 @@
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
+/**
+ * Linux git log experiment to benchmark graph traversal.
+ *
+ * @author Thibault Allançon
+ * @version 1.0
+ * @since 1.0
+ */
+
public class LinuxLog {
public static void main(String[] args) throws IOException {
String path = args[0];
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 11:34 AM (9 h, 47 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219109
Attached To
D1741: Add javadoc documentation in graph service
Event Timeline
Log In to Comment