();
- for (Long node : nodeIds) {
- if (this.typeIsAllowed(g.getNodeType(node))) {
- filteredNodes.add(node);
- }
- }
- return filteredNodes;
- }
-}
diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java
index 71c4407..2ce9e4a 100644
--- a/java/src/main/java/org/softwareheritage/graph/Traversal.java
+++ b/java/src/main/java/org/softwareheritage/graph/Traversal.java
@@ -1,623 +1,614 @@
package org.softwareheritage.graph;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.Random;
import java.util.Stack;
import java.util.function.Consumer;
import java.util.function.LongConsumer;
import org.softwareheritage.graph.server.Endpoint;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
/**
* Traversal algorithms on the compressed graph.
*
* Internal implementation of the traversal API endpoints. These methods only input/output internal
* long ids, which are converted in the {@link Endpoint} higher-level class to {@link SWHID}.
*
* @author The Software Heritage developers
* @see Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
SwhBidirectionalGraph graph;
- /** Graph edge restrictions */
- AllowedEdges edges;
+ /** Type filter on the returned nodes */
+ AllowedNodes nodesFilter;
+ /** Restrictions on which edges can be traversed */
+ AllowedEdges edgesRestrictions;
/** Hash set storing if we have visited a node */
HashSet visited;
/** Hash map storing parent node id for each nodes during a traversal */
Map parentNode;
/** Number of edges accessed during traversal */
long nbEdgesAccessed;
/** The anti Dos limit of edges traversed while a visit */
long maxEdges;
- /** The string represent the set of type restriction */
- NodesFiltering ndsfilter;
/** random number generator, for random walks */
Random rng;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed
* edges
*/
public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt) {
this(graph, direction, edgesFmt, 0);
}
public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges) {
this(graph, direction, edgesFmt, maxEdges, "*");
}
public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges,
String returnTypes) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
if (direction.equals("backward")) {
this.graph = graph.transpose();
} else {
this.graph = graph;
}
- this.edges = new AllowedEdges(edgesFmt);
+ this.nodesFilter = new AllowedNodes(returnTypes);
+ this.edgesRestrictions = new AllowedEdges(edgesFmt);
this.visited = new HashSet<>();
this.parentNode = new HashMap<>();
this.nbEdgesAccessed = 0;
this.maxEdges = maxEdges;
this.rng = new Random();
-
- if (returnTypes.equals("*")) {
- this.ndsfilter = new NodesFiltering();
- } else {
- this.ndsfilter = new NodesFiltering(returnTypes);
- }
}
/**
* Returns number of accessed edges during traversal.
*
* @return number of edges accessed in last traversal
*/
public long getNbEdgesAccessed() {
return nbEdgesAccessed;
}
/**
* Returns number of accessed nodes during traversal.
*
* @return number of nodes accessed in last traversal
*/
public long getNbNodesAccessed() {
return this.visited.size();
}
/**
* Returns lazy iterator of successors of a node while following a specific set of edge types.
*
* @param g input graph
* @param nodeId node specified as a long id
* @param allowedEdges the specification of which edges can be traversed
* @return lazy iterator of successors of the node, specified as a
* WebGraph LazyLongIterator
*/
public static LazyLongIterator filterSuccessors(SwhBidirectionalGraph g, long nodeId, AllowedEdges allowedEdges) {
if (allowedEdges.restrictedTo == null) {
// All edges are allowed, bypass edge check
return g.successors(nodeId);
} else {
LazyLongIterator allSuccessors = g.successors(nodeId);
return new LazyLongIterator() {
@Override
public long nextLong() {
long neighbor;
while ((neighbor = allSuccessors.nextLong()) != -1) {
if (allowedEdges.isAllowed(g.getNodeType(nodeId), g.getNodeType(neighbor))) {
return neighbor;
}
}
return -1;
}
@Override
public long skip(final long n) {
long i;
for (i = 0; i < n && nextLong() != -1; i++)
;
return i;
}
};
}
}
private LazyLongIterator filterSuccessors(long nodeId, AllowedEdges allowedEdges) {
return filterSuccessors(graph, nodeId, allowedEdges);
}
/**
* Push version of {@link #leaves} will fire passed callback for each leaf.
*/
public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
break;
}
}
- LazyLongIterator it = filterSuccessors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
neighborsCnt++;
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
if (neighborsCnt == 0) {
- cb.accept(currentNodeId);
+ if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) {
+ cb.accept(currentNodeId);
+ }
}
}
}
/**
* Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
* @return list of node ids corresponding to the leaves
*/
public ArrayList leaves(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
leavesVisitor(srcNodeId, nodeIds::add);
- if (ndsfilter.restricted) {
- return ndsfilter.filterByNodeTypes(nodeIds, graph);
- }
return nodeIds;
}
/**
* Push version of {@link #neighbors}: will fire passed callback on each neighbor.
*/
public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) {
this.nbEdgesAccessed = graph.outdegree(srcNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
return;
}
}
- LazyLongIterator it = filterSuccessors(srcNodeId, edges);
+ LazyLongIterator it = filterSuccessors(srcNodeId, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- cb.accept(neighborNodeId);
+ if (nodesFilter.isAllowed(graph.getNodeType(neighborNodeId))) {
+ cb.accept(neighborNodeId);
+ }
}
}
/**
* Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
* @return list of node ids corresponding to the neighbors
*/
public ArrayList neighbors(long srcNodeId) {
ArrayList nodeIds = new ArrayList<>();
neighborsVisitor(srcNodeId, nodeIds::add);
- if (ndsfilter.restricted) {
- return ndsfilter.filterByNodeTypes(nodeIds, graph);
- }
return nodeIds;
}
/**
* Push version of {@link #visitNodes}: will fire passed callback on each visited node.
*/
public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (nodeCb != null) {
- nodeCb.accept(currentNodeId);
+ if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) {
+ nodeCb.accept(currentNodeId);
+ }
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
break;
}
}
- LazyLongIterator it = filterSuccessors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (edgeCb != null) {
- edgeCb.accept(currentNodeId, neighborNodeId);
+ if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) {
+ edgeCb.accept(currentNodeId, neighborNodeId);
+ }
}
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
}
}
/** One-argument version to handle callbacks properly */
public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) {
visitNodesVisitor(srcNodeId, cb, null);
}
/**
* Performs a graph traversal and returns explored nodes.
*
* @param srcNodeId source node
* @return list of explored node ids
*/
public ArrayList visitNodes(long srcNodeId) {
ArrayList nodeIds = new ArrayList<>();
visitNodesVisitor(srcNodeId, nodeIds::add);
- if (ndsfilter.restricted) {
- return ndsfilter.filterByNodeTypes(nodeIds, graph);
- }
return nodeIds;
}
/**
* Push version of {@link #visitPaths}: will fire passed callback on each discovered (complete)
* path.
*/
public void visitPathsVisitor(long srcNodeId, PathConsumer cb) {
Stack currentPath = new Stack<>();
this.nbEdgesAccessed = 0;
visitPathsInternalVisitor(srcNodeId, currentPath, cb);
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList> visitPaths(long srcNodeId) {
ArrayList> paths = new ArrayList<>();
visitPathsVisitor(srcNodeId, paths::add);
return paths;
}
private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
currentPath.pop();
return;
}
}
- LazyLongIterator it = filterSuccessors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
visitPathsInternalVisitor(neighborNodeId, currentPath, cb);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList path = new ArrayList<>(currentPath);
cb.accept(path);
}
currentPath.pop();
}
/**
* Performs a graph traversal with backtracking, and returns the first found path from source to
* destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public ArrayList walk(long srcNodeId, T dst, String visitOrder) {
long dstNodeId;
if (visitOrder.equals("dfs")) {
dstNodeId = walkInternalDFS(srcNodeId, dst);
} else if (visitOrder.equals("bfs")) {
dstNodeId = walkInternalBFS(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown visit order: " + visitOrder);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Cannot find destination: " + dst);
}
return backtracking(srcNodeId, dstNodeId);
}
/**
* Performs a random walk (picking a random successor at each step) from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids or an empty path to indicate that no suitable path have
* been found
*/
public ArrayList randomWalk(long srcNodeId, T dst) {
return randomWalk(srcNodeId, dst, 0);
}
/**
* Performs a stubborn random walk (picking a random successor at each step) from source to
* destination. The walk is "stubborn" in the sense that it will not give up the first time if a
* satisfying target node is found, but it will retry up to a limited amount of times.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @param retries number of times to retry; 0 means no retries (single walk)
* @return found path as a list of node ids or an empty path to indicate that no suitable path have
* been found
*/
public ArrayList randomWalk(long srcNodeId, T dst, int retries) {
long curNodeId = srcNodeId;
ArrayList path = new ArrayList<>();
this.nbEdgesAccessed = 0;
boolean found;
if (retries < 0) {
throw new IllegalArgumentException("Negative number of retries given: " + retries);
}
while (true) {
path.add(curNodeId);
- LazyLongIterator successors = filterSuccessors(curNodeId, edges);
+ LazyLongIterator successors = filterSuccessors(curNodeId, edgesRestrictions);
curNodeId = randomPick(successors);
if (curNodeId < 0) {
found = false;
break;
}
if (isDstNode(curNodeId, dst)) {
path.add(curNodeId);
found = true;
break;
}
}
if (found) {
- if (ndsfilter.restricted) {
- return ndsfilter.filterByNodeTypes(path, graph);
- }
return path;
} else if (retries > 0) { // try again
return randomWalk(srcNodeId, dst, retries - 1);
} else { // not found and no retries left
path.clear();
return path;
}
}
/**
* Randomly choose an element from an iterator over Longs using reservoir sampling
*
* @param elements iterator over selection domain
* @return randomly chosen element or -1 if no suitable element was found
*/
private long randomPick(LazyLongIterator elements) {
long curPick = -1;
long seenCandidates = 0;
for (long element; (element = elements.nextLong()) != -1;) {
seenCandidates++;
if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) {
curPick = element;
}
}
return curPick;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalDFS(long srcNodeId, T dst) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
- LazyLongIterator it = filterSuccessors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalBFS(long srcNodeId, T dst) {
Queue queue = new LinkedList<>();
this.nbEdgesAccessed = 0;
queue.add(srcNodeId);
visited.add(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
- LazyLongIterator it = filterSuccessors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!visited.contains(neighborNodeId)) {
queue.add(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList backtracking(long srcNodeId, long dstNodeId) {
ArrayList path = new ArrayList<>();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = parentNode.get(currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
/**
* Find a common descendant between two given nodes using two parallel BFS
*
* @param lhsNode the first node
* @param rhsNode the second node
* @return the found path, as a list of node ids
*/
public Long findCommonDescendant(long lhsNode, long rhsNode) {
Queue lhsStack = new ArrayDeque<>();
Queue rhsStack = new ArrayDeque<>();
HashSet lhsVisited = new HashSet<>();
HashSet rhsVisited = new HashSet<>();
lhsStack.add(lhsNode);
rhsStack.add(rhsNode);
lhsVisited.add(lhsNode);
rhsVisited.add(rhsNode);
this.nbEdgesAccessed = 0;
Long curNode;
while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) {
if (!lhsStack.isEmpty()) {
curNode = lhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
- LazyLongIterator it = filterSuccessors(curNode, edges);
+ LazyLongIterator it = filterSuccessors(curNode, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!lhsVisited.contains(neighborNodeId)) {
if (rhsVisited.contains(neighborNodeId))
return neighborNodeId;
lhsStack.add(neighborNodeId);
lhsVisited.add(neighborNodeId);
}
}
}
if (!rhsStack.isEmpty()) {
curNode = rhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
- LazyLongIterator it = filterSuccessors(curNode, edges);
+ LazyLongIterator it = filterSuccessors(curNode, edgesRestrictions);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!rhsVisited.contains(neighborNodeId)) {
if (lhsVisited.contains(neighborNodeId))
return neighborNodeId;
rhsStack.add(neighborNodeId);
rhsVisited.add(neighborNodeId);
}
}
}
}
return null;
}
public interface NodeIdConsumer extends LongConsumer {
/**
* Callback for incrementally receiving node identifiers during a graph visit.
*/
void accept(long nodeId);
}
public interface EdgeIdConsumer {
/**
* Callback for incrementally receiving edge identifiers during a graph visit.
*/
void accept(long srcId, long dstId);
}
public interface PathConsumer extends Consumer> {
/**
* Callback for incrementally receiving node paths (made of node identifiers) during a graph visit.
*/
void accept(ArrayList path);
}
}
diff --git a/java/src/test/java/org/softwareheritage/graph/AllowedNodesTest.java b/java/src/test/java/org/softwareheritage/graph/AllowedNodesTest.java
new file mode 100644
index 0000000..ca6479f
--- /dev/null
+++ b/java/src/test/java/org/softwareheritage/graph/AllowedNodesTest.java
@@ -0,0 +1,53 @@
+package org.softwareheritage.graph;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.Set;
+
+public class AllowedNodesTest extends GraphTest {
+ void assertNodeRestriction(AllowedNodes nodes, Set expectedAllowed) {
+ Node.Type[] nodeTypes = Node.Type.values();
+ for (Node.Type t : nodeTypes) {
+ boolean isAllowed = nodes.isAllowed(t);
+ boolean isExpected = expectedAllowed.contains(t);
+ Assertions.assertEquals(isAllowed, isExpected, "Node type: " + t);
+ }
+ }
+
+ @Test
+ public void dirCntNodes() {
+ AllowedNodes edges = new AllowedNodes("dir,cnt");
+ Set expected = Set.of(Node.Type.DIR, Node.Type.CNT);
+ assertNodeRestriction(edges, expected);
+ }
+
+ @Test
+ public void revDirNodes() {
+ AllowedNodes edges = new AllowedNodes("rev,dir");
+ Set expected = Set.of(Node.Type.DIR, Node.Type.REV);
+ assertNodeRestriction(edges, expected);
+ }
+
+ @Test
+ public void relSnpCntNodes() {
+ AllowedNodes edges = new AllowedNodes("rel,snp,cnt");
+ Set expected = Set.of(Node.Type.REL, Node.Type.SNP, Node.Type.CNT);
+ assertNodeRestriction(edges, expected);
+ }
+
+ @Test
+ public void allNodes() {
+ AllowedNodes edges = new AllowedNodes("*");
+ Set expected = Set.of(Node.Type.REL, Node.Type.SNP, Node.Type.CNT, Node.Type.DIR, Node.Type.REV,
+ Node.Type.ORI);
+ assertNodeRestriction(edges, expected);
+ }
+
+ @Test
+ public void noNodes() {
+ AllowedNodes edges = new AllowedNodes("");
+ Set expected = Set.of();
+ assertNodeRestriction(edges, expected);
+ }
+}