diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java --- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -3,7 +3,6 @@ import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; -import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.NodeIdMap; @@ -45,16 +44,16 @@ return graph.numArcs(); } - public LazyLongIterator successors(long nodeId) { - return graph.successors(nodeId); + public long[][] successors(long nodeId) { + return graph.successorBigArray(nodeId); } public long outdegree(long nodeId) { return graph.outdegree(nodeId); } - public LazyLongIterator predecessors(long nodeId) { - return graphTransposed.successors(nodeId); + public long[][] predecessors(long nodeId) { + return graphTransposed.successorBigArray(nodeId); } public long indegree(long nodeId) { @@ -65,7 +64,7 @@ return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); } - public LazyLongIterator neighbors(long nodeId, boolean useTransposed) { + public long[][] neighbors(long nodeId, boolean useTransposed) { return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java new file mode 100644 --- /dev/null +++ b/api/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -0,0 +1,60 @@ +package org.softwareheritage.graph; + +import java.util.Iterator; + +import it.unimi.dsi.fastutil.longs.LongBigArrays; + +import org.softwareheritage.graph.AllowedEdges; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhId; + +public class Neighbors implements Iterable { + Graph graph; + boolean useTransposed; + AllowedEdges edges; + SwhId src; + + public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, SwhId src) { + this.graph = graph; + this.useTransposed = useTransposed; + this.edges = edges; + this.src = src; + } + + @Override + public Iterator iterator() { + return new NeighborsIterator(); + } + + public class NeighborsIterator implements Iterator { + long nextNeighborIdx; + long nbNeighbors; + long[][] neighbors; + + public NeighborsIterator() { + long srcNodeId = graph.getNodeId(src); + + this.nextNeighborIdx = -1; + this.nbNeighbors = graph.degree(srcNodeId, useTransposed); + this.neighbors = graph.neighbors(srcNodeId, useTransposed); + } + + // Look ahead because with edge restriction not all neighbors are considered + public boolean hasNext() { + for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) { + long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx); + SwhId nextSwhId = graph.getSwhId(nextNodeId); + if (edges.isAllowed(src.getType(), nextSwhId.getType())) { + nextNeighborIdx = lookAheadIdx; + return true; + } + } + return false; + } + + public Long next() { + long nextNodeId = LongBigArrays.get(neighbors, nextNeighborIdx); + return nextNodeId; + } + } +} diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Leaves.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Leaves.java --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Leaves.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Leaves.java @@ -2,12 +2,11 @@ import java.util.ArrayList; -import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.SwhId; public class Leaves { @@ -41,19 +40,11 @@ visited.set(currentNodeId); SwhId currentSwhId = graph.getSwhId(currentNodeId); - long degree = graph.degree(currentNodeId, useTransposed); - LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); long neighborsCnt = 0; - - while (degree-- > 0) { - long neighborNodeId = neighbors.nextLong(); - Node.Type currentNodeType = currentSwhId.getType(); - Node.Type neighborNodeType = graph.getSwhId(neighborNodeId).getType(); - if (edges.isAllowed(currentNodeType, neighborNodeType)) { - neighborsCnt++; - if (!visited.getBoolean(neighborNodeId)) { - dfs(neighborNodeId); - } + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + neighborsCnt++; + if (!visited.getBoolean(neighborNodeId)) { + dfs(neighborNodeId); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java @@ -2,8 +2,6 @@ import java.util.ArrayList; -import it.unimi.dsi.big.webgraph.LazyLongIterator; - import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; @@ -33,17 +31,9 @@ } private void iterateNeighbors(SwhId swhId) { - long nodeId = graph.getNodeId(swhId); - long degree = graph.degree(nodeId, useTransposed); - LazyLongIterator neighborsNodeIds = graph.neighbors(nodeId, useTransposed); - - while (degree-- > 0) { - long neighborNodeId = neighborsNodeIds.nextLong(); - SwhId neighborSwhId = graph.getSwhId(neighborNodeId); - if (edges.isAllowed(swhId.getType(), neighborSwhId.getType())) { - neighbors.add(neighborSwhId); - } + // TEMPORARY FIX: Avoid import naming problem with Neighbors + for (long neighborNodeId : new org.softwareheritage.graph.Neighbors(graph, useTransposed, edges, swhId)) { + neighbors.add(graph.getSwhId(neighborNodeId)); } } } - diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java new file mode 100644 --- /dev/null +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -0,0 +1,254 @@ +package org.softwareheritage.graph.algo; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedList; +import java.util.Queue; +import java.util.Stack; + +import it.unimi.dsi.bits.LongArrayBitVector; +import it.unimi.dsi.fastutil.longs.LongBigArrays; + +import org.softwareheritage.graph.AllowedEdges; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Neighbors; +import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.SwhId; + +public class Traversal { + Graph graph; + boolean useTransposed; + AllowedEdges edges; + + // Big array storing if we have visited a node + LongArrayBitVector visited; + // Big array storing parents to retrieve the path when backtracking + long[][] nodeParent; + + public Traversal(Graph graph, String direction, String edgesFmt) { + if (!direction.matches("forward|backward")) { + throw new IllegalArgumentException("Unknown traversal direction: " + direction); + } + + this.graph = graph; + this.useTransposed = (direction.equals("backward")); + this.edges = new AllowedEdges(edgesFmt); + + long nbNodes = graph.getNbNodes(); + this.visited = LongArrayBitVector.ofLength(nbNodes); + this.nodeParent = LongBigArrays.newBigArray(nbNodes); + } + + // Temporary until better separation between internal work and external ids + private ArrayList convertToSwhIds(ArrayList nodeIds) { + ArrayList swhIds = new ArrayList(); + for (long nodeId : nodeIds) { + swhIds.add(graph.getSwhId(nodeId)); + } + return swhIds; + } + + public ArrayList leavesEndpoint(SwhId src) { + long nodeId = graph.getNodeId(src); + ArrayList leavesNodeIds = leavesInternalEndpoint(nodeId); + return convertToSwhIds(leavesNodeIds); + } + + public ArrayList neighborsEndpoint(SwhId src) { + long nodeId = graph.getNodeId(src); + ArrayList neighborsNodeIds = neighborsInternalEndpoint(nodeId); + return convertToSwhIds(neighborsNodeIds); + } + + public ArrayList walkEndpoint(SwhId src, String dstFmt, String traversal) { + if (!traversal.matches("dfs|bfs")) { + throw new IllegalArgumentException("Unknown traversal algorithm: " + traversal); + } + + long srcNodeId = graph.getNodeId(src); + long dstNodeId = (traversal.equals("dfs")) ? dfs(srcNodeId, dstFmt) : bfs(srcNodeId, dstFmt); + if (dstNodeId == -1) { + throw new IllegalArgumentException("Unable to find destination point: " + dstFmt); + } + + ArrayList path = backtracking(srcNodeId, dstNodeId); + return convertToSwhIds(path); + } + + public ArrayList visitNodesEndpoint(SwhId src) { + long nodeId = graph.getNodeId(src); + ArrayList nodes = visitNodesInternalEndpoint(nodeId); + return convertToSwhIds(nodes); + } + + public ArrayList> visitPathsEndpoint(SwhId src) { + long nodeId = graph.getNodeId(src); + ArrayList> pathNodeIds = new ArrayList<>(); + Stack currentPath = new Stack(); + visitPathsInternalEndpoint(nodeId, pathNodeIds, currentPath); + + ArrayList> paths = new ArrayList<>(); + for (ArrayList nodeIds : pathNodeIds) { + paths.add(convertToSwhIds(nodeIds)); + } + return paths; + } + + private ArrayList leavesInternalEndpoint(long srcNodeId) { + ArrayList leaves = new ArrayList(); + Stack stack = new Stack(); + this.visited.clear(); + + stack.push(srcNodeId); + visited.set(srcNodeId); + + while (!stack.isEmpty()) { + long currentNodeId = stack.pop(); + SwhId currentSwhId = graph.getSwhId(currentNodeId); + + long neighborsCnt = 0; + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + neighborsCnt++; + if (!visited.getBoolean(neighborNodeId)) { + stack.push(neighborNodeId); + visited.set(neighborNodeId); + } + } + + if (neighborsCnt == 0) { + leaves.add(currentNodeId); + } + } + + return leaves; + } + + private ArrayList neighborsInternalEndpoint(long srcNodeId) { + SwhId srcSwhId = graph.getSwhId(srcNodeId); + ArrayList neighbors = new ArrayList(); + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcSwhId)) { + neighbors.add(neighborNodeId); + } + return neighbors; + } + + private ArrayList visitNodesInternalEndpoint(long srcNodeId) { + // No specific destination point + String dstFmt = null; + dfs(srcNodeId, dstFmt); + + ArrayList nodes = new ArrayList(); + for (long nodeId = 0; nodeId < graph.getNbNodes(); nodeId++) { + if (this.visited.getBoolean(nodeId)) { + nodes.add(nodeId); + } + } + return nodes; + } + + private void visitPathsInternalEndpoint( + long currentNodeId, ArrayList> paths, Stack currentPath) { + SwhId currentSwhId = graph.getSwhId(currentNodeId); + currentPath.push(currentNodeId); + + long visitedNeighbors = 0; + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + visitPathsInternalEndpoint(neighborNodeId, paths, currentPath); + visitedNeighbors++; + } + + if (visitedNeighbors == 0) { + ArrayList path = new ArrayList(); + for (long nodeId : currentPath) { + path.add(nodeId); + } + paths.add(path); + } + + currentPath.pop(); + } + + private ArrayList backtracking(long srcNodeId, long dstNodeId) { + ArrayList path = new ArrayList(); + long currentNodeId = dstNodeId; + while (currentNodeId != srcNodeId) { + path.add(currentNodeId); + currentNodeId = LongBigArrays.get(nodeParent, currentNodeId); + } + path.add(srcNodeId); + Collections.reverse(path); + return path; + } + + private long dfs(long srcNodeId, String dstFmt) { + Stack stack = new Stack(); + this.visited.clear(); + + stack.push(srcNodeId); + visited.set(srcNodeId); + + while (!stack.isEmpty()) { + long currentNodeId = stack.pop(); + SwhId currentSwhId = graph.getSwhId(currentNodeId); + if (isDestinationNode(currentSwhId, dstFmt)) { + return currentNodeId; + } + + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + if (!visited.getBoolean(neighborNodeId)) { + stack.push(neighborNodeId); + visited.set(neighborNodeId); + LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId); + } + } + } + + return -1; + } + + private long bfs(long srcNodeId, String dstFmt) { + Queue queue = new LinkedList(); + this.visited.clear(); + + queue.add(srcNodeId); + visited.set(srcNodeId); + + while (!queue.isEmpty()) { + long currentNodeId = queue.poll(); + SwhId currentSwhId = graph.getSwhId(currentNodeId); + if (isDestinationNode(currentSwhId, dstFmt)) { + return currentNodeId; + } + + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + if (!visited.getBoolean(neighborNodeId)) { + queue.add(neighborNodeId); + visited.set(neighborNodeId); + LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId); + } + } + } + + return -1; + } + + private boolean isDestinationNode(SwhId swhId, String dstFmt) { + // No destination node, early exit + if (dstFmt == null) { + return false; + } + + // SwhId as destination node + if (swhId.toString().equals(dstFmt)) { + return true; + } + + // Node.Type as destination node + try { + Node.Type dstType = Node.Type.fromStr(dstFmt); + return (swhId.getType().equals(dstType)); + } catch (IllegalArgumentException e) { + return false; + } + } +} diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java @@ -4,12 +4,11 @@ import java.util.LinkedHashSet; import java.util.Stack; -import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.SwhPath; @@ -56,21 +55,14 @@ } private void dfs(long currentNodeId) { - nodes.add(graph.getSwhId(currentNodeId)); + SwhId currentSwhId = graph.getSwhId(currentNodeId); + nodes.add(currentSwhId); currentPath.push(currentNodeId); - long degree = graph.degree(currentNodeId, useTransposed); - LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); long visitedNeighbors = 0; - - while (degree-- > 0) { - long neighborNodeId = neighbors.nextLong(); - Node.Type currentNodeType = graph.getSwhId(currentNodeId).getType(); - Node.Type neighborNodeType = graph.getSwhId(neighborNodeId).getType(); - if (edges.isAllowed(currentNodeType, neighborNodeType)) { + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { dfs(neighborNodeId); visitedNeighbors++; - } } if (visitedNeighbors == 0) { @@ -85,18 +77,12 @@ } private void dfsOutputOnlyNodes(long currentNodeId) { - nodes.add(graph.getSwhId(currentNodeId)); + SwhId currentSwhId = graph.getSwhId(currentNodeId); + nodes.add(currentSwhId); visited.set(currentNodeId); - long degree = graph.degree(currentNodeId, useTransposed); - LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); - - while (degree-- > 0) { - long neighborNodeId = neighbors.nextLong(); - Node.Type currentNodeType = graph.getSwhId(currentNodeId).getType(); - Node.Type neighborNodeType = graph.getSwhId(neighborNodeId).getType(); - if (!visited.getBoolean(neighborNodeId) - && edges.isAllowed(currentNodeType, neighborNodeType)) { + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + if (!visited.getBoolean(neighborNodeId)) { dfsOutputOnlyNodes(neighborNodeId); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Walk.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Walk.java --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Walk.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Walk.java @@ -4,12 +4,12 @@ import java.util.Queue; import java.util.LinkedList; -import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.longs.LongBigArrays; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.SwhPath; @@ -81,14 +81,8 @@ return currentNodeId; } - long degree = graph.degree(currentNodeId, useTransposed); - LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); - - while (degree-- > 0) { - long neighborNodeId = neighbors.nextLong(); - SwhId neighborSwhId = graph.getSwhId(neighborNodeId); - if (!visited.getBoolean(neighborNodeId) - && edges.isAllowed(currentSwhId.getType(), neighborSwhId.getType())) { + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId); @@ -113,14 +107,8 @@ return currentNodeId; } - long degree = graph.degree(currentNodeId, useTransposed); - LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); - - while (degree-- > 0) { - long neighborNodeId = neighbors.nextLong(); - SwhId neighborSwhId = graph.getSwhId(neighborNodeId); - if (!visited.getBoolean(neighborNodeId) - && edges.isAllowed(currentSwhId.getType(), neighborSwhId.getType())) { + for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentSwhId)) { + if (!visited.getBoolean(neighborNodeId)) { queue.add(neighborNodeId); visited.set(neighborNodeId); LongBigArrays.set(nodeParent, neighborNodeId, currentNodeId);