diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java
deleted file mode 100644
index 1b4c7d8..0000000
--- a/java/src/main/java/org/softwareheritage/graph/Traversal.java
+++ /dev/null
@@ -1,609 +0,0 @@
-package org.softwareheritage.graph;
-
-import java.util.ArrayDeque;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Random;
-import java.util.Stack;
-import java.util.function.Consumer;
-import java.util.function.LongConsumer;
-
-import it.unimi.dsi.big.webgraph.LazyLongIterator;
-
-/**
- * Traversal algorithms on the compressed graph.
- *
- *
- * @author The Software Heritage developers
- */
-
-public class Traversal {
- /** Graph used in the traversal */
- SwhBidirectionalGraph graph;
- /** Type filter on the returned nodes */
- AllowedNodes nodesFilter;
- /** Restrictions on which edges can be traversed */
- AllowedEdges edgesRestrictions;
-
- /** Hash set storing if we have visited a node */
- HashSet visited;
- /** Hash map storing parent node id for each nodes during a traversal */
- Map parentNode;
- /** Number of edges accessed during traversal */
- long nbEdgesAccessed;
-
- /** The anti Dos limit of edges traversed while a visit */
- long maxEdges;
-
- /** random number generator, for random walks */
- Random rng;
-
- /**
- * Constructor.
- *
- * @param graph graph used in the traversal
- * @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed
- * edges
- */
-
- public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt) {
- this(graph, direction, edgesFmt, 0);
- }
-
- public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges) {
- this(graph, direction, edgesFmt, maxEdges, "*");
- }
-
- public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges,
- String returnTypes) {
- if (!direction.matches("forward|backward")) {
- throw new IllegalArgumentException("Unknown traversal direction: " + direction);
- }
-
- if (direction.equals("backward")) {
- this.graph = graph.transpose();
- } else {
- this.graph = graph;
- }
- this.nodesFilter = new AllowedNodes(returnTypes);
- this.edgesRestrictions = new AllowedEdges(edgesFmt);
-
- this.visited = new HashSet<>();
- this.parentNode = new HashMap<>();
- this.nbEdgesAccessed = 0;
- this.maxEdges = maxEdges;
- this.rng = new Random();
- }
-
- /**
- * Returns number of accessed edges during traversal.
- *
- * @return number of edges accessed in last traversal
- */
- public long getNbEdgesAccessed() {
- return nbEdgesAccessed;
- }
-
- /**
- * Returns number of accessed nodes during traversal.
- *
- * @return number of nodes accessed in last traversal
- */
- public long getNbNodesAccessed() {
- return this.visited.size();
- }
-
- /**
- * Returns lazy iterator of successors of a node while following a specific set of edge types.
- *
- * @param g input graph
- * @param nodeId node specified as a long id
- * @param allowedEdges the specification of which edges can be traversed
- * @return lazy iterator of successors of the node, specified as a
- * WebGraph LazyLongIterator
- */
- public static LazyLongIterator filterSuccessors(SwhBidirectionalGraph g, long nodeId, AllowedEdges allowedEdges) {
- if (allowedEdges.restrictedTo == null) {
- // All edges are allowed, bypass edge check
- return g.successors(nodeId);
- } else {
- LazyLongIterator allSuccessors = g.successors(nodeId);
- return new LazyLongIterator() {
- @Override
- public long nextLong() {
- long neighbor;
- while ((neighbor = allSuccessors.nextLong()) != -1) {
- if (allowedEdges.isAllowed(g.getNodeType(nodeId), g.getNodeType(neighbor))) {
- return neighbor;
- }
- }
- return -1;
- }
-
- @Override
- public long skip(final long n) {
- long i;
- for (i = 0; i < n && nextLong() != -1; i++)
- ;
- return i;
- }
- };
- }
- }
-
- private LazyLongIterator filterSuccessors(long nodeId, AllowedEdges allowedEdges) {
- return filterSuccessors(graph, nodeId, allowedEdges);
- }
-
- /**
- * Push version of {@link #leaves} will fire passed callback for each leaf.
- */
- public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) {
- Stack stack = new Stack<>();
- this.nbEdgesAccessed = 0;
-
- stack.push(srcNodeId);
- visited.add(srcNodeId);
-
- while (!stack.isEmpty()) {
- long currentNodeId = stack.pop();
-
- long neighborsCnt = 0;
- nbEdgesAccessed += graph.outdegree(currentNodeId);
- if (this.maxEdges > 0) {
- if (nbEdgesAccessed >= this.maxEdges) {
- break;
- }
- }
- LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- neighborsCnt++;
- if (!visited.contains(neighborNodeId)) {
- stack.push(neighborNodeId);
- visited.add(neighborNodeId);
- }
- }
-
- if (neighborsCnt == 0) {
- if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) {
- cb.accept(currentNodeId);
- }
- }
- }
- }
-
- /**
- * Returns the leaves of a subgraph rooted at the specified source node.
- *
- * @param srcNodeId source node
- * @return list of node ids corresponding to the leaves
- */
- public ArrayList leaves(long srcNodeId) {
- ArrayList nodeIds = new ArrayList();
- leavesVisitor(srcNodeId, nodeIds::add);
- return nodeIds;
- }
-
- /**
- * Push version of {@link #neighbors}: will fire passed callback on each neighbor.
- */
- public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) {
- this.nbEdgesAccessed = graph.outdegree(srcNodeId);
- if (this.maxEdges > 0) {
- if (nbEdgesAccessed >= this.maxEdges) {
- return;
- }
- }
- LazyLongIterator it = filterSuccessors(srcNodeId, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- if (nodesFilter.isAllowed(graph.getNodeType(neighborNodeId))) {
- cb.accept(neighborNodeId);
- }
- }
- }
-
- /**
- * Returns node direct neighbors (linked with exactly one edge).
- *
- * @param srcNodeId source node
- * @return list of node ids corresponding to the neighbors
- */
- public ArrayList neighbors(long srcNodeId) {
- ArrayList nodeIds = new ArrayList<>();
- neighborsVisitor(srcNodeId, nodeIds::add);
- return nodeIds;
- }
-
- /**
- * Push version of {@link #visitNodes}: will fire passed callback on each visited node.
- */
- public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) {
- Stack stack = new Stack<>();
- this.nbEdgesAccessed = 0;
-
- stack.push(srcNodeId);
- visited.add(srcNodeId);
-
- while (!stack.isEmpty()) {
- long currentNodeId = stack.pop();
- if (nodeCb != null) {
- if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) {
- nodeCb.accept(currentNodeId);
- }
- }
- nbEdgesAccessed += graph.outdegree(currentNodeId);
- if (this.maxEdges > 0) {
- if (nbEdgesAccessed >= this.maxEdges) {
- break;
- }
- }
- LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- if (edgeCb != null) {
- if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) {
- edgeCb.accept(currentNodeId, neighborNodeId);
- }
- }
- if (!visited.contains(neighborNodeId)) {
- stack.push(neighborNodeId);
- visited.add(neighborNodeId);
- }
- }
- }
- }
-
- /** One-argument version to handle callbacks properly */
- public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) {
- visitNodesVisitor(srcNodeId, cb, null);
- }
-
- /**
- * Performs a graph traversal and returns explored nodes.
- *
- * @param srcNodeId source node
- * @return list of explored node ids
- */
- public ArrayList visitNodes(long srcNodeId) {
- ArrayList nodeIds = new ArrayList<>();
- visitNodesVisitor(srcNodeId, nodeIds::add);
- return nodeIds;
- }
-
- /**
- * Push version of {@link #visitPaths}: will fire passed callback on each discovered (complete)
- * path.
- */
- public void visitPathsVisitor(long srcNodeId, PathConsumer cb) {
- Stack currentPath = new Stack<>();
- this.nbEdgesAccessed = 0;
- visitPathsInternalVisitor(srcNodeId, currentPath, cb);
- }
-
- /**
- * Performs a graph traversal and returns explored paths.
- *
- * @param srcNodeId source node
- * @return list of explored paths (represented as a list of node ids)
- */
- public ArrayList> visitPaths(long srcNodeId) {
- ArrayList> paths = new ArrayList<>();
- visitPathsVisitor(srcNodeId, paths::add);
- return paths;
- }
-
- private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) {
- currentPath.push(currentNodeId);
-
- long visitedNeighbors = 0;
-
- nbEdgesAccessed += graph.outdegree(currentNodeId);
- if (this.maxEdges > 0) {
- if (nbEdgesAccessed >= this.maxEdges) {
- currentPath.pop();
- return;
- }
- }
- LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- visitPathsInternalVisitor(neighborNodeId, currentPath, cb);
- visitedNeighbors++;
- }
-
- if (visitedNeighbors == 0) {
- ArrayList path = new ArrayList<>(currentPath);
- cb.accept(path);
- }
-
- currentPath.pop();
- }
-
- /**
- * Performs a graph traversal with backtracking, and returns the first found path from source to
- * destination.
- *
- * @param srcNodeId source node
- * @param dst destination (either a node or a node type)
- * @return found path as a list of node ids
- */
- public ArrayList walk(long srcNodeId, T dst, String visitOrder) {
- long dstNodeId;
- if (visitOrder.equals("dfs")) {
- dstNodeId = walkInternalDFS(srcNodeId, dst);
- } else if (visitOrder.equals("bfs")) {
- dstNodeId = walkInternalBFS(srcNodeId, dst);
- } else {
- throw new IllegalArgumentException("Unknown visit order: " + visitOrder);
- }
-
- if (dstNodeId == -1) {
- throw new IllegalArgumentException("Cannot find destination: " + dst);
- }
-
- return backtracking(srcNodeId, dstNodeId);
- }
-
- /**
- * Performs a random walk (picking a random successor at each step) from source to destination.
- *
- * @param srcNodeId source node
- * @param dst destination (either a node or a node type)
- * @return found path as a list of node ids or an empty path to indicate that no suitable path have
- * been found
- */
- public ArrayList randomWalk(long srcNodeId, T dst) {
- return randomWalk(srcNodeId, dst, 0);
- }
-
- /**
- * Performs a stubborn random walk (picking a random successor at each step) from source to
- * destination. The walk is "stubborn" in the sense that it will not give up the first time if a
- * satisfying target node is found, but it will retry up to a limited amount of times.
- *
- * @param srcNodeId source node
- * @param dst destination (either a node or a node type)
- * @param retries number of times to retry; 0 means no retries (single walk)
- * @return found path as a list of node ids or an empty path to indicate that no suitable path have
- * been found
- */
- public ArrayList randomWalk(long srcNodeId, T dst, int retries) {
- long curNodeId = srcNodeId;
- ArrayList path = new ArrayList<>();
- this.nbEdgesAccessed = 0;
- boolean found;
-
- if (retries < 0) {
- throw new IllegalArgumentException("Negative number of retries given: " + retries);
- }
-
- while (true) {
- path.add(curNodeId);
- LazyLongIterator successors = filterSuccessors(curNodeId, edgesRestrictions);
- curNodeId = randomPick(successors);
- if (curNodeId < 0) {
- found = false;
- break;
- }
- if (isDstNode(curNodeId, dst)) {
- path.add(curNodeId);
- found = true;
- break;
- }
- }
-
- if (found) {
- return path;
- } else if (retries > 0) { // try again
- return randomWalk(srcNodeId, dst, retries - 1);
- } else { // not found and no retries left
- path.clear();
- return path;
- }
- }
-
- /**
- * Randomly choose an element from an iterator over Longs using reservoir sampling
- *
- * @param elements iterator over selection domain
- * @return randomly chosen element or -1 if no suitable element was found
- */
- private long randomPick(LazyLongIterator elements) {
- long curPick = -1;
- long seenCandidates = 0;
-
- for (long element; (element = elements.nextLong()) != -1;) {
- seenCandidates++;
- if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) {
- curPick = element;
- }
- }
-
- return curPick;
- }
-
- /**
- * Internal DFS function of {@link #walk}.
- *
- * @param srcNodeId source node
- * @param dst destination (either a node or a node type)
- * @return final destination node or -1 if no path found
- */
- private long walkInternalDFS(long srcNodeId, T dst) {
- Stack stack = new Stack<>();
- this.nbEdgesAccessed = 0;
-
- stack.push(srcNodeId);
- visited.add(srcNodeId);
-
- while (!stack.isEmpty()) {
- long currentNodeId = stack.pop();
- if (isDstNode(currentNodeId, dst)) {
- return currentNodeId;
- }
-
- nbEdgesAccessed += graph.outdegree(currentNodeId);
- LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- if (!visited.contains(neighborNodeId)) {
- stack.push(neighborNodeId);
- visited.add(neighborNodeId);
- parentNode.put(neighborNodeId, currentNodeId);
- }
- }
- }
-
- return -1;
- }
-
- /**
- * Internal BFS function of {@link #walk}.
- *
- * @param srcNodeId source node
- * @param dst destination (either a node or a node type)
- * @return final destination node or -1 if no path found
- */
- private long walkInternalBFS(long srcNodeId, T dst) {
- Queue queue = new LinkedList<>();
- this.nbEdgesAccessed = 0;
-
- queue.add(srcNodeId);
- visited.add(srcNodeId);
-
- while (!queue.isEmpty()) {
- long currentNodeId = queue.poll();
- if (isDstNode(currentNodeId, dst)) {
- return currentNodeId;
- }
-
- nbEdgesAccessed += graph.outdegree(currentNodeId);
- LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- if (!visited.contains(neighborNodeId)) {
- queue.add(neighborNodeId);
- visited.add(neighborNodeId);
- parentNode.put(neighborNodeId, currentNodeId);
- }
- }
- }
-
- return -1;
- }
-
- /**
- * Internal function of {@link #walk} to check if a node corresponds to the destination.
- *
- * @param nodeId current node
- * @param dst destination (either a node or a node type)
- * @return true if the node is a destination, or false otherwise
- */
- private boolean isDstNode(long nodeId, T dst) {
- if (dst instanceof Long) {
- long dstNodeId = (Long) dst;
- return nodeId == dstNodeId;
- } else if (dst instanceof Node.Type) {
- Node.Type dstType = (Node.Type) dst;
- return graph.getNodeType(nodeId) == dstType;
- } else {
- return false;
- }
- }
-
- /**
- * Internal backtracking function of {@link #walk}.
- *
- * @param srcNodeId source node
- * @param dstNodeId destination node
- * @return the found path, as a list of node ids
- */
- private ArrayList backtracking(long srcNodeId, long dstNodeId) {
- ArrayList path = new ArrayList<>();
- long currentNodeId = dstNodeId;
- while (currentNodeId != srcNodeId) {
- path.add(currentNodeId);
- currentNodeId = parentNode.get(currentNodeId);
- }
- path.add(srcNodeId);
- Collections.reverse(path);
- return path;
- }
-
- /**
- * Find a common descendant between two given nodes using two parallel BFS
- *
- * @param lhsNode the first node
- * @param rhsNode the second node
- * @return the found path, as a list of node ids
- */
- public Long findCommonDescendant(long lhsNode, long rhsNode) {
- Queue lhsStack = new ArrayDeque<>();
- Queue rhsStack = new ArrayDeque<>();
- HashSet lhsVisited = new HashSet<>();
- HashSet rhsVisited = new HashSet<>();
- lhsStack.add(lhsNode);
- rhsStack.add(rhsNode);
- lhsVisited.add(lhsNode);
- rhsVisited.add(rhsNode);
-
- this.nbEdgesAccessed = 0;
- Long curNode;
-
- while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) {
- if (!lhsStack.isEmpty()) {
- curNode = lhsStack.poll();
- nbEdgesAccessed += graph.outdegree(curNode);
- LazyLongIterator it = filterSuccessors(curNode, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- if (!lhsVisited.contains(neighborNodeId)) {
- if (rhsVisited.contains(neighborNodeId))
- return neighborNodeId;
- lhsStack.add(neighborNodeId);
- lhsVisited.add(neighborNodeId);
- }
- }
- }
-
- if (!rhsStack.isEmpty()) {
- curNode = rhsStack.poll();
- nbEdgesAccessed += graph.outdegree(curNode);
- LazyLongIterator it = filterSuccessors(curNode, edgesRestrictions);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
- if (!rhsVisited.contains(neighborNodeId)) {
- if (lhsVisited.contains(neighborNodeId))
- return neighborNodeId;
- rhsStack.add(neighborNodeId);
- rhsVisited.add(neighborNodeId);
- }
- }
- }
- }
-
- return null;
- }
-
- public interface NodeIdConsumer extends LongConsumer {
- /**
- * Callback for incrementally receiving node identifiers during a graph visit.
- */
- void accept(long nodeId);
- }
-
- public interface EdgeIdConsumer {
- /**
- * Callback for incrementally receiving edge identifiers during a graph visit.
- */
- void accept(long srcId, long dstId);
- }
-
- public interface PathConsumer extends Consumer> {
- /**
- * Callback for incrementally receiving node paths (made of node identifiers) during a graph visit.
- */
- void accept(ArrayList path);
- }
-}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
deleted file mode 100644
index 56c8369..0000000
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
+++ /dev/null
@@ -1,62 +0,0 @@
-package org.softwareheritage.graph.experiments.forks;
-
-import com.martiansoftware.jsap.*;
-import org.softwareheritage.graph.SwhBidirectionalGraph;
-import org.softwareheritage.graph.Traversal;
-
-import java.io.IOException;
-import java.util.Scanner;
-
-public class FindCommonAncestor {
- private SwhBidirectionalGraph graph;
-
- private void load_graph(String graphBasename) throws IOException {
- System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
- System.err.println("Graph loaded.");
- }
-
- private static JSAPResult parse_args(String[] args) {
- JSAPResult config = null;
- try {
- SimpleJSAP jsap = new SimpleJSAP(FindCommonAncestor.class.getName(), "",
- new Parameter[]{
- new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e',
- "edges", "Edges constraints"),
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
- "graph", "Basename of the compressed graph"),});
-
- config = jsap.parse(args);
- if (jsap.messagePrinted()) {
- System.exit(1);
- }
- } catch (JSAPException e) {
- e.printStackTrace();
- }
- return config;
- }
-
- public static void main(String[] args) {
- JSAPResult config = parse_args(args);
-
- String graphPath = config.getString("graphPath");
- String edgesFmt = config.getString("edgesFmt");
-
- FindCommonAncestor fca = new FindCommonAncestor();
- try {
- fca.load_graph(graphPath);
- } catch (IOException e) {
- System.out.println("Could not load graph: " + e);
- System.exit(2);
- }
-
- Scanner input = new Scanner(System.in);
- while (input.hasNextLong()) {
- long lhsNode = input.nextLong();
- long rhsNode = input.nextLong();
-
- Traversal t = new Traversal(fca.graph.symmetrize(), "forward", edgesFmt);
- System.out.println(t.findCommonDescendant(lhsNode, rhsNode));
- }
- }
-}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
deleted file mode 100644
index b5f318c..0000000
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
+++ /dev/null
@@ -1,123 +0,0 @@
-package org.softwareheritage.graph.experiments.forks;
-
-import com.martiansoftware.jsap.*;
-import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import org.softwareheritage.graph.SwhBidirectionalGraph;
-import org.softwareheritage.graph.Node;
-
-import java.io.IOException;
-import java.util.*;
-
-public class FindPath {
- private SwhBidirectionalGraph graph;
- private Long emptySnapshot;
-
- private void load_graph(String graphBasename) throws IOException {
- System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = SwhBidirectionalGraph.loadMapped(graphBasename).symmetrize();
- System.err.println("Graph loaded.");
- this.emptySnapshot = null;
- }
-
- private static JSAPResult parse_args(String[] args) {
- JSAPResult config = null;
- try {
- SimpleJSAP jsap = new SimpleJSAP(FindPath.class.getName(), "",
- new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),});
-
- config = jsap.parse(args);
- if (jsap.messagePrinted()) {
- System.exit(1);
- }
- } catch (JSAPException e) {
- e.printStackTrace();
- }
- return config;
- }
-
- private boolean nodeIsEmptySnapshot(Long node) {
- if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP
- && this.graph.outdegree(node) == 0) {
- System.err.println("Found empty snapshot: " + node);
- this.emptySnapshot = node;
- }
- return node.equals(this.emptySnapshot);
- }
-
- private Boolean shouldVisit(Long node) {
- Node.Type nt = this.graph.getNodeType(node);
- if (nt != Node.Type.REV && nt != Node.Type.REL && nt != Node.Type.SNP && nt != Node.Type.ORI) {
- return false;
- }
- if (this.nodeIsEmptySnapshot(node))
- return false;
- return true;
- }
-
- private ArrayList findPath(Long src, Long dst) {
- HashSet visited = new HashSet<>();
- Queue queue = new ArrayDeque<>();
- Map parentNode = new HashMap<>();
-
- queue.add(src);
- visited.add(src);
-
- while (!queue.isEmpty()) {
- long currentNode = queue.poll();
-
- final LazyLongIterator iterator = graph.successors(currentNode);
- long succ;
- while ((succ = iterator.nextLong()) != -1) {
- if (!shouldVisit(succ) || visited.contains(succ))
- continue;
- visited.add(succ);
- queue.add(succ);
- parentNode.put(succ, currentNode);
-
- if (succ == dst) {
- ArrayList path = new ArrayList<>();
- long n = dst;
- while (n != src) {
- path.add(n);
- n = parentNode.get(n);
- }
- path.add(src);
- Collections.reverse(path);
- return path;
- }
- }
- }
- return null;
- }
-
- public static void main(String[] args) {
- JSAPResult config = parse_args(args);
-
- String graphPath = config.getString("graphPath");
-
- FindPath fpath = new FindPath();
- try {
- fpath.load_graph(graphPath);
- } catch (IOException e) {
- System.out.println("Could not load graph: " + e);
- System.exit(2);
- }
-
- Scanner input = new Scanner(System.in);
- while (input.hasNextLong()) {
- long lhsNode = input.nextLong();
- long rhsNode = input.nextLong();
-
- ArrayList path = fpath.findPath(lhsNode, rhsNode);
- if (path != null) {
- for (Long n : path) {
- System.out.format("%d ", n);
- }
- System.out.println();
- } else {
- System.out.println("null");
- }
- }
- }
-}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
deleted file mode 100644
index 8d5e5fa..0000000
--- a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
+++ /dev/null
@@ -1,130 +0,0 @@
-package org.softwareheritage.graph.experiments.multiplicationfactor;
-
-import com.martiansoftware.jsap.*;
-import org.softwareheritage.graph.SwhBidirectionalGraph;
-import org.softwareheritage.graph.Node;
-import org.softwareheritage.graph.Traversal;
-
-import java.io.IOException;
-import java.util.Scanner;
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-
-public class GenDistribution {
- private SwhBidirectionalGraph graph;
-
- private static JSAPResult parse_args(String[] args) {
- JSAPResult config = null;
- try {
- SimpleJSAP jsap = new SimpleJSAP(GenDistribution.class.getName(), "",
- new Parameter[]{
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
- "graph", "Basename of the compressed graph"),
- new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's',
- "srctype", "Source node type"),
- new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd',
- "dsttype", "Destination node type"),
- new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e',
- "edges", "Edges constraints"),
-
- new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't',
- "numthreads", "Number of threads"),});
-
- config = jsap.parse(args);
- if (jsap.messagePrinted()) {
- System.exit(1);
- }
- } catch (JSAPException e) {
- e.printStackTrace();
- }
- return config;
- }
-
- public static void main(String[] args) {
- JSAPResult config = parse_args(args);
-
- String graphPath = config.getString("graphPath");
- Node.Type srcType = Node.Type.fromStr(config.getString("srcType"));
- Node.Type dstType = Node.Type.fromStr(config.getString("dstType"));
- String edgesFmt = config.getString("edgesFmt");
- int numThreads = config.getInt("numThreads");
-
- GenDistribution tp = new GenDistribution();
- try {
- tp.load_graph(graphPath);
- } catch (IOException e) {
- System.out.println("Could not load graph: " + e);
- System.exit(2);
- }
-
- final long END_OF_QUEUE = -1L;
-
- ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads);
- ExecutorService service = Executors.newFixedThreadPool(numThreads + 1);
-
- service.submit(() -> {
- try {
- Scanner input = new Scanner(System.in);
- while (input.hasNextLong()) {
- long node = input.nextLong();
- if (tp.graph.getNodeType(node) == srcType) {
- queue.put(node);
- }
- }
- } catch (InterruptedException e) {
- e.printStackTrace();
- } finally {
- for (int i = 0; i < numThreads; ++i) {
- try {
- queue.put(END_OF_QUEUE);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
- }
- });
-
- for (int i = 0; i < numThreads; ++i) {
- service.submit(() -> {
- SwhBidirectionalGraph thread_graph = tp.graph.copy();
- long startTime;
- double totalTime;
-
- while (true) {
- Long node = null;
- try {
- node = queue.take();
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- if (node == null || node == END_OF_QUEUE) {
- return;
- }
-
- Traversal t = new Traversal(thread_graph, "backward", edgesFmt);
- int[] count = {0};
-
- startTime = System.nanoTime();
- t.visitNodesVisitor(node, (curnode) -> {
- if (tp.graph.getNodeType(curnode) == dstType) {
- count[0]++;
- }
- });
- long endTime = System.nanoTime();
- totalTime = (double) (endTime - startTime) / 1e9;
- System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(),
- t.getNbEdgesAccessed(), totalTime);
- }
- });
- }
-
- service.shutdown();
- }
-
- private void load_graph(String graphBasename) throws IOException {
- System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
- System.err.println("Graph loaded.");
- }
-}
diff --git a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java
index 4a2bb79..3623bb0 100644
--- a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java
+++ b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java
@@ -1,110 +1,113 @@
package org.softwareheritage.graph.utils;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.*;
import java.io.IOException;
import java.time.Duration;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Stack;
/* sample invocation on granet.internal.softwareheritage.org for benchmarking
* purposes, with the main swh-graph service already running:
*
* $ java -cp ~/swh-environment/swh-graph/java/target/swh-graph-0.3.0.jar -Xmx300G -XX:PretenureSizeThreshold=512M -XX:MaxNewSize=4G -XX:+UseLargePages -XX:+UseTransparentHugePages -XX:+UseNUMA -XX:+UseTLAB -XX:+ResizeTLAB org.softwareheritage.graph.utils.FindEarliestRevision --timing /dev/shm/swh-graph/default/graph
*
*/
public class FindEarliestRevision {
public static void main(String[] args) throws IOException, ClassNotFoundException {
String graphPath = args[0];
boolean timing = false;
long ts, elapsedNanos;
Duration elapsed;
if (args.length >= 2 && (args[0].equals("-t") || args[0].equals("--timing"))) {
timing = true;
graphPath = args[1];
System.err.println("started with timing option, will keep track of elapsed time");
}
System.err.println("loading transposed graph...");
ts = System.nanoTime();
SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(graphPath).transpose();
elapsed = Duration.ofNanos(System.nanoTime() - ts);
System.err.println(String.format("transposed graph loaded (duration: %s).", elapsed));
System.err.println("loading revision timestamps...");
ts = System.nanoTime();
graph.loadCommitterTimestamps();
elapsed = Duration.ofNanos(System.nanoTime() - ts);
System.err.println(String.format("revision timestamps loaded (duration: %s).", elapsed));
Scanner stdin = new Scanner(System.in);
AllowedEdges edges = new AllowedEdges("cnt:dir,dir:dir,dir:rev");
String rawSWHID = null;
SWHID srcSWHID = null;
long lineCount = 0;
long srcNodeId = -1;
if (timing) {
System.err.println("starting SWHID processing...");
elapsed = Duration.ZERO;
}
while (stdin.hasNextLine()) {
if (timing)
ts = System.nanoTime();
rawSWHID = stdin.nextLine().strip();
lineCount++;
try {
srcSWHID = new SWHID(rawSWHID);
srcNodeId = graph.getNodeId(srcSWHID);
} catch (IllegalArgumentException e) {
System.err
.println(String.format("skipping invalid or unknown SWHID %s on line %d", rawSWHID, lineCount));
continue;
}
if (timing)
System.err.println("starting traversal for: " + srcSWHID.toString());
Stack stack = new Stack<>();
HashSet visited = new HashSet<>();
stack.push(srcNodeId);
visited.add(srcNodeId);
long minRevId = -1;
long minTimestamp = Long.MAX_VALUE;
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (graph.getNodeType(currentNodeId) == Node.Type.REV) {
long committerTs = graph.getCommitterTimestamp(currentNodeId);
if (committerTs < minTimestamp) {
minRevId = currentNodeId;
minTimestamp = committerTs;
}
}
- LazyLongIterator it = Traversal.filterSuccessors(graph, currentNodeId, edges);
+ LazyLongIterator it = graph.successors(currentNodeId);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ if (!edges.isAllowed(graph.getNodeType(currentNodeId), graph.getNodeType(neighborNodeId))) {
+ continue;
+ }
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
}
if (minRevId == -1) {
System.err.println("no revision found containing: " + srcSWHID.toString());
} else {
System.out.println(srcSWHID.toString() + "\t" + graph.getSWHID(minRevId).toString());
}
if (timing) {
elapsedNanos = System.nanoTime() - ts; // processing time for current SWHID
elapsed = elapsed.plus(Duration.ofNanos(elapsedNanos)); // cumulative processing time for all SWHIDs
System.err.printf("visit time (s):\t%.6f\n", (double) elapsedNanos / 1_000_000_000);
}
}
if (timing)
System.err.printf("processed %d SWHIDs in %s (%s avg)\n", lineCount, elapsed, elapsed.dividedBy(lineCount));
}
}