diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java deleted file mode 100644 index 1b4c7d8..0000000 --- a/java/src/main/java/org/softwareheritage/graph/Traversal.java +++ /dev/null @@ -1,609 +0,0 @@ -package org.softwareheritage.graph; - -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.Map; -import java.util.Queue; -import java.util.Random; -import java.util.Stack; -import java.util.function.Consumer; -import java.util.function.LongConsumer; - -import it.unimi.dsi.big.webgraph.LazyLongIterator; - -/** - * Traversal algorithms on the compressed graph. - *

- * - * @author The Software Heritage developers - */ - -public class Traversal { - /** Graph used in the traversal */ - SwhBidirectionalGraph graph; - /** Type filter on the returned nodes */ - AllowedNodes nodesFilter; - /** Restrictions on which edges can be traversed */ - AllowedEdges edgesRestrictions; - - /** Hash set storing if we have visited a node */ - HashSet visited; - /** Hash map storing parent node id for each nodes during a traversal */ - Map parentNode; - /** Number of edges accessed during traversal */ - long nbEdgesAccessed; - - /** The anti Dos limit of edges traversed while a visit */ - long maxEdges; - - /** random number generator, for random walks */ - Random rng; - - /** - * Constructor. - * - * @param graph graph used in the traversal - * @param direction a string (either "forward" or "backward") specifying edge orientation - * @param edgesFmt a formatted string describing allowed - * edges - */ - - public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt) { - this(graph, direction, edgesFmt, 0); - } - - public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges) { - this(graph, direction, edgesFmt, maxEdges, "*"); - } - - public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges, - String returnTypes) { - if (!direction.matches("forward|backward")) { - throw new IllegalArgumentException("Unknown traversal direction: " + direction); - } - - if (direction.equals("backward")) { - this.graph = graph.transpose(); - } else { - this.graph = graph; - } - this.nodesFilter = new AllowedNodes(returnTypes); - this.edgesRestrictions = new AllowedEdges(edgesFmt); - - this.visited = new HashSet<>(); - this.parentNode = new HashMap<>(); - this.nbEdgesAccessed = 0; - this.maxEdges = maxEdges; - this.rng = new Random(); - } - - /** - * Returns number of accessed edges during traversal. - * - * @return number of edges accessed in last traversal - */ - public long getNbEdgesAccessed() { - return nbEdgesAccessed; - } - - /** - * Returns number of accessed nodes during traversal. - * - * @return number of nodes accessed in last traversal - */ - public long getNbNodesAccessed() { - return this.visited.size(); - } - - /** - * Returns lazy iterator of successors of a node while following a specific set of edge types. - * - * @param g input graph - * @param nodeId node specified as a long id - * @param allowedEdges the specification of which edges can be traversed - * @return lazy iterator of successors of the node, specified as a - * WebGraph LazyLongIterator - */ - public static LazyLongIterator filterSuccessors(SwhBidirectionalGraph g, long nodeId, AllowedEdges allowedEdges) { - if (allowedEdges.restrictedTo == null) { - // All edges are allowed, bypass edge check - return g.successors(nodeId); - } else { - LazyLongIterator allSuccessors = g.successors(nodeId); - return new LazyLongIterator() { - @Override - public long nextLong() { - long neighbor; - while ((neighbor = allSuccessors.nextLong()) != -1) { - if (allowedEdges.isAllowed(g.getNodeType(nodeId), g.getNodeType(neighbor))) { - return neighbor; - } - } - return -1; - } - - @Override - public long skip(final long n) { - long i; - for (i = 0; i < n && nextLong() != -1; i++) - ; - return i; - } - }; - } - } - - private LazyLongIterator filterSuccessors(long nodeId, AllowedEdges allowedEdges) { - return filterSuccessors(graph, nodeId, allowedEdges); - } - - /** - * Push version of {@link #leaves} will fire passed callback for each leaf. - */ - public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) { - Stack stack = new Stack<>(); - this.nbEdgesAccessed = 0; - - stack.push(srcNodeId); - visited.add(srcNodeId); - - while (!stack.isEmpty()) { - long currentNodeId = stack.pop(); - - long neighborsCnt = 0; - nbEdgesAccessed += graph.outdegree(currentNodeId); - if (this.maxEdges > 0) { - if (nbEdgesAccessed >= this.maxEdges) { - break; - } - } - LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - neighborsCnt++; - if (!visited.contains(neighborNodeId)) { - stack.push(neighborNodeId); - visited.add(neighborNodeId); - } - } - - if (neighborsCnt == 0) { - if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) { - cb.accept(currentNodeId); - } - } - } - } - - /** - * Returns the leaves of a subgraph rooted at the specified source node. - * - * @param srcNodeId source node - * @return list of node ids corresponding to the leaves - */ - public ArrayList leaves(long srcNodeId) { - ArrayList nodeIds = new ArrayList(); - leavesVisitor(srcNodeId, nodeIds::add); - return nodeIds; - } - - /** - * Push version of {@link #neighbors}: will fire passed callback on each neighbor. - */ - public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) { - this.nbEdgesAccessed = graph.outdegree(srcNodeId); - if (this.maxEdges > 0) { - if (nbEdgesAccessed >= this.maxEdges) { - return; - } - } - LazyLongIterator it = filterSuccessors(srcNodeId, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - if (nodesFilter.isAllowed(graph.getNodeType(neighborNodeId))) { - cb.accept(neighborNodeId); - } - } - } - - /** - * Returns node direct neighbors (linked with exactly one edge). - * - * @param srcNodeId source node - * @return list of node ids corresponding to the neighbors - */ - public ArrayList neighbors(long srcNodeId) { - ArrayList nodeIds = new ArrayList<>(); - neighborsVisitor(srcNodeId, nodeIds::add); - return nodeIds; - } - - /** - * Push version of {@link #visitNodes}: will fire passed callback on each visited node. - */ - public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) { - Stack stack = new Stack<>(); - this.nbEdgesAccessed = 0; - - stack.push(srcNodeId); - visited.add(srcNodeId); - - while (!stack.isEmpty()) { - long currentNodeId = stack.pop(); - if (nodeCb != null) { - if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) { - nodeCb.accept(currentNodeId); - } - } - nbEdgesAccessed += graph.outdegree(currentNodeId); - if (this.maxEdges > 0) { - if (nbEdgesAccessed >= this.maxEdges) { - break; - } - } - LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - if (edgeCb != null) { - if (nodesFilter.isAllowed(graph.getNodeType(currentNodeId))) { - edgeCb.accept(currentNodeId, neighborNodeId); - } - } - if (!visited.contains(neighborNodeId)) { - stack.push(neighborNodeId); - visited.add(neighborNodeId); - } - } - } - } - - /** One-argument version to handle callbacks properly */ - public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) { - visitNodesVisitor(srcNodeId, cb, null); - } - - /** - * Performs a graph traversal and returns explored nodes. - * - * @param srcNodeId source node - * @return list of explored node ids - */ - public ArrayList visitNodes(long srcNodeId) { - ArrayList nodeIds = new ArrayList<>(); - visitNodesVisitor(srcNodeId, nodeIds::add); - return nodeIds; - } - - /** - * Push version of {@link #visitPaths}: will fire passed callback on each discovered (complete) - * path. - */ - public void visitPathsVisitor(long srcNodeId, PathConsumer cb) { - Stack currentPath = new Stack<>(); - this.nbEdgesAccessed = 0; - visitPathsInternalVisitor(srcNodeId, currentPath, cb); - } - - /** - * Performs a graph traversal and returns explored paths. - * - * @param srcNodeId source node - * @return list of explored paths (represented as a list of node ids) - */ - public ArrayList> visitPaths(long srcNodeId) { - ArrayList> paths = new ArrayList<>(); - visitPathsVisitor(srcNodeId, paths::add); - return paths; - } - - private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) { - currentPath.push(currentNodeId); - - long visitedNeighbors = 0; - - nbEdgesAccessed += graph.outdegree(currentNodeId); - if (this.maxEdges > 0) { - if (nbEdgesAccessed >= this.maxEdges) { - currentPath.pop(); - return; - } - } - LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - visitPathsInternalVisitor(neighborNodeId, currentPath, cb); - visitedNeighbors++; - } - - if (visitedNeighbors == 0) { - ArrayList path = new ArrayList<>(currentPath); - cb.accept(path); - } - - currentPath.pop(); - } - - /** - * Performs a graph traversal with backtracking, and returns the first found path from source to - * destination. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @return found path as a list of node ids - */ - public ArrayList walk(long srcNodeId, T dst, String visitOrder) { - long dstNodeId; - if (visitOrder.equals("dfs")) { - dstNodeId = walkInternalDFS(srcNodeId, dst); - } else if (visitOrder.equals("bfs")) { - dstNodeId = walkInternalBFS(srcNodeId, dst); - } else { - throw new IllegalArgumentException("Unknown visit order: " + visitOrder); - } - - if (dstNodeId == -1) { - throw new IllegalArgumentException("Cannot find destination: " + dst); - } - - return backtracking(srcNodeId, dstNodeId); - } - - /** - * Performs a random walk (picking a random successor at each step) from source to destination. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @return found path as a list of node ids or an empty path to indicate that no suitable path have - * been found - */ - public ArrayList randomWalk(long srcNodeId, T dst) { - return randomWalk(srcNodeId, dst, 0); - } - - /** - * Performs a stubborn random walk (picking a random successor at each step) from source to - * destination. The walk is "stubborn" in the sense that it will not give up the first time if a - * satisfying target node is found, but it will retry up to a limited amount of times. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @param retries number of times to retry; 0 means no retries (single walk) - * @return found path as a list of node ids or an empty path to indicate that no suitable path have - * been found - */ - public ArrayList randomWalk(long srcNodeId, T dst, int retries) { - long curNodeId = srcNodeId; - ArrayList path = new ArrayList<>(); - this.nbEdgesAccessed = 0; - boolean found; - - if (retries < 0) { - throw new IllegalArgumentException("Negative number of retries given: " + retries); - } - - while (true) { - path.add(curNodeId); - LazyLongIterator successors = filterSuccessors(curNodeId, edgesRestrictions); - curNodeId = randomPick(successors); - if (curNodeId < 0) { - found = false; - break; - } - if (isDstNode(curNodeId, dst)) { - path.add(curNodeId); - found = true; - break; - } - } - - if (found) { - return path; - } else if (retries > 0) { // try again - return randomWalk(srcNodeId, dst, retries - 1); - } else { // not found and no retries left - path.clear(); - return path; - } - } - - /** - * Randomly choose an element from an iterator over Longs using reservoir sampling - * - * @param elements iterator over selection domain - * @return randomly chosen element or -1 if no suitable element was found - */ - private long randomPick(LazyLongIterator elements) { - long curPick = -1; - long seenCandidates = 0; - - for (long element; (element = elements.nextLong()) != -1;) { - seenCandidates++; - if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) { - curPick = element; - } - } - - return curPick; - } - - /** - * Internal DFS function of {@link #walk}. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @return final destination node or -1 if no path found - */ - private long walkInternalDFS(long srcNodeId, T dst) { - Stack stack = new Stack<>(); - this.nbEdgesAccessed = 0; - - stack.push(srcNodeId); - visited.add(srcNodeId); - - while (!stack.isEmpty()) { - long currentNodeId = stack.pop(); - if (isDstNode(currentNodeId, dst)) { - return currentNodeId; - } - - nbEdgesAccessed += graph.outdegree(currentNodeId); - LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - if (!visited.contains(neighborNodeId)) { - stack.push(neighborNodeId); - visited.add(neighborNodeId); - parentNode.put(neighborNodeId, currentNodeId); - } - } - } - - return -1; - } - - /** - * Internal BFS function of {@link #walk}. - * - * @param srcNodeId source node - * @param dst destination (either a node or a node type) - * @return final destination node or -1 if no path found - */ - private long walkInternalBFS(long srcNodeId, T dst) { - Queue queue = new LinkedList<>(); - this.nbEdgesAccessed = 0; - - queue.add(srcNodeId); - visited.add(srcNodeId); - - while (!queue.isEmpty()) { - long currentNodeId = queue.poll(); - if (isDstNode(currentNodeId, dst)) { - return currentNodeId; - } - - nbEdgesAccessed += graph.outdegree(currentNodeId); - LazyLongIterator it = filterSuccessors(currentNodeId, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - if (!visited.contains(neighborNodeId)) { - queue.add(neighborNodeId); - visited.add(neighborNodeId); - parentNode.put(neighborNodeId, currentNodeId); - } - } - } - - return -1; - } - - /** - * Internal function of {@link #walk} to check if a node corresponds to the destination. - * - * @param nodeId current node - * @param dst destination (either a node or a node type) - * @return true if the node is a destination, or false otherwise - */ - private boolean isDstNode(long nodeId, T dst) { - if (dst instanceof Long) { - long dstNodeId = (Long) dst; - return nodeId == dstNodeId; - } else if (dst instanceof Node.Type) { - Node.Type dstType = (Node.Type) dst; - return graph.getNodeType(nodeId) == dstType; - } else { - return false; - } - } - - /** - * Internal backtracking function of {@link #walk}. - * - * @param srcNodeId source node - * @param dstNodeId destination node - * @return the found path, as a list of node ids - */ - private ArrayList backtracking(long srcNodeId, long dstNodeId) { - ArrayList path = new ArrayList<>(); - long currentNodeId = dstNodeId; - while (currentNodeId != srcNodeId) { - path.add(currentNodeId); - currentNodeId = parentNode.get(currentNodeId); - } - path.add(srcNodeId); - Collections.reverse(path); - return path; - } - - /** - * Find a common descendant between two given nodes using two parallel BFS - * - * @param lhsNode the first node - * @param rhsNode the second node - * @return the found path, as a list of node ids - */ - public Long findCommonDescendant(long lhsNode, long rhsNode) { - Queue lhsStack = new ArrayDeque<>(); - Queue rhsStack = new ArrayDeque<>(); - HashSet lhsVisited = new HashSet<>(); - HashSet rhsVisited = new HashSet<>(); - lhsStack.add(lhsNode); - rhsStack.add(rhsNode); - lhsVisited.add(lhsNode); - rhsVisited.add(rhsNode); - - this.nbEdgesAccessed = 0; - Long curNode; - - while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) { - if (!lhsStack.isEmpty()) { - curNode = lhsStack.poll(); - nbEdgesAccessed += graph.outdegree(curNode); - LazyLongIterator it = filterSuccessors(curNode, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - if (!lhsVisited.contains(neighborNodeId)) { - if (rhsVisited.contains(neighborNodeId)) - return neighborNodeId; - lhsStack.add(neighborNodeId); - lhsVisited.add(neighborNodeId); - } - } - } - - if (!rhsStack.isEmpty()) { - curNode = rhsStack.poll(); - nbEdgesAccessed += graph.outdegree(curNode); - LazyLongIterator it = filterSuccessors(curNode, edgesRestrictions); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { - if (!rhsVisited.contains(neighborNodeId)) { - if (lhsVisited.contains(neighborNodeId)) - return neighborNodeId; - rhsStack.add(neighborNodeId); - rhsVisited.add(neighborNodeId); - } - } - } - } - - return null; - } - - public interface NodeIdConsumer extends LongConsumer { - /** - * Callback for incrementally receiving node identifiers during a graph visit. - */ - void accept(long nodeId); - } - - public interface EdgeIdConsumer { - /** - * Callback for incrementally receiving edge identifiers during a graph visit. - */ - void accept(long srcId, long dstId); - } - - public interface PathConsumer extends Consumer> { - /** - * Callback for incrementally receiving node paths (made of node identifiers) during a graph visit. - */ - void accept(ArrayList path); - } -} diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java deleted file mode 100644 index 56c8369..0000000 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java +++ /dev/null @@ -1,62 +0,0 @@ -package org.softwareheritage.graph.experiments.forks; - -import com.martiansoftware.jsap.*; -import org.softwareheritage.graph.SwhBidirectionalGraph; -import org.softwareheritage.graph.Traversal; - -import java.io.IOException; -import java.util.Scanner; - -public class FindCommonAncestor { - private SwhBidirectionalGraph graph; - - private void load_graph(String graphBasename) throws IOException { - System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); - System.err.println("Graph loaded."); - } - - private static JSAPResult parse_args(String[] args) { - JSAPResult config = null; - try { - SimpleJSAP jsap = new SimpleJSAP(FindCommonAncestor.class.getName(), "", - new Parameter[]{ - new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', - "edges", "Edges constraints"), - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', - "graph", "Basename of the compressed graph"),}); - - config = jsap.parse(args); - if (jsap.messagePrinted()) { - System.exit(1); - } - } catch (JSAPException e) { - e.printStackTrace(); - } - return config; - } - - public static void main(String[] args) { - JSAPResult config = parse_args(args); - - String graphPath = config.getString("graphPath"); - String edgesFmt = config.getString("edgesFmt"); - - FindCommonAncestor fca = new FindCommonAncestor(); - try { - fca.load_graph(graphPath); - } catch (IOException e) { - System.out.println("Could not load graph: " + e); - System.exit(2); - } - - Scanner input = new Scanner(System.in); - while (input.hasNextLong()) { - long lhsNode = input.nextLong(); - long rhsNode = input.nextLong(); - - Traversal t = new Traversal(fca.graph.symmetrize(), "forward", edgesFmt); - System.out.println(t.findCommonDescendant(lhsNode, rhsNode)); - } - } -} diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java deleted file mode 100644 index b5f318c..0000000 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java +++ /dev/null @@ -1,123 +0,0 @@ -package org.softwareheritage.graph.experiments.forks; - -import com.martiansoftware.jsap.*; -import it.unimi.dsi.big.webgraph.LazyLongIterator; -import org.softwareheritage.graph.SwhBidirectionalGraph; -import org.softwareheritage.graph.Node; - -import java.io.IOException; -import java.util.*; - -public class FindPath { - private SwhBidirectionalGraph graph; - private Long emptySnapshot; - - private void load_graph(String graphBasename) throws IOException { - System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = SwhBidirectionalGraph.loadMapped(graphBasename).symmetrize(); - System.err.println("Graph loaded."); - this.emptySnapshot = null; - } - - private static JSAPResult parse_args(String[] args) { - JSAPResult config = null; - try { - SimpleJSAP jsap = new SimpleJSAP(FindPath.class.getName(), "", - new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"),}); - - config = jsap.parse(args); - if (jsap.messagePrinted()) { - System.exit(1); - } - } catch (JSAPException e) { - e.printStackTrace(); - } - return config; - } - - private boolean nodeIsEmptySnapshot(Long node) { - if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP - && this.graph.outdegree(node) == 0) { - System.err.println("Found empty snapshot: " + node); - this.emptySnapshot = node; - } - return node.equals(this.emptySnapshot); - } - - private Boolean shouldVisit(Long node) { - Node.Type nt = this.graph.getNodeType(node); - if (nt != Node.Type.REV && nt != Node.Type.REL && nt != Node.Type.SNP && nt != Node.Type.ORI) { - return false; - } - if (this.nodeIsEmptySnapshot(node)) - return false; - return true; - } - - private ArrayList findPath(Long src, Long dst) { - HashSet visited = new HashSet<>(); - Queue queue = new ArrayDeque<>(); - Map parentNode = new HashMap<>(); - - queue.add(src); - visited.add(src); - - while (!queue.isEmpty()) { - long currentNode = queue.poll(); - - final LazyLongIterator iterator = graph.successors(currentNode); - long succ; - while ((succ = iterator.nextLong()) != -1) { - if (!shouldVisit(succ) || visited.contains(succ)) - continue; - visited.add(succ); - queue.add(succ); - parentNode.put(succ, currentNode); - - if (succ == dst) { - ArrayList path = new ArrayList<>(); - long n = dst; - while (n != src) { - path.add(n); - n = parentNode.get(n); - } - path.add(src); - Collections.reverse(path); - return path; - } - } - } - return null; - } - - public static void main(String[] args) { - JSAPResult config = parse_args(args); - - String graphPath = config.getString("graphPath"); - - FindPath fpath = new FindPath(); - try { - fpath.load_graph(graphPath); - } catch (IOException e) { - System.out.println("Could not load graph: " + e); - System.exit(2); - } - - Scanner input = new Scanner(System.in); - while (input.hasNextLong()) { - long lhsNode = input.nextLong(); - long rhsNode = input.nextLong(); - - ArrayList path = fpath.findPath(lhsNode, rhsNode); - if (path != null) { - for (Long n : path) { - System.out.format("%d ", n); - } - System.out.println(); - } else { - System.out.println("null"); - } - } - } -} diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java deleted file mode 100644 index 8d5e5fa..0000000 --- a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java +++ /dev/null @@ -1,130 +0,0 @@ -package org.softwareheritage.graph.experiments.multiplicationfactor; - -import com.martiansoftware.jsap.*; -import org.softwareheritage.graph.SwhBidirectionalGraph; -import org.softwareheritage.graph.Node; -import org.softwareheritage.graph.Traversal; - -import java.io.IOException; -import java.util.Scanner; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -public class GenDistribution { - private SwhBidirectionalGraph graph; - - private static JSAPResult parse_args(String[] args) { - JSAPResult config = null; - try { - SimpleJSAP jsap = new SimpleJSAP(GenDistribution.class.getName(), "", - new Parameter[]{ - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', - "graph", "Basename of the compressed graph"), - new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's', - "srctype", "Source node type"), - new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd', - "dsttype", "Destination node type"), - new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', - "edges", "Edges constraints"), - - new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't', - "numthreads", "Number of threads"),}); - - config = jsap.parse(args); - if (jsap.messagePrinted()) { - System.exit(1); - } - } catch (JSAPException e) { - e.printStackTrace(); - } - return config; - } - - public static void main(String[] args) { - JSAPResult config = parse_args(args); - - String graphPath = config.getString("graphPath"); - Node.Type srcType = Node.Type.fromStr(config.getString("srcType")); - Node.Type dstType = Node.Type.fromStr(config.getString("dstType")); - String edgesFmt = config.getString("edgesFmt"); - int numThreads = config.getInt("numThreads"); - - GenDistribution tp = new GenDistribution(); - try { - tp.load_graph(graphPath); - } catch (IOException e) { - System.out.println("Could not load graph: " + e); - System.exit(2); - } - - final long END_OF_QUEUE = -1L; - - ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); - ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); - - service.submit(() -> { - try { - Scanner input = new Scanner(System.in); - while (input.hasNextLong()) { - long node = input.nextLong(); - if (tp.graph.getNodeType(node) == srcType) { - queue.put(node); - } - } - } catch (InterruptedException e) { - e.printStackTrace(); - } finally { - for (int i = 0; i < numThreads; ++i) { - try { - queue.put(END_OF_QUEUE); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - } - }); - - for (int i = 0; i < numThreads; ++i) { - service.submit(() -> { - SwhBidirectionalGraph thread_graph = tp.graph.copy(); - long startTime; - double totalTime; - - while (true) { - Long node = null; - try { - node = queue.take(); - } catch (InterruptedException e) { - e.printStackTrace(); - } - if (node == null || node == END_OF_QUEUE) { - return; - } - - Traversal t = new Traversal(thread_graph, "backward", edgesFmt); - int[] count = {0}; - - startTime = System.nanoTime(); - t.visitNodesVisitor(node, (curnode) -> { - if (tp.graph.getNodeType(curnode) == dstType) { - count[0]++; - } - }); - long endTime = System.nanoTime(); - totalTime = (double) (endTime - startTime) / 1e9; - System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(), - t.getNbEdgesAccessed(), totalTime); - } - }); - } - - service.shutdown(); - } - - private void load_graph(String graphBasename) throws IOException { - System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); - System.err.println("Graph loaded."); - } -} diff --git a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java index 4a2bb79..3623bb0 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java @@ -1,110 +1,113 @@ package org.softwareheritage.graph.utils; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.*; import java.io.IOException; import java.time.Duration; import java.util.HashSet; import java.util.Scanner; import java.util.Stack; /* sample invocation on granet.internal.softwareheritage.org for benchmarking * purposes, with the main swh-graph service already running: * * $ java -cp ~/swh-environment/swh-graph/java/target/swh-graph-0.3.0.jar -Xmx300G -XX:PretenureSizeThreshold=512M -XX:MaxNewSize=4G -XX:+UseLargePages -XX:+UseTransparentHugePages -XX:+UseNUMA -XX:+UseTLAB -XX:+ResizeTLAB org.softwareheritage.graph.utils.FindEarliestRevision --timing /dev/shm/swh-graph/default/graph * */ public class FindEarliestRevision { public static void main(String[] args) throws IOException, ClassNotFoundException { String graphPath = args[0]; boolean timing = false; long ts, elapsedNanos; Duration elapsed; if (args.length >= 2 && (args[0].equals("-t") || args[0].equals("--timing"))) { timing = true; graphPath = args[1]; System.err.println("started with timing option, will keep track of elapsed time"); } System.err.println("loading transposed graph..."); ts = System.nanoTime(); SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(graphPath).transpose(); elapsed = Duration.ofNanos(System.nanoTime() - ts); System.err.println(String.format("transposed graph loaded (duration: %s).", elapsed)); System.err.println("loading revision timestamps..."); ts = System.nanoTime(); graph.loadCommitterTimestamps(); elapsed = Duration.ofNanos(System.nanoTime() - ts); System.err.println(String.format("revision timestamps loaded (duration: %s).", elapsed)); Scanner stdin = new Scanner(System.in); AllowedEdges edges = new AllowedEdges("cnt:dir,dir:dir,dir:rev"); String rawSWHID = null; SWHID srcSWHID = null; long lineCount = 0; long srcNodeId = -1; if (timing) { System.err.println("starting SWHID processing..."); elapsed = Duration.ZERO; } while (stdin.hasNextLine()) { if (timing) ts = System.nanoTime(); rawSWHID = stdin.nextLine().strip(); lineCount++; try { srcSWHID = new SWHID(rawSWHID); srcNodeId = graph.getNodeId(srcSWHID); } catch (IllegalArgumentException e) { System.err .println(String.format("skipping invalid or unknown SWHID %s on line %d", rawSWHID, lineCount)); continue; } if (timing) System.err.println("starting traversal for: " + srcSWHID.toString()); Stack stack = new Stack<>(); HashSet visited = new HashSet<>(); stack.push(srcNodeId); visited.add(srcNodeId); long minRevId = -1; long minTimestamp = Long.MAX_VALUE; while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (graph.getNodeType(currentNodeId) == Node.Type.REV) { long committerTs = graph.getCommitterTimestamp(currentNodeId); if (committerTs < minTimestamp) { minRevId = currentNodeId; minTimestamp = committerTs; } } - LazyLongIterator it = Traversal.filterSuccessors(graph, currentNodeId, edges); + LazyLongIterator it = graph.successors(currentNodeId); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { + if (!edges.isAllowed(graph.getNodeType(currentNodeId), graph.getNodeType(neighborNodeId))) { + continue; + } if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } } if (minRevId == -1) { System.err.println("no revision found containing: " + srcSWHID.toString()); } else { System.out.println(srcSWHID.toString() + "\t" + graph.getSWHID(minRevId).toString()); } if (timing) { elapsedNanos = System.nanoTime() - ts; // processing time for current SWHID elapsed = elapsed.plus(Duration.ofNanos(elapsedNanos)); // cumulative processing time for all SWHIDs System.err.printf("visit time (s):\t%.6f\n", (double) elapsedNanos / 1_000_000_000); } } if (timing) System.err.printf("processed %d SWHIDs in %s (%s avg)\n", lineCount, elapsed, elapsed.dividedBy(lineCount)); } }