diff --git a/java/pom.xml b/java/pom.xml index 26f0ade..c59405c 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,274 +1,341 @@ 4.0.0 org.softwareheritage.graph swh-graph ${git.closest.tag.name} swh-graph https://forge.softwareheritage.org/source/swh-graph/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 org.junit.jupiter junit-jupiter-api 5.7.0 test + + org.junit.vintage + junit-vintage-engine + 5.7.0 + + + junit + junit + 4.12 + org.junit.jupiter junit-jupiter-engine 5.7.0 test org.hamcrest hamcrest 2.2 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.13.0 it.unimi.dsi webgraph-big 3.6.6 it.unimi.dsi fastutil 8.5.6 it.unimi.dsi dsiutils 2.6.17 it.unimi.dsi sux4j 5.2.3 it.unimi.dsi law 2.7.2 org.apache.hadoop hadoop-common org.umlgraph umlgraph org.eclipse.jetty.aggregate jetty-all it.unimi.di mg4j it.unimi.di mg4j-big com.martiansoftware jsap 2.1 net.sf.py4j py4j 0.10.9.3 commons-codec commons-codec 1.15 maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 11 11 -verbose -Xlint:all maven-surefire-plugin 2.22.2 maven-failsafe-plugin 2.22.2 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin 3.3.0 org.softwareheritage.graph.server.App jar-with-dependencies false make-assembly package single com.diffplug.spotless spotless-maven-plugin 2.4.1 *.md .gitignore true 4 4.16.0 .coding-style.xml pl.project13.maven git-commit-id-plugin 3.0.1 get-the-git-infos revision initialize true true true true v* git.closest.tag.name ^v true - - - - + + maven-source-plugin + 2.1.1 + + + bundle-sources + package + + jar-no-fork + test-jar-no-fork + + + + org.apache.maven.plugins maven-javadoc-plugin - 3.1.1 + 3.3.1 + + + resource-bundles + package + + + resource-bundle + + + test-resource-bundle + + + false + + + + javadoc-jar + package + + jar + + + + + true + + it.unimi.dsi:webgraph-big:* + + + https://webgraph.di.unimi.it/docs-big/ + https://dsiutils.di.unimi.it/docs/ + https://fastutil.di.unimi.it/docs/ + https://law.di.unimi.it/software/law-docs/ + + + + implSpec + a + Implementation Requirements: + + + implNote + a + Implementation Note: + + + - + diff --git a/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java b/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java index be19956..815b426 100644 --- a/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java +++ b/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java @@ -1,128 +1,128 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.Transform; import it.unimi.dsi.fastutil.longs.LongIterator; /** * A directed immutable graph which can be iterated in both directions (forward and backward). It * exposes the backward equivalents of the ImmutableGraph primitives (indegree() and * predecessors()). This is implemented by passing two graphs, one in the forward and one in the * backward direction. */ public class BidirectionalImmutableGraph extends ImmutableGraph { private final ImmutableGraph forwardGraph; private final ImmutableGraph backwardGraph; /** * Creates a bidirectional immutable graph * * @param forwardGraph The graph in the forward direction * @param backwardGraph The graph in the backward direction */ - protected BidirectionalImmutableGraph(ImmutableGraph forwardGraph, ImmutableGraph backwardGraph) { + public BidirectionalImmutableGraph(ImmutableGraph forwardGraph, ImmutableGraph backwardGraph) { this.forwardGraph = forwardGraph; this.backwardGraph = backwardGraph; } @Override public long numNodes() { assert forwardGraph.numNodes() == backwardGraph.numNodes(); return this.forwardGraph.numNodes(); } @Override public long numArcs() { assert forwardGraph.numArcs() == backwardGraph.numArcs(); return this.forwardGraph.numArcs(); } @Override public boolean randomAccess() { return this.forwardGraph.randomAccess() && this.backwardGraph.randomAccess(); } @Override public boolean hasCopiableIterators() { return forwardGraph.hasCopiableIterators() && backwardGraph.hasCopiableIterators(); } @Override public BidirectionalImmutableGraph copy() { return new BidirectionalImmutableGraph(this.forwardGraph.copy(), this.backwardGraph.copy()); } /** * Returns the transposed version of the bidirectional graph. Successors become predecessors, and * vice-versa. */ public BidirectionalImmutableGraph transpose() { return new BidirectionalImmutableGraph(backwardGraph, forwardGraph); } /** * Returns the symmetric version of the bidirectional graph. It returns the (lazy) union of the * forward graph and the backward graph. This is equivalent to removing the directionality of the * edges: the successors of a node are also its predecessors. * * @return a symmetric, undirected BidirectionalImmutableGraph. */ public BidirectionalImmutableGraph symmetrize() { ImmutableGraph symmetric = Transform.union(forwardGraph, backwardGraph); return new BidirectionalImmutableGraph(symmetric, symmetric); } /** * Returns the simplified version of the bidirectional graph. Works like symmetrize(), but also * removes the loop edges. * * @return a simplified (loopless and symmetric) BidirectionalImmutableGraph */ public BidirectionalImmutableGraph simplify() { ImmutableGraph simplified = Transform.simplify(forwardGraph, backwardGraph); return new BidirectionalImmutableGraph(simplified, simplified); } /** Returns the outdegree of a node */ @Override public long outdegree(long l) { return forwardGraph.outdegree(l); } /** Returns the indegree of a node */ public long indegree(long l) { return backwardGraph.outdegree(l); } /** Returns a lazy iterator over the successors of a given node. */ @Override public LazyLongIterator successors(long nodeId) { return forwardGraph.successors(nodeId); } /** Returns a lazy iterator over the predecessors of a given node. */ public LazyLongIterator predecessors(long nodeId) { return backwardGraph.successors(nodeId); } /** Returns a reference to an array containing the predecessors of a given node. */ public long[][] predecessorBigArray(long x) { return backwardGraph.successorBigArray(x); } /** Returns an iterator enumerating the indegrees of the nodes of this graph. */ public LongIterator indegrees() { return backwardGraph.outdegrees(); } /** Returns the underlying ImmutableGraph in the forward direction. */ public ImmutableGraph getForwardGraph() { return forwardGraph; } /** Returns the underlying ImmutableGraph in the backward direction. */ public ImmutableGraph getBackwardGraph() { return backwardGraph; } } diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java index a2d3f5a..e6f4494 100644 --- a/java/src/main/java/org/softwareheritage/graph/Entry.java +++ b/java/src/main/java/org/softwareheritage/graph/Entry.java @@ -1,193 +1,193 @@ package org.softwareheritage.graph; import java.io.*; import java.util.ArrayList; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; public class Entry { - private Graph graph; + private SwhBidirectionalGraph graph; public void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); System.err.println("Graph loaded."); } - public Graph get_graph() { + public SwhBidirectionalGraph get_graph() { return graph.copy(); } public String stats() { try { Stats stats = new Stats(graph.getPath()); ObjectMapper objectMapper = new ObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); return objectMapper.writeValueAsString(stats); } catch (IOException e) { throw new RuntimeException("Cannot read stats: " + e); } } public void check_swhid(String src) { graph.getNodeId(new SWHID(src)); } private int count_visitor(NodeCountVisitor f, long srcNodeId) { int[] count = {0}; f.accept(srcNodeId, (node) -> { count[0]++; }); return count[0]; } public int count_leaves(String direction, String edgesFmt, String src, long maxEdges) { long srcNodeId = graph.getNodeId(new SWHID(src)); Traversal t = new Traversal(graph.copy(), direction, edgesFmt, maxEdges); return count_visitor(t::leavesVisitor, srcNodeId); } public int count_neighbors(String direction, String edgesFmt, String src, long maxEdges) { long srcNodeId = graph.getNodeId(new SWHID(src)); Traversal t = new Traversal(graph.copy(), direction, edgesFmt, maxEdges); return count_visitor(t::neighborsVisitor, srcNodeId); } public int count_visit_nodes(String direction, String edgesFmt, String src, long maxEdges) { long srcNodeId = graph.getNodeId(new SWHID(src)); Traversal t = new Traversal(graph.copy(), direction, edgesFmt, maxEdges); return count_visitor(t::visitNodesVisitor, srcNodeId); } public QueryHandler get_handler(String clientFIFO) { return new QueryHandler(graph.copy(), clientFIFO); } private interface NodeCountVisitor { void accept(long nodeId, Traversal.NodeIdConsumer consumer); } public class QueryHandler { - Graph graph; + SwhBidirectionalGraph graph; BufferedWriter out; String clientFIFO; - public QueryHandler(Graph graph, String clientFIFO) { + public QueryHandler(SwhBidirectionalGraph graph, String clientFIFO) { this.graph = graph; this.clientFIFO = clientFIFO; this.out = null; } public void writeNode(SWHID swhid) { try { out.write(swhid.toString() + "\n"); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void writeEdge(SWHID src, SWHID dst) { try { out.write(src.toString() + " " + dst.toString() + "\n"); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void open() { try { FileOutputStream file = new FileOutputStream(this.clientFIFO); this.out = new BufferedWriter(new OutputStreamWriter(file)); } catch (IOException e) { throw new RuntimeException("Cannot open client FIFO: " + e); } } public void close() { try { out.close(); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void leaves(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { long srcNodeId = graph.getNodeId(new SWHID(src)); open(); Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.leaves(srcNodeId)) { writeNode(graph.getSWHID(nodeId)); } close(); } public void neighbors(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { long srcNodeId = graph.getNodeId(new SWHID(src)); open(); Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.neighbors(srcNodeId)) { writeNode(graph.getSWHID(nodeId)); } close(); } public void visit_nodes(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { long srcNodeId = graph.getNodeId(new SWHID(src)); open(); Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.visitNodes(srcNodeId)) { writeNode(graph.getSWHID(nodeId)); } close(); } public void visit_edges(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) { long srcNodeId = graph.getNodeId(new SWHID(src)); open(); Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges); t.visitNodesVisitor(srcNodeId, null, (srcId, dstId) -> { writeEdge(graph.getSWHID(srcId), graph.getSWHID(dstId)); }); close(); } public void walk(String direction, String edgesFmt, String algorithm, String src, String dst, long maxEdges, String returnTypes) { long srcNodeId = graph.getNodeId(new SWHID(src)); open(); ArrayList res; Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); if (dst.matches("ori|snp|rel|rev|dir|cnt")) { Node.Type dstType = Node.Type.fromStr(dst); res = t.walk(srcNodeId, dstType, algorithm); } else { long dstNodeId = graph.getNodeId(new SWHID(dst)); res = t.walk(srcNodeId, dstNodeId, algorithm); } for (Long nodeId : res) { writeNode(graph.getSWHID(nodeId)); } close(); } public void random_walk(String direction, String edgesFmt, int retries, String src, String dst, long maxEdges, String returnTypes) { long srcNodeId = graph.getNodeId(new SWHID(src)); open(); ArrayList res; Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes); if (dst.matches("ori|snp|rel|rev|dir|cnt")) { Node.Type dstType = Node.Type.fromStr(dst); res = t.randomWalk(srcNodeId, dstType, retries); } else { long dstNodeId = graph.getNodeId(new SWHID(dst)); res = t.randomWalk(srcNodeId, dstNodeId, retries); } for (Long nodeId : res) { writeNode(graph.getSWHID(nodeId)); } close(); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java deleted file mode 100644 index 8d9acf1..0000000 --- a/java/src/main/java/org/softwareheritage/graph/Graph.java +++ /dev/null @@ -1,304 +0,0 @@ -package org.softwareheritage.graph; - -import it.unimi.dsi.big.webgraph.ImmutableGraph; -import it.unimi.dsi.big.webgraph.LazyLongIterator; -import it.unimi.dsi.logging.ProgressLogger; -import org.softwareheritage.graph.maps.NodeIdMap; -import org.softwareheritage.graph.maps.NodeTypesMap; - -import java.io.IOException; - -/** - * Main class storing the compressed graph and node id mappings. - *

- * The compressed graph is stored using the WebGraph - * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent - * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id - * ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output - * (convert back to SWHID for users results). However, since graph traversal can be restricted - * depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored - * as well to avoid a full SWHID lookup. - * - * @author The Software Heritage developers - * @see org.softwareheritage.graph.AllowedEdges - * @see org.softwareheritage.graph.maps.NodeIdMap - * @see org.softwareheritage.graph.maps.NodeTypesMap - */ - -public class Graph extends ImmutableGraph { - /** - * Bidirectional graph containing two compressed {@link it.unimi.dsi.big.webgraph.BVGraph} one for - * each direction - */ - BidirectionalImmutableGraph graph; - - /** Path and basename of the compressed graph */ - String path; - /** Mapping long id ↔ SWHIDs */ - NodeIdMap nodeIdMap; - /** Mapping long id → node types */ - NodeTypesMap nodeTypesMap; - - /** - * Constructor. - * - * @param path path and basename of the compressed graph to load - */ - - private Graph(String path) throws IOException { - loadInternal(path, null, LoadMethod.MAPPED); - } - - /** - * Loading mechanisms - */ - - enum LoadMethod { - MEMORY, MAPPED, OFFLINE, - } - - protected Graph loadInternal(String path, ProgressLogger pl, LoadMethod method) throws IOException { - this.path = path; - ImmutableGraph direct = null; - ImmutableGraph transposed = null; - if (method == LoadMethod.MEMORY) { - direct = ImmutableGraph.load(path, pl); - transposed = ImmutableGraph.load(path + "-transposed", pl); - } else if (method == LoadMethod.MAPPED) { - direct = ImmutableGraph.load(path, pl); - transposed = ImmutableGraph.loadMapped(path + "-transposed", pl); - } else if (method == LoadMethod.OFFLINE) { - direct = ImmutableGraph.loadOffline(path, pl); - transposed = ImmutableGraph.loadOffline(path + "-transposed", pl); - } - this.graph = new BidirectionalImmutableGraph(direct, transposed); - this.nodeTypesMap = new NodeTypesMap(path); - this.nodeIdMap = new NodeIdMap(path, numNodes()); - return this; - } - - protected Graph() { - } - - public static Graph load(String path, ProgressLogger pl) throws IOException { - return new Graph().loadInternal(path, pl, LoadMethod.MEMORY); - } - - public static Graph loadMapped(String path, ProgressLogger pl) throws IOException { - return new Graph().loadInternal(path, pl, LoadMethod.MAPPED); - } - - public static Graph loadOffline(String path, ProgressLogger pl) throws IOException { - return new Graph().loadInternal(path, null, LoadMethod.OFFLINE); - } - - public static Graph load(String path) throws IOException { - return new Graph().loadInternal(path, null, LoadMethod.MEMORY); - } - - public static Graph loadMapped(String path) throws IOException { - return new Graph().loadInternal(path, null, LoadMethod.MAPPED); - } - - public static Graph loadOffline(String path) throws IOException { - return new Graph().loadInternal(path, null, LoadMethod.OFFLINE); - } - - /** - * Constructor used for copy() - */ - protected Graph(BidirectionalImmutableGraph graph, String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) { - this.graph = graph; - this.path = path; - this.nodeIdMap = nodeIdMap; - this.nodeTypesMap = nodeTypesMap; - } - - /** - * Return a flyweight copy of the graph. - */ - @Override - public Graph copy() { - return new Graph(this.graph.copy(), this.path, this.nodeIdMap, this.nodeTypesMap); - } - - @Override - public boolean randomAccess() { - return graph.randomAccess(); - } - - /** - * Return a transposed version of the graph. - */ - public Graph transpose() { - return new Graph(this.graph.transpose(), this.path, this.nodeIdMap, this.nodeTypesMap); - } - - /** - * Return a symmetric version of the graph. - */ - public Graph symmetrize() { - return new Graph(this.graph.symmetrize(), this.path, this.nodeIdMap, this.nodeTypesMap); - } - - /** - * Cleans up graph resources after use. - */ - public void cleanUp() throws IOException { - nodeIdMap.close(); - } - - /** - * Returns number of nodes in the graph. - * - * @return number of nodes in the graph - */ - @Override - public long numNodes() { - return graph.numNodes(); - } - - /** - * Returns number of edges in the graph. - * - * @return number of edges in the graph - */ - @Override - public long numArcs() { - return graph.numArcs(); - } - - /** - * Returns lazy iterator of successors of a node. - * - * @param nodeId node specified as a long id - * @return lazy iterator of successors of the node, specified as a - * WebGraph LazyLongIterator - */ - @Override - public LazyLongIterator successors(long nodeId) { - return graph.successors(nodeId); - } - - /** - * Returns lazy iterator of successors of a node while following a specific set of edge types. - * - * @param nodeId node specified as a long id - * @param allowedEdges the specification of which edges can be traversed - * @return lazy iterator of successors of the node, specified as a - * WebGraph LazyLongIterator - */ - public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) { - if (allowedEdges.restrictedTo == null) { - // All edges are allowed, bypass edge check - return this.successors(nodeId); - } else { - LazyLongIterator allSuccessors = this.successors(nodeId); - Graph thisGraph = this; - return new LazyLongIterator() { - @Override - public long nextLong() { - long neighbor; - while ((neighbor = allSuccessors.nextLong()) != -1) { - if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) { - return neighbor; - } - } - return -1; - } - - @Override - public long skip(final long n) { - long i; - for (i = 0; i < n && nextLong() != -1; i++) - ; - return i; - } - }; - } - } - - /** - * Returns the outdegree of a node. - * - * @param nodeId node specified as a long id - * @return outdegree of a node - */ - @Override - public long outdegree(long nodeId) { - return graph.outdegree(nodeId); - } - - /** - * Returns lazy iterator of predecessors of a node. - * - * @param nodeId node specified as a long id - * @return lazy iterator of predecessors of the node, specified as a - * WebGraph LazyLongIterator - */ - public LazyLongIterator predecessors(long nodeId) { - return graph.predecessors(nodeId); - } - - /** - * Returns the indegree of a node. - * - * @param nodeId node specified as a long id - * @return indegree of a node - */ - public long indegree(long nodeId) { - return graph.indegree(nodeId); - } - - /** - * Returns the underlying BidirectionalImmutableGraph. - * - * @return WebGraph ImmutableGraph - */ - public ImmutableGraph getGraph() { - return this.graph; - } - - /** - * Returns the graph full path. - * - * @return graph full path - */ - public String getPath() { - return path; - } - - /** - * Converts {@link SWHID} node to long. - * - * @param swhid node specified as a {@link SWHID} - * @return internal long node id - * @see SWHID - */ - public long getNodeId(SWHID swhid) { - return nodeIdMap.getNodeId(swhid); - } - - /** - * Converts long id node to {@link SWHID}. - * - * @param nodeId node specified as a long id - * @return external SWHID - * @see SWHID - */ - public SWHID getSWHID(long nodeId) { - return nodeIdMap.getSWHID(nodeId); - } - - /** - * Returns node type. - * - * @param nodeId node specified as a long id - * @return corresponding node type - * @see org.softwareheritage.graph.Node.Type - */ - public Node.Type getNodeType(long nodeId) { - return nodeTypesMap.getType(nodeId); - } -} diff --git a/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java b/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java index 3f3e7a3..18eb3b8 100644 --- a/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java +++ b/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java @@ -1,107 +1,107 @@ package org.softwareheritage.graph; import java.util.ArrayList; /** - *

NodesFiltering

+ * NodesFiltering *

* class that manages the filtering of nodes that have been returned after a visit of the graph. * parameterized by a string that represents either no filtering (*) or a set of node types. *

* * * * How to use NodesFiltering : * *
  * {@code
  *  Long id1 = .... // graph.getNodeType(id1) == CNT
  *  Long id2 = .... // graph.getNodeType(id2) == SNP
  *  Long id3 = .... // graph.getNodeType(id3) == ORI
  *  ArrayList nodeIds = nez ArrayList();
  *  nodeIds.add(id1); nodeIds.add(id2); nodeIds.add(id3);
  *
  *  NodeFiltering nds = new NodesFiltering("snp,ori"); // we allow only snp node types to be shown
  *  System.out.println(nds.filterByNodeTypes(nodeIds,graph)); // will print id2, id3
  *
  *  nds = NodesFiltering("*");
  *  System.out.println(nds.filterByNodeTypes(nodeIds,graph)); // will print id1, id2 id3
  *
  * }
  * 
*/ public class NodesFiltering { boolean restricted; ArrayList allowedNodesTypes; /** * Default constructor, in order to handle the * case (all types of nodes are allowed to be * returned). allowedNodesTypes will contains [SNP,CNT....] all types of nodes. * */ public NodesFiltering() { restricted = false; allowedNodesTypes = Node.Type.parse("*"); } /** * Constructor * * @param strTypes a formatted string describing the types of nodes we want to allow to be shown. * * NodesFilterind("cnt,snp") will set allowedNodesTypes to [CNT,SNP] * */ public NodesFiltering(String strTypes) { restricted = true; allowedNodesTypes = new ArrayList(); String[] types = strTypes.split(","); for (String type : types) { allowedNodesTypes.add(Node.Type.fromStr(type)); } } /** * Check if the type given in parameter is in the list of allowed types. * * @param typ the type of the node. */ public boolean typeIsAllowed(Node.Type typ) { return this.allowedNodesTypes.contains(typ); } /** *

* the function that filters the nodes returned, we browse the list of nodes found after a visit and * we create a new list with only the nodes that have a type that is contained in the list of * allowed types (allowedNodesTypes) *

* * @param nodeIds the nodes founded during the visit * @param g the graph in order to find the types of nodes from their id in nodeIds * @return a new list with the id of node which have a type in allowedTypes * * */ - public ArrayList filterByNodeTypes(ArrayList nodeIds, Graph g) { + public ArrayList filterByNodeTypes(ArrayList nodeIds, SwhBidirectionalGraph g) { ArrayList filteredNodes = new ArrayList(); for (Long node : nodeIds) { if (this.typeIsAllowed(g.getNodeType(node))) { filteredNodes.add(node); } } return filteredNodes; } } diff --git a/java/src/main/java/org/softwareheritage/graph/Subgraph.java b/java/src/main/java/org/softwareheritage/graph/Subgraph.java index 3e7e7fd..53ef937 100644 --- a/java/src/main/java/org/softwareheritage/graph/Subgraph.java +++ b/java/src/main/java/org/softwareheritage/graph/Subgraph.java @@ -1,224 +1,224 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.NodeIterator; import java.util.NoSuchElementException; public class Subgraph extends ImmutableGraph { - private final Graph underlyingGraph; + private final SwhBidirectionalGraph underlyingGraph; public final AllowedNodes allowedNodeTypes; private long nodeCount = -1; /** * Constructor. * */ - public Subgraph(Graph underlyingGraph, AllowedNodes allowedNodeTypes) { + public Subgraph(SwhBidirectionalGraph underlyingGraph, AllowedNodes allowedNodeTypes) { this.underlyingGraph = underlyingGraph.copy(); this.allowedNodeTypes = allowedNodeTypes; } /** * Return a flyweight copy of the graph. */ @Override public Subgraph copy() { return new Subgraph(this.underlyingGraph.copy(), allowedNodeTypes); } @Override public boolean randomAccess() { return underlyingGraph.randomAccess(); } /** * Return a transposed version of the graph. */ public Subgraph transpose() { return new Subgraph(underlyingGraph.transpose(), allowedNodeTypes); } /** * Return a symmetric version of the graph. */ public Subgraph symmetrize() { return new Subgraph(underlyingGraph.symmetrize(), allowedNodeTypes); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ @Override public long numNodes() { if (nodeCount == -1) { for (long i = 0; i < underlyingGraph.numNodes(); ++i) { if (nodeExists(i)) ++nodeCount; } } return nodeCount; } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ @Override public long numArcs() { throw new UnsupportedOperationException("Cannot determine the number of arcs in a subgraph"); } public long maxNodeNumber() { return underlyingGraph.numNodes(); } public boolean nodeExists(long node) { return allowedNodeTypes.isAllowed(underlyingGraph.getNodeType(node)); } /** * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of successors of the node, specified as a * WebGraph LazyLongIterator */ @Override public LazyLongIterator successors(long nodeId) { if (!nodeExists(nodeId)) { throw new IllegalArgumentException("Node " + nodeId + " not in subgraph"); } LazyLongIterator allSuccessors = underlyingGraph.successors(nodeId); return new LazyLongIterator() { @Override public long nextLong() { long neighbor; while ((neighbor = allSuccessors.nextLong()) != -1) { if (nodeExists(neighbor)) { return neighbor; } } return -1; } @Override public long skip(final long n) { long i; for (i = 0; i < n && nextLong() != -1; i++) ; return i; } }; } /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ @Override public long outdegree(long nodeId) { long deg = 0; for (LazyLongIterator allSuccessors = successors(nodeId); allSuccessors.nextLong() != -1; ++deg) ; return deg; } @Override public NodeIterator nodeIterator() { return new NodeIterator() { final long n = numNodes(); long i = -1; long done = 0; @Override public boolean hasNext() { return done <= n; } @Override public long nextLong() { if (!hasNext()) throw new NoSuchElementException(); do { ++i; if (i >= underlyingGraph.numNodes()) throw new NoSuchElementException(); } while (!nodeExists(i)); ++done; return i; } @Override public long outdegree() { return Subgraph.this.outdegree(i); } @Override public LazyLongIterator successors() { return Subgraph.this.successors(i); } }; } /** * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of predecessors of the node, specified as a * WebGraph LazyLongIterator */ public LazyLongIterator predecessors(long nodeId) { return this.transpose().successors(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return this.transpose().outdegree(nodeId); } /** * Converts {@link SWHID} node to long. * * @param swhid node specified as a {@link SWHID} * @return internal long node id * @see SWHID */ public long getNodeId(SWHID swhid) { return underlyingGraph.getNodeId(swhid); } /** * Converts long id node to {@link SWHID}. * * @param nodeId node specified as a long id * @return external SWHID * @see SWHID */ public SWHID getSWHID(long nodeId) { return underlyingGraph.getSWHID(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see Node.Type */ public Node.Type getNodeType(long nodeId) { return underlyingGraph.getNodeType(nodeId); } } diff --git a/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java b/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java new file mode 100644 index 0000000..df12875 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java @@ -0,0 +1,166 @@ +package org.softwareheritage.graph; + +import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator; +import it.unimi.dsi.logging.ProgressLogger; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Class representing the compressed Software Heritage graph in both directions (forward and + * backward). + * + * This class uses the {@link BidirectionalImmutableGraph} class internally to implement the + * backward equivalent of graph operations ({@link SwhBidirectionalGraph#indegree(long)}, + * {@link SwhBidirectionalGraph#predecessors(long)}, etc.) by holding a reference to two + * {@link SwhUnidirectionalGraph} (a forward graph and a backward graph). + * + * Both graphs share their graph properties in memory by storing references to the same + * {@link SwhGraphProperties} object. + * + * @author The Software Heritage developers + * @see SwhUnidirectionalGraph + */ + +public class SwhBidirectionalGraph extends BidirectionalImmutableGraph implements SwhGraph { + /** Property data of the graph (id/type mappings etc.) */ + private final SwhGraphProperties properties; + + private final SwhUnidirectionalGraph forwardGraph; + private final SwhUnidirectionalGraph backwardGraph; + + public SwhBidirectionalGraph(SwhUnidirectionalGraph forwardGraph, SwhUnidirectionalGraph backwardGraph, + SwhGraphProperties properties) { + super(forwardGraph, backwardGraph); + this.forwardGraph = forwardGraph; + this.backwardGraph = backwardGraph; + this.properties = properties; + } + + private SwhBidirectionalGraph(BidirectionalImmutableGraph graph, SwhGraphProperties properties) { + super(graph.getForwardGraph(), graph.getBackwardGraph()); + this.forwardGraph = (SwhUnidirectionalGraph) graph.getForwardGraph(); + this.backwardGraph = (SwhUnidirectionalGraph) graph.getBackwardGraph(); + this.properties = properties; + } + + public static SwhBidirectionalGraph load(LoadMethod method, String path, InputStream is, ProgressLogger pl) + throws IOException { + SwhUnidirectionalGraph forward = SwhUnidirectionalGraph.loadGraphOnly(method, path, is, pl); + SwhUnidirectionalGraph backward = SwhUnidirectionalGraph.loadGraphOnly(method, path + "-transposed", is, pl); + SwhGraphProperties properties = SwhGraphProperties.load(path); + forward.setProperties(properties); + backward.setProperties(properties); + return new SwhBidirectionalGraph(forward, backward, properties); + } + + public static SwhBidirectionalGraph loadLabelled(LoadMethod method, String path, InputStream is, ProgressLogger pl) + throws IOException { + SwhUnidirectionalGraph forward = SwhUnidirectionalGraph.loadLabelledGraphOnly(method, path, is, pl); + SwhUnidirectionalGraph backward = SwhUnidirectionalGraph.loadLabelledGraphOnly(method, path + "-transposed", is, + pl); + SwhGraphProperties properties = SwhGraphProperties.load(path); + forward.setProperties(properties); + backward.setProperties(properties); + return new SwhBidirectionalGraph(forward, backward, properties); + } + + // loadXXX methods from ImmutableGraph + public static SwhBidirectionalGraph load(String path, ProgressLogger pl) throws IOException { + return load(LoadMethod.STANDARD, path, null, pl); + } + public static SwhBidirectionalGraph load(String path) throws IOException { + return load(LoadMethod.STANDARD, path, null, null); + } + public static SwhBidirectionalGraph loadMapped(String path, ProgressLogger pl) throws IOException { + return load(LoadMethod.MAPPED, path, null, pl); + } + public static SwhBidirectionalGraph loadMapped(String path) throws IOException { + return load(LoadMethod.MAPPED, path, null, null); + } + public static SwhBidirectionalGraph loadOffline(String path, ProgressLogger pl) throws IOException { + return load(LoadMethod.OFFLINE, path, null, pl); + } + public static SwhBidirectionalGraph loadOffline(String path) throws IOException { + return load(LoadMethod.OFFLINE, path, null, null); + } + + // Labelled versions of the loadXXX methods from ImmutableGraph + public static SwhBidirectionalGraph loadLabelled(String path, ProgressLogger pl) throws IOException { + return loadLabelled(LoadMethod.STANDARD, path, null, pl); + } + public static SwhBidirectionalGraph loadLabelled(String path) throws IOException { + return loadLabelled(LoadMethod.STANDARD, path, null, null); + } + public static SwhBidirectionalGraph loadLabelledMapped(String path, ProgressLogger pl) throws IOException { + return loadLabelled(LoadMethod.MAPPED, path, null, pl); + } + public static SwhBidirectionalGraph loadLabelledMapped(String path) throws IOException { + return loadLabelled(LoadMethod.MAPPED, path, null, null); + } + public static SwhBidirectionalGraph loadLabelledOffline(String path, ProgressLogger pl) throws IOException { + return loadLabelled(LoadMethod.OFFLINE, path, null, pl); + } + public static SwhBidirectionalGraph loadLabelledOffline(String path) throws IOException { + return loadLabelled(LoadMethod.OFFLINE, path, null, null); + } + + @Override + public SwhBidirectionalGraph copy() { + return new SwhBidirectionalGraph(forwardGraph, backwardGraph, this.properties); + } + + @Override + public SwhBidirectionalGraph transpose() { + return new SwhBidirectionalGraph(super.transpose(), this.properties); + } + + @Override + public SwhBidirectionalGraph symmetrize() { + return new SwhBidirectionalGraph(super.symmetrize(), this.properties); + } + + public SwhUnidirectionalGraph getForwardGraph() { + return this.forwardGraph; + } + + public SwhUnidirectionalGraph getBackwardGraph() { + return this.backwardGraph; + } + + /** + * Returns a *labelled* lazy iterator over the successors of a given node. The iteration terminates + * when -1 is returned. + */ + public ArcLabelledNodeIterator.LabelledArcIterator labelledSuccessors(long x) { + return forwardGraph.labelledSuccessors(x); + } + + /** + * Returns a *labelled* lazy iterator over the predecessors of a given node. The iteration + * terminates when -1 is returned. + */ + public ArcLabelledNodeIterator.LabelledArcIterator labelledPredecessors(long x) { + return backwardGraph.labelledSuccessors(x); + } + + public void close() throws IOException { + this.properties.close(); + } + + public String getPath() { + return properties.getPath(); + } + + public long getNodeId(SWHID swhid) { + return properties.getNodeId(swhid); + } + + public SWHID getSWHID(long nodeId) { + return properties.getSWHID(nodeId); + } + + public Node.Type getNodeType(long nodeId) { + return properties.getNodeType(nodeId); + } +} diff --git a/java/src/main/java/org/softwareheritage/graph/SwhGraph.java b/java/src/main/java/org/softwareheritage/graph/SwhGraph.java new file mode 100644 index 0000000..b16e677 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/SwhGraph.java @@ -0,0 +1,47 @@ +package org.softwareheritage.graph; + +import java.io.IOException; + +/** + * Common interface for SWH graph classes. + */ +public interface SwhGraph { + /** + * Cleans up graph resources after use. + */ + void close() throws IOException; + + /** + * Converts {@link SWHID} node to long. + * + * @param swhid node specified as a {@link SWHID} + * @return internal long node id + * @see SWHID + */ + long getNodeId(SWHID swhid); + + /** + * Converts long id node to {@link SWHID}. + * + * @param nodeId node specified as a long id + * @return external SWHID + * @see SWHID + */ + SWHID getSWHID(long nodeId); + + /** + * Returns node type. + * + * @param nodeId node specified as a long id + * @return corresponding node type + * @see Node.Type + */ + Node.Type getNodeType(long nodeId); + + /** + * Returns the graph full path. + * + * @return graph full path + */ + String getPath(); +} diff --git a/java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java b/java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java new file mode 100644 index 0000000..ef5e304 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java @@ -0,0 +1,85 @@ +package org.softwareheritage.graph; + +import org.softwareheritage.graph.maps.NodeIdMap; +import org.softwareheritage.graph.maps.NodeTypesMap; + +import java.io.IOException; + +/** + * This objects contains SWH graph properties such as node labels. + * + * Some property mappings are necessary because Software Heritage uses string based persistent + * identifiers (SWHID) while WebGraph uses integers internally. + * + * The two node ID mappings (long id ↔ SWHID) are used for the input (users refer to the graph + * using SWHID) and the output (convert back to SWHID for users results). + * + * Since graph traversal can be restricted depending on the node type (see {@link AllowedEdges}), a + * long id → node type map is stored as well to avoid a full SWHID lookup. + * + * @see NodeIdMap + * @see NodeTypesMap + */ +public class SwhGraphProperties { + /** Path and basename of the compressed graph */ + String path; + /** Mapping long id ↔ SWHIDs */ + NodeIdMap nodeIdMap; + /** Mapping long id → node types */ + NodeTypesMap nodeTypesMap; + + protected SwhGraphProperties(String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) { + this.path = path; + this.nodeIdMap = nodeIdMap; + this.nodeTypesMap = nodeTypesMap; + } + + public static SwhGraphProperties load(String path) throws IOException { + return new SwhGraphProperties(path, new NodeIdMap(path), new NodeTypesMap(path)); + } + + /** + * Cleans up graph resources after use. + */ + public void close() throws IOException { + this.nodeIdMap.close(); + } + + public String getPath() { + return path; + } + + /** + * Converts {@link SWHID} node to long. + * + * @param swhid node specified as a {@link SWHID} + * @return internal long node id + * @see SWHID + */ + public long getNodeId(SWHID swhid) { + return nodeIdMap.getNodeId(swhid); + } + + /** + * Converts long id node to {@link SWHID}. + * + * @param nodeId node specified as a long id + * @return external SWHID + * @see SWHID + */ + public SWHID getSWHID(long nodeId) { + return nodeIdMap.getSWHID(nodeId); + } + + /** + * Returns node type. + * + * @param nodeId node specified as a long id + * @return corresponding node type + * @see Node.Type + */ + public Node.Type getNodeType(long nodeId) { + return nodeTypesMap.getType(nodeId); + } +} diff --git a/java/src/main/java/org/softwareheritage/graph/SwhUnidirectionalGraph.java b/java/src/main/java/org/softwareheritage/graph/SwhUnidirectionalGraph.java new file mode 100644 index 0000000..35c35f7 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/SwhUnidirectionalGraph.java @@ -0,0 +1,221 @@ +package org.softwareheritage.graph; + +import it.unimi.dsi.big.webgraph.ImmutableGraph; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator; +import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; +import it.unimi.dsi.logging.ProgressLogger; +import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; + +import java.io.IOException; +import java.io.InputStream; + +/** + * Class representing the compressed Software Heritage graph in a single direction. + *

+ * The compressed graph is stored using the WebGraph + * framework. This class contains an {@link ImmutableGraph} representing the graph itself, as well + * as a reference to the object containing the graph properties (e.g. node labels). Optionally, + * arc labels (properties stored on the graph edges) can also be loaded with the + * loadLabelled...() function family. + * + * @author The Software Heritage developers + * @see SwhGraphProperties + * @see SwhUnidirectionalGraph + */ + +public class SwhUnidirectionalGraph extends ImmutableGraph implements SwhGraph { + /** Underlying ImmutableGraph */ + private final ImmutableGraph graph; + + /** Labelled ImmutableGraph, null if labels are not loaded */ + private ArcLabelledImmutableGraph labelledGraph; + + /** Property data of the graph (id/type mappings etc.) */ + private SwhGraphProperties properties; + + protected SwhUnidirectionalGraph(ImmutableGraph graph, SwhGraphProperties properties) { + this.graph = graph; + this.properties = properties; + } + + protected SwhUnidirectionalGraph(ArcLabelledImmutableGraph graph, SwhGraphProperties properties) { + this.graph = graph; + this.labelledGraph = graph; + this.properties = properties; + } + + /** + * Load the (unlabelled) graph only, without the SWH properties. + */ + public static SwhUnidirectionalGraph loadGraphOnly(LoadMethod method, String path, InputStream is, + ProgressLogger pl) throws IOException { + return new SwhUnidirectionalGraph(ImmutableGraph.load(method, path, is, pl), null); + } + + /** + * Load the labelled graph only, without the SWH properties. + */ + public static SwhUnidirectionalGraph loadLabelledGraphOnly(LoadMethod method, String path, InputStream is, + ProgressLogger pl) throws IOException { + BitStreamArcLabelledImmutableGraph g = (BitStreamArcLabelledImmutableGraph) BitStreamArcLabelledImmutableGraph + .load(method, path, is, pl); + return new SwhUnidirectionalGraph(g, null); + } + + /** + * Load the SWH properties of the graph from a given path. + */ + public void loadProperties(String path) throws IOException { + properties = SwhGraphProperties.load(path); + } + + /** + * Setter for the SWH graph properties. + * + * @param properties The {@link SwhGraphProperties} object containing the graph properties + */ + public void setProperties(SwhGraphProperties properties) { + this.properties = properties; + } + + /** + * Load the unlabelled graph and its SWH properties. + */ + public static SwhUnidirectionalGraph load(LoadMethod method, String path, InputStream is, ProgressLogger pl) + throws IOException { + SwhUnidirectionalGraph g = loadGraphOnly(method, path, is, pl); + g.loadProperties(path); + return g; + } + + /** + * Load the labelled graph and its SWH properties. + */ + public static SwhUnidirectionalGraph loadLabelled(LoadMethod method, String path, InputStream is, ProgressLogger pl) + throws IOException { + SwhUnidirectionalGraph g = loadLabelledGraphOnly(method, path, is, pl); + g.loadProperties(path); + return g; + } + + // loadXXX methods of ImmutableGraph + public static SwhUnidirectionalGraph load(String path, ProgressLogger pl) throws IOException { + return load(LoadMethod.STANDARD, path, null, pl); + } + public static SwhUnidirectionalGraph load(String path) throws IOException { + return load(LoadMethod.STANDARD, path, null, null); + } + public static SwhUnidirectionalGraph loadMapped(String path, ProgressLogger pl) throws IOException { + return load(LoadMethod.MAPPED, path, null, pl); + } + public static SwhUnidirectionalGraph loadMapped(String path) throws IOException { + return load(LoadMethod.MAPPED, path, null, null); + } + public static SwhUnidirectionalGraph loadOffline(String path, ProgressLogger pl) throws IOException { + return load(LoadMethod.OFFLINE, path, null, pl); + } + public static SwhUnidirectionalGraph loadOffline(String path) throws IOException { + return load(LoadMethod.OFFLINE, path, null, null); + } + + // Labelled versions of the loadXXX methods from ImmutableGraph + public static SwhUnidirectionalGraph loadLabelled(String path, ProgressLogger pl) throws IOException { + return loadLabelled(LoadMethod.STANDARD, path, null, pl); + } + public static SwhUnidirectionalGraph loadLabelled(String path) throws IOException { + return loadLabelled(LoadMethod.STANDARD, path, null, null); + } + public static SwhUnidirectionalGraph loadLabelledMapped(String path, ProgressLogger pl) throws IOException { + return loadLabelled(LoadMethod.MAPPED, path, null, pl); + } + public static SwhUnidirectionalGraph loadLabelledMapped(String path) throws IOException { + return loadLabelled(LoadMethod.MAPPED, path, null, null); + } + public static SwhUnidirectionalGraph loadLabelledOffline(String path, ProgressLogger pl) throws IOException { + return loadLabelled(LoadMethod.OFFLINE, path, null, pl); + } + public static SwhUnidirectionalGraph loadLabelledOffline(String path) throws IOException { + return loadLabelled(LoadMethod.OFFLINE, path, null, null); + } + + @Override + public SwhUnidirectionalGraph copy() { + return new SwhUnidirectionalGraph(this.graph.copy(), this.properties); + } + + @Override + public boolean randomAccess() { + return graph.randomAccess(); + } + + public void close() throws IOException { + this.properties.close(); + } + + @Override + public long numNodes() { + return graph.numNodes(); + } + + @Override + public long numArcs() { + return graph.numArcs(); + } + + @Override + public LazyLongIterator successors(long nodeId) { + return graph.successors(nodeId); + } + + /** + * Returns a labelled node iterator for scanning the graph sequentially, starting from the + * first node. + */ + public ArcLabelledNodeIterator labelledNodeIterator() { + if (labelledGraph == null) { + throw new RuntimeException("Calling labelledNodeIterator() but labels were not loaded."); + } + return labelledGraph.nodeIterator(); + } + + /** + * Returns a labelled node iterator for scanning the graph sequentially, starting from a + * given node. + */ + public ArcLabelledNodeIterator labelledNodeIterator(long from) { + if (labelledGraph == null) { + throw new RuntimeException("Calling labelledNodeIterator() but labels were not loaded."); + } + return labelledGraph.nodeIterator(from); + } + + /** + * Returns a labelled lazy iterator over the successors of a given node. The iteration + * terminates when -1 is returned. + */ + public ArcLabelledNodeIterator.LabelledArcIterator labelledSuccessors(long x) { + if (labelledGraph == null) { + throw new RuntimeException("Calling labelledNodeIterator() but labels were not loaded."); + } + return labelledGraph.successors(x); + } + + @Override + public long outdegree(long nodeId) { + return graph.outdegree(nodeId); + } + + public String getPath() { + return properties.getPath(); + } + public long getNodeId(SWHID swhid) { + return properties.getNodeId(swhid); + } + public SWHID getSWHID(long nodeId) { + return properties.getSWHID(nodeId); + } + public Node.Type getNodeType(long nodeId) { + return properties.getNodeType(nodeId); + } +} diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java index 4c8c669..71c4407 100644 --- a/java/src/main/java/org/softwareheritage/graph/Traversal.java +++ b/java/src/main/java/org/softwareheritage/graph/Traversal.java @@ -1,580 +1,623 @@ package org.softwareheritage.graph; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.Map; import java.util.Queue; import java.util.Random; import java.util.Stack; import java.util.function.Consumer; import java.util.function.LongConsumer; import org.softwareheritage.graph.server.Endpoint; import it.unimi.dsi.big.webgraph.LazyLongIterator; /** * Traversal algorithms on the compressed graph. *

* Internal implementation of the traversal API endpoints. These methods only input/output internal * long ids, which are converted in the {@link Endpoint} higher-level class to {@link SWHID}. * * @author The Software Heritage developers * @see Endpoint */ public class Traversal { /** Graph used in the traversal */ - Graph graph; - /** Graph edge restriction */ + SwhBidirectionalGraph graph; + /** Graph edge restrictions */ AllowedEdges edges; /** Hash set storing if we have visited a node */ HashSet visited; /** Hash map storing parent node id for each nodes during a traversal */ Map parentNode; /** Number of edges accessed during traversal */ long nbEdgesAccessed; /** The anti Dos limit of edges traversed while a visit */ long maxEdges; /** The string represent the set of type restriction */ NodesFiltering ndsfilter; /** random number generator, for random walks */ Random rng; /** * Constructor. * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed * edges */ - public Traversal(Graph graph, String direction, String edgesFmt) { + public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt) { this(graph, direction, edgesFmt, 0); } - public Traversal(Graph graph, String direction, String edgesFmt, long maxEdges) { + public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges) { this(graph, direction, edgesFmt, maxEdges, "*"); } - public Traversal(Graph graph, String direction, String edgesFmt, long maxEdges, String returnTypes) { + public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges, + String returnTypes) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } if (direction.equals("backward")) { this.graph = graph.transpose(); } else { this.graph = graph; } this.edges = new AllowedEdges(edgesFmt); this.visited = new HashSet<>(); this.parentNode = new HashMap<>(); this.nbEdgesAccessed = 0; this.maxEdges = maxEdges; this.rng = new Random(); if (returnTypes.equals("*")) { this.ndsfilter = new NodesFiltering(); } else { this.ndsfilter = new NodesFiltering(returnTypes); } } /** * Returns number of accessed edges during traversal. * * @return number of edges accessed in last traversal */ public long getNbEdgesAccessed() { return nbEdgesAccessed; } /** * Returns number of accessed nodes during traversal. * * @return number of nodes accessed in last traversal */ public long getNbNodesAccessed() { return this.visited.size(); } + /** + * Returns lazy iterator of successors of a node while following a specific set of edge types. + * + * @param g input graph + * @param nodeId node specified as a long id + * @param allowedEdges the specification of which edges can be traversed + * @return lazy iterator of successors of the node, specified as a + * WebGraph LazyLongIterator + */ + public static LazyLongIterator filterSuccessors(SwhBidirectionalGraph g, long nodeId, AllowedEdges allowedEdges) { + if (allowedEdges.restrictedTo == null) { + // All edges are allowed, bypass edge check + return g.successors(nodeId); + } else { + LazyLongIterator allSuccessors = g.successors(nodeId); + return new LazyLongIterator() { + @Override + public long nextLong() { + long neighbor; + while ((neighbor = allSuccessors.nextLong()) != -1) { + if (allowedEdges.isAllowed(g.getNodeType(nodeId), g.getNodeType(neighbor))) { + return neighbor; + } + } + return -1; + } + + @Override + public long skip(final long n) { + long i; + for (i = 0; i < n && nextLong() != -1; i++) + ; + return i; + } + }; + } + } + + private LazyLongIterator filterSuccessors(long nodeId, AllowedEdges allowedEdges) { + return filterSuccessors(graph, nodeId, allowedEdges); + } + /** * Push version of {@link #leaves} will fire passed callback for each leaf. */ public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); long neighborsCnt = 0; nbEdgesAccessed += graph.outdegree(currentNodeId); if (this.maxEdges > 0) { if (nbEdgesAccessed >= this.maxEdges) { break; } } - LazyLongIterator it = graph.successors(currentNodeId, edges); + LazyLongIterator it = filterSuccessors(currentNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { neighborsCnt++; if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } if (neighborsCnt == 0) { cb.accept(currentNodeId); } } } /** * Returns the leaves of a subgraph rooted at the specified source node. * * @param srcNodeId source node * @return list of node ids corresponding to the leaves */ public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList(); leavesVisitor(srcNodeId, nodeIds::add); if (ndsfilter.restricted) { return ndsfilter.filterByNodeTypes(nodeIds, graph); } return nodeIds; } /** * Push version of {@link #neighbors}: will fire passed callback on each neighbor. */ public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) { this.nbEdgesAccessed = graph.outdegree(srcNodeId); if (this.maxEdges > 0) { if (nbEdgesAccessed >= this.maxEdges) { return; } } - LazyLongIterator it = graph.successors(srcNodeId, edges); + LazyLongIterator it = filterSuccessors(srcNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { cb.accept(neighborNodeId); } } /** * Returns node direct neighbors (linked with exactly one edge). * * @param srcNodeId source node * @return list of node ids corresponding to the neighbors */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); neighborsVisitor(srcNodeId, nodeIds::add); if (ndsfilter.restricted) { return ndsfilter.filterByNodeTypes(nodeIds, graph); } return nodeIds; } /** * Push version of {@link #visitNodes}: will fire passed callback on each visited node. */ public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (nodeCb != null) { nodeCb.accept(currentNodeId); } nbEdgesAccessed += graph.outdegree(currentNodeId); if (this.maxEdges > 0) { if (nbEdgesAccessed >= this.maxEdges) { break; } } - LazyLongIterator it = graph.successors(currentNodeId, edges); + LazyLongIterator it = filterSuccessors(currentNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (edgeCb != null) { edgeCb.accept(currentNodeId, neighborNodeId); } if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } } } /** One-argument version to handle callbacks properly */ public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) { visitNodesVisitor(srcNodeId, cb, null); } /** * Performs a graph traversal and returns explored nodes. * * @param srcNodeId source node * @return list of explored node ids */ public ArrayList visitNodes(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); visitNodesVisitor(srcNodeId, nodeIds::add); if (ndsfilter.restricted) { return ndsfilter.filterByNodeTypes(nodeIds, graph); } return nodeIds; } /** * Push version of {@link #visitPaths}: will fire passed callback on each discovered (complete) * path. */ public void visitPathsVisitor(long srcNodeId, PathConsumer cb) { Stack currentPath = new Stack<>(); this.nbEdgesAccessed = 0; visitPathsInternalVisitor(srcNodeId, currentPath, cb); } /** * Performs a graph traversal and returns explored paths. * * @param srcNodeId source node * @return list of explored paths (represented as a list of node ids) */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); visitPathsVisitor(srcNodeId, paths::add); return paths; } private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) { currentPath.push(currentNodeId); long visitedNeighbors = 0; nbEdgesAccessed += graph.outdegree(currentNodeId); if (this.maxEdges > 0) { if (nbEdgesAccessed >= this.maxEdges) { currentPath.pop(); return; } } - LazyLongIterator it = graph.successors(currentNodeId, edges); + LazyLongIterator it = filterSuccessors(currentNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { visitPathsInternalVisitor(neighborNodeId, currentPath, cb); visitedNeighbors++; } if (visitedNeighbors == 0) { ArrayList path = new ArrayList<>(currentPath); cb.accept(path); } currentPath.pop(); } /** * Performs a graph traversal with backtracking, and returns the first found path from source to * destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids */ public ArrayList walk(long srcNodeId, T dst, String visitOrder) { long dstNodeId; if (visitOrder.equals("dfs")) { dstNodeId = walkInternalDFS(srcNodeId, dst); } else if (visitOrder.equals("bfs")) { dstNodeId = walkInternalBFS(srcNodeId, dst); } else { throw new IllegalArgumentException("Unknown visit order: " + visitOrder); } if (dstNodeId == -1) { throw new IllegalArgumentException("Cannot find destination: " + dst); } return backtracking(srcNodeId, dstNodeId); } /** * Performs a random walk (picking a random successor at each step) from source to destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids or an empty path to indicate that no suitable path have * been found */ public ArrayList randomWalk(long srcNodeId, T dst) { return randomWalk(srcNodeId, dst, 0); } /** * Performs a stubborn random walk (picking a random successor at each step) from source to * destination. The walk is "stubborn" in the sense that it will not give up the first time if a * satisfying target node is found, but it will retry up to a limited amount of times. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @param retries number of times to retry; 0 means no retries (single walk) * @return found path as a list of node ids or an empty path to indicate that no suitable path have * been found */ public ArrayList randomWalk(long srcNodeId, T dst, int retries) { long curNodeId = srcNodeId; ArrayList path = new ArrayList<>(); this.nbEdgesAccessed = 0; boolean found; if (retries < 0) { throw new IllegalArgumentException("Negative number of retries given: " + retries); } while (true) { path.add(curNodeId); - LazyLongIterator successors = graph.successors(curNodeId, edges); + LazyLongIterator successors = filterSuccessors(curNodeId, edges); curNodeId = randomPick(successors); if (curNodeId < 0) { found = false; break; } if (isDstNode(curNodeId, dst)) { path.add(curNodeId); found = true; break; } } if (found) { if (ndsfilter.restricted) { return ndsfilter.filterByNodeTypes(path, graph); } return path; } else if (retries > 0) { // try again return randomWalk(srcNodeId, dst, retries - 1); } else { // not found and no retries left path.clear(); return path; } } /** * Randomly choose an element from an iterator over Longs using reservoir sampling * * @param elements iterator over selection domain * @return randomly chosen element or -1 if no suitable element was found */ private long randomPick(LazyLongIterator elements) { long curPick = -1; long seenCandidates = 0; for (long element; (element = elements.nextLong()) != -1;) { seenCandidates++; if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) { curPick = element; } } return curPick; } /** * Internal DFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalDFS(long srcNodeId, T dst) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.outdegree(currentNodeId); - LazyLongIterator it = graph.successors(currentNodeId, edges); + LazyLongIterator it = filterSuccessors(currentNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal BFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalBFS(long srcNodeId, T dst) { Queue queue = new LinkedList<>(); this.nbEdgesAccessed = 0; queue.add(srcNodeId); visited.add(srcNodeId); while (!queue.isEmpty()) { long currentNodeId = queue.poll(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.outdegree(currentNodeId); - LazyLongIterator it = graph.successors(currentNodeId, edges); + LazyLongIterator it = filterSuccessors(currentNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { queue.add(neighborNodeId); visited.add(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal function of {@link #walk} to check if a node corresponds to the destination. * * @param nodeId current node * @param dst destination (either a node or a node type) * @return true if the node is a destination, or false otherwise */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; return nodeId == dstNodeId; } else if (dst instanceof Node.Type) { Node.Type dstType = (Node.Type) dst; return graph.getNodeType(nodeId) == dstType; } else { return false; } } /** * Internal backtracking function of {@link #walk}. * * @param srcNodeId source node * @param dstNodeId destination node * @return the found path, as a list of node ids */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList<>(); long currentNodeId = dstNodeId; while (currentNodeId != srcNodeId) { path.add(currentNodeId); currentNodeId = parentNode.get(currentNodeId); } path.add(srcNodeId); Collections.reverse(path); return path; } /** * Find a common descendant between two given nodes using two parallel BFS * * @param lhsNode the first node * @param rhsNode the second node * @return the found path, as a list of node ids */ public Long findCommonDescendant(long lhsNode, long rhsNode) { Queue lhsStack = new ArrayDeque<>(); Queue rhsStack = new ArrayDeque<>(); HashSet lhsVisited = new HashSet<>(); HashSet rhsVisited = new HashSet<>(); lhsStack.add(lhsNode); rhsStack.add(rhsNode); lhsVisited.add(lhsNode); rhsVisited.add(rhsNode); this.nbEdgesAccessed = 0; Long curNode; while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) { if (!lhsStack.isEmpty()) { curNode = lhsStack.poll(); nbEdgesAccessed += graph.outdegree(curNode); - LazyLongIterator it = graph.successors(curNode, edges); + LazyLongIterator it = filterSuccessors(curNode, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!lhsVisited.contains(neighborNodeId)) { if (rhsVisited.contains(neighborNodeId)) return neighborNodeId; lhsStack.add(neighborNodeId); lhsVisited.add(neighborNodeId); } } } if (!rhsStack.isEmpty()) { curNode = rhsStack.poll(); nbEdgesAccessed += graph.outdegree(curNode); - LazyLongIterator it = graph.successors(curNode, edges); + LazyLongIterator it = filterSuccessors(curNode, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!rhsVisited.contains(neighborNodeId)) { if (lhsVisited.contains(neighborNodeId)) return neighborNodeId; rhsStack.add(neighborNodeId); rhsVisited.add(neighborNodeId); } } } } return null; } public interface NodeIdConsumer extends LongConsumer { /** * Callback for incrementally receiving node identifiers during a graph visit. */ void accept(long nodeId); } public interface EdgeIdConsumer { /** * Callback for incrementally receiving edge identifiers during a graph visit. */ void accept(long srcId, long dstId); } public interface PathConsumer extends Consumer> { /** * Callback for incrementally receiving node paths (made of node identifiers) during a graph visit. */ void accept(ArrayList path); } } diff --git a/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java b/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java index bdbb60d..af09f5d 100644 --- a/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java +++ b/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java @@ -1,70 +1,70 @@ package org.softwareheritage.graph.algo; import com.google.common.primitives.Longs; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Traversal; import org.softwareheritage.graph.experiments.forks.ForkCC; import java.io.File; import java.io.IOException; public class TopologicalTraversal { - public static void run(final Graph graph, Traversal.NodeIdConsumer cb) throws IOException { + public static void run(final SwhBidirectionalGraph graph, Traversal.NodeIdConsumer cb) throws IOException { final long[][] indegree = LongBigArrays.newBigArray(graph.numNodes()); final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "nodes"; pl.expectedUpdates = graph.numNodes(); pl.start("Fetching indegrees..."); long n = graph.numNodes(); for (long i = 0; i < graph.numNodes(); ++i) { BigArrays.add(indegree, i, graph.indegree(i)); } pl.done(); LongArrayBitVector visited = LongArrayBitVector.ofLength(n); int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; pl.start("Traversal in topological order..."); for (long i = 0; i < graph.numNodes(); ++i) { if (visited.getBoolean(i) || BigArrays.get(indegree, i) != 0L) { continue; } queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); cb.accept(currentNode); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { BigArrays.add(indegree, succ, -1L); if (visited.getBoolean(succ) || BigArrays.get(indegree, succ) != 0) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } } pl.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index 9397de7..7f05184 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,45 +1,45 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import it.unimi.dsi.big.webgraph.LazyLongIterator; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.benchmark.utils.Statistics; import org.softwareheritage.graph.benchmark.utils.Timing; import java.io.IOException; import java.util.ArrayList; /** * Benchmark to time edge access time. * * @author The Software Heritage developers */ public class AccessEdge { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = Graph.loadMapped(bench.args.graphPath); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); ArrayList timings = new ArrayList<>(); for (long nodeId : nodeIds) { long startTime = Timing.start(); LazyLongIterator neighbors = graph.successors(nodeId); long firstNeighbor = neighbors.nextLong(); double duration = Timing.stop(startTime); timings.add(duration); } System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):"); Statistics stats = new Statistics(timings); stats.printAll(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java index 43aec2e..2ca9b58 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java @@ -1,107 +1,107 @@ package org.softwareheritage.graph.benchmark; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import java.io.File; import java.io.IOException; public class BFS { private final static Logger LOGGER = LoggerFactory.getLogger(BFS.class); private final ImmutableGraph graph; public BFS(ImmutableGraph graph) { this.graph = graph; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(BFS.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'T', "transposed", "Use transposed graph (default: false)"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) throws IOException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); boolean useTransposed = config.getBoolean("useTransposed"); System.err.println("Loading graph " + graphPath + " ..."); - Graph graph = Graph.loadMapped(graphPath); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(graphPath); System.err.println("Graph loaded."); if (useTransposed) graph = graph.transpose(); BFS bfs = new BFS(graph); bfs.bfsperm(); } // Partly inlined from it.unimi.dsi.law.big.graph.BFS private void bfsperm() throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 final LongArrayBitVector visited = LongArrayBitVector.ofLength(n); final ProgressLogger pl = new ProgressLogger(LOGGER); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting breadth-first visit..."); for (long i = 0; i < n; i++) { if (visited.getBoolean(i)) continue; queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!visited.getBoolean(succ)) { visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } } pl.update(); } } pl.done(); queue.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java index 98dd854..591c7d5 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java @@ -1,154 +1,154 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.*; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.SWHID; import org.softwareheritage.graph.benchmark.utils.Random; import org.softwareheritage.graph.benchmark.utils.Statistics; import org.softwareheritage.graph.server.Endpoint; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.StringJoiner; import java.util.function.Function; /** * Benchmark common utility functions. * * @author The Software Heritage developers */ public class Benchmark { /** CSV separator for log file */ final String CSV_SEPARATOR = ";"; /** Command line arguments */ public Args args; /** * Constructor. */ public Benchmark() { this.args = new Args(); } /** * Parses benchmark command line arguments. * * @param args command line arguments */ public void parseCommandLineArgs(String[] args) throws JSAPException { SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(), "Benchmark tool for Software Heritage use-cases scenarios.", new Parameter[]{ new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', "nb-nodes", "Number of random nodes used to do the benchmark."), new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', "log-file", "File name to output CSV format benchmark log."), new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "seed", "Random generator seed."),}); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } this.args.graphPath = config.getString("graphPath"); this.args.nbNodes = config.getInt("nbNodes"); this.args.logFile = config.getString("logFile"); this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random(); } /** * Creates CSV file for log output. */ public void createCSVLogFile() throws IOException { try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) { StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR); csvHeader.add("use case name").add("SWHID").add("number of edges accessed").add("traversal timing") .add("swhid2node timing").add("node2swhid timing"); csvLog.write(csvHeader.toString() + "\n"); } } /** * Times a specific endpoint and outputs individual datapoints along with aggregated statistics. * * @param useCaseName benchmark use-case name * @param graph compressed graph used in the benchmark * @param nodeIds node ids to use as starting point for the endpoint traversal * @param operation endpoint function to benchmark * @param dstFmt destination formatted string as described in the * API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") */ - public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, + public void timeEndpoint(String useCaseName, SwhBidirectionalGraph graph, long[] nodeIds, Function operation, String dstFmt, String algorithm) throws IOException { ArrayList timings = new ArrayList<>(); ArrayList timingsNormalized = new ArrayList<>(); ArrayList nbEdgesAccessed = new ArrayList<>(); final boolean append = true; try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) { for (long nodeId : nodeIds) { SWHID swhid = graph.getSWHID(nodeId); Endpoint.Output output = (dstFmt == null) ? operation.apply(new Endpoint.Input(swhid)) : operation.apply(new Endpoint.Input(swhid, dstFmt, algorithm)); StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR); csvLine.add(useCaseName).add(swhid.toString()).add(Long.toString(output.meta.nbEdgesAccessed)) .add(Double.toString(output.meta.timings.traversal)) .add(Double.toString(output.meta.timings.swhid2node)) .add(Double.toString(output.meta.timings.node2swhid)); csvLog.write(csvLine.toString() + "\n"); timings.add(output.meta.timings.traversal); nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed); if (output.meta.nbEdgesAccessed != 0) { timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); } } } System.out.println("\n" + useCaseName + " use-case:"); System.out.println("timings:"); Statistics stats = new Statistics(timings); stats.printAll(); System.out.println("timings normalized:"); Statistics statsNormalized = new Statistics(timingsNormalized); statsNormalized.printAll(); System.out.println("nb edges accessed:"); Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed); statsNbEdgesAccessed.printAll(); } /** * Same as {@link #timeEndpoint} but without destination or algorithm specified to endpoint call. */ - public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, + public void timeEndpoint(String useCaseName, SwhBidirectionalGraph graph, long[] nodeIds, Function operation) throws IOException { timeEndpoint(useCaseName, graph, nodeIds, operation, null, null); } /** * Input arguments. */ public class Args { /** Basename of the compressed graph */ public String graphPath; /** Number of random nodes to use for the benchmark */ public int nbNodes; /** File name for CSV format benchmark log */ public String logFile; /** Random generator */ public Random random; } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 6a0cf58..61b5cbf 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,42 +1,42 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** * Benchmark Software Heritage * browsing * use-cases scenarios. * * @author The Software Heritage developers */ public class Browsing { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = Graph.loadMapped(bench.args.graphPath); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath); long[] dirNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); long[] revNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors); bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths); bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java index 9b3c4c9..960ef5f 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java @@ -1,45 +1,45 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** * Benchmark Software Heritage * provenance * use-cases scenarios. * * @author The Software Heritage developers */ public class Provenance { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = Graph.loadMapped(bench.args.graphPath); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); bench.timeEndpoint("commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); bench.timeEndpoint("complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); bench.timeEndpoint("origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); bench.timeEndpoint("origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); bench.timeEndpoint("complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java index c0e19f6..9403047 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java @@ -1,37 +1,37 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** * Benchmark Software Heritage * vault use-case * scenario. * * @author The Software Heritage developers */ public class Vault { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = Graph.loadMapped(bench.args.graphPath); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint endpoint = new Endpoint(graph, "forward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java b/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java index ee4c530..8ee0b63 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java @@ -1,67 +1,67 @@ package org.softwareheritage.graph.benchmark.utils; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import java.util.PrimitiveIterator; /** * Random related utility class. * * @author The Software Heritage developers */ public class Random { /** Internal pseudorandom generator */ java.util.Random random; /** * Constructor. */ public Random() { this.random = new java.util.Random(); } /** * Constructor. * * @param seed random generator seed */ public Random(long seed) { this.random = new java.util.Random(seed); } /** * Generates random node ids. * * @param graph graph used to pick node ids * @param nbNodes number of node ids to generate * @return an array of random node ids */ - public long[] generateNodeIds(Graph graph, int nbNodes) { + public long[] generateNodeIds(SwhBidirectionalGraph graph, int nbNodes) { return random.longs(nbNodes, 0, graph.numNodes()).toArray(); } /** * Generates random node ids with a specific type. * * @param graph graph used to pick node ids * @param nbNodes number of node ids to generate * @param expectedType specific node type to pick * @return an array of random node ids */ - public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) { + public long[] generateNodeIdsOfType(SwhBidirectionalGraph graph, int nbNodes, Node.Type expectedType) { PrimitiveIterator.OfLong nodes = random.longs(0, graph.numNodes()).iterator(); long[] nodeIds = new long[nbNodes]; long nextId; for (int i = 0; i < nbNodes; i++) { do { nextId = nodes.nextLong(); } while (graph.getNodeType(nextId) != expectedType); nodeIds[i] = nextId; } return nodeIds; } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java index f36ce88..56c8369 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java @@ -1,62 +1,62 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Traversal; import java.io.IOException; import java.util.Scanner; public class FindCommonAncestor { - private Graph graph; + private SwhBidirectionalGraph graph; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(FindCommonAncestor.class.getName(), "", new Parameter[]{ new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', "edges", "Edges constraints"), new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String edgesFmt = config.getString("edgesFmt"); FindCommonAncestor fca = new FindCommonAncestor(); try { fca.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long lhsNode = input.nextLong(); long rhsNode = input.nextLong(); Traversal t = new Traversal(fca.graph.symmetrize(), "forward", edgesFmt); System.out.println(t.findCommonDescendant(lhsNode, rhsNode)); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java index 2e5afd9..b5f318c 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java @@ -1,123 +1,123 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import java.io.IOException; import java.util.*; public class FindPath { - private Graph graph; + private SwhBidirectionalGraph graph; private Long emptySnapshot; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename).symmetrize(); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename).symmetrize(); System.err.println("Graph loaded."); this.emptySnapshot = null; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(FindPath.class.getName(), "", new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private boolean nodeIsEmptySnapshot(Long node) { if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private Boolean shouldVisit(Long node) { Node.Type nt = this.graph.getNodeType(node); if (nt != Node.Type.REV && nt != Node.Type.REL && nt != Node.Type.SNP && nt != Node.Type.ORI) { return false; } if (this.nodeIsEmptySnapshot(node)) return false; return true; } private ArrayList findPath(Long src, Long dst) { HashSet visited = new HashSet<>(); Queue queue = new ArrayDeque<>(); Map parentNode = new HashMap<>(); queue.add(src); visited.add(src); while (!queue.isEmpty()) { long currentNode = queue.poll(); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!shouldVisit(succ) || visited.contains(succ)) continue; visited.add(succ); queue.add(succ); parentNode.put(succ, currentNode); if (succ == dst) { ArrayList path = new ArrayList<>(); long n = dst; while (n != src) { path.add(n); n = parentNode.get(n); } path.add(src); Collections.reverse(path); return path; } } } return null; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); FindPath fpath = new FindPath(); try { fpath.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long lhsNode = input.nextLong(); long rhsNode = input.nextLong(); ArrayList path = fpath.findPath(lhsNode, rhsNode); if (path != null) { for (Long n : path) { System.out.format("%d ", n); } System.out.println(); } else { System.out.println("null"); } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java index 714df2e..bd5459f 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java @@ -1,249 +1,249 @@ package org.softwareheritage.graph.experiments.forks; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; public class ForkCC { public Boolean includeRootDir; - private Graph graph; + private SwhBidirectionalGraph graph; private Long emptySnapshot; private LongArrayBitVector visited; private LongArrayBitVector whitelist; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ForkCC.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't', "whitelist", "Whitelist of origins"), new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'R', "includerootdir", "Include root directory (default: false)"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private static void printDistribution(ArrayList> components) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { System.out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { System.out.println(node); } } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename).symmetrize(); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename).symmetrize(); System.err.println("Graph loaded."); this.emptySnapshot = null; this.whitelist = null; this.visited = null; this.includeRootDir = null; } private boolean nodeIsEmptySnapshot(Long node) { if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private Boolean shouldVisit(Long node) { Node.Type nt = this.graph.getNodeType(node); if (nt == Node.Type.CNT) { return false; } if (nt == Node.Type.DIR && !includeRootDir) return false; if (this.nodeIsEmptySnapshot(node)) return false; if (visited.getBoolean(node)) return false; return true; } private ArrayList> compute(ProgressLogger pl) throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 visited = LongArrayBitVector.ofLength(n); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting connected components visit..."); ArrayList> components = new ArrayList<>(); for (long i = 0; i < n; i++) { if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR) continue; ArrayList component = new ArrayList<>(); queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); Node.Type cur_nt = this.graph.getNodeType(currentNode); if (cur_nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) { // TODO: add a check that the origin has >=1 non-empty snapshot component.add(currentNode); } final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!shouldVisit(succ)) continue; if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } if (component.size() > 0) { components.add(component); } } pl.done(); queue.close(); return components; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } } private void parseWhitelist(String path) { System.err.println("Loading whitelist " + path + " ..."); this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); Scanner scanner; try { scanner = new Scanner(new File(path)); while (scanner.hasNextLong()) { whitelist.set(scanner.nextLong()); } System.err.println("Whitelist loaded."); } catch (FileNotFoundException e) { e.printStackTrace(); } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String whitelistPath = config.getString("whitelistPath"); boolean includeRootDir = config.getBoolean("includeRootDir"); String outdirPath = config.getString("outdir"); ForkCC forkCc = new ForkCC(); try { forkCc.load_graph(graphPath); forkCc.includeRootDir = includeRootDir; } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } if (whitelistPath != null) { forkCc.parseWhitelist(whitelistPath); } ProgressLogger logger = new ProgressLogger(); // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { ArrayList> components = forkCc.compute(logger); printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt")); printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt")); } catch (IOException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java index 4d749bd..aa57ae6 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java @@ -1,223 +1,223 @@ package org.softwareheritage.graph.experiments.forks; import ch.qos.logback.classic.Level; import ch.qos.logback.classic.Logger; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.LoggerFactory; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.*; public class ForkCliques { - private Graph graph; + private SwhBidirectionalGraph graph; private LongArrayBitVector whitelist; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); System.err.println("Graph loaded."); this.whitelist = null; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ForkCliques.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't', "whitelist", "Whitelist of origins"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private ArrayList dfsAt(Long baseNode) { ArrayList res = new ArrayList<>(); final Deque stack = new ArrayDeque<>(); HashSet seen = new HashSet<>(); stack.push(baseNode); while (!stack.isEmpty()) { final Long currentNode = stack.pop(); final LazyLongIterator iterator = this.graph.predecessors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!seen.contains(succ)) { Node.Type nt = this.graph.getNodeType(succ); if (nt == Node.Type.DIR || nt == Node.Type.CNT) continue; if (nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(succ))) { res.add(succ); } else { stack.push(succ); seen.add(succ); } } } } Collections.sort(res); return res; } private boolean isBaseRevision(Long node) { if (this.graph.getNodeType(node) != Node.Type.REV) return false; final LazyLongIterator iterator = this.graph.successors(node); long succ; while ((succ = iterator.nextLong()) != -1) { if (this.graph.getNodeType(succ) == Node.Type.REV) return false; } return true; } static private String fingerprint(ArrayList cluster) { MessageDigest digest; try { digest = MessageDigest.getInstance("SHA-256"); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); return null; } for (Long n : cluster) digest.update(Longs.toByteArray(n)); return new String(digest.digest()); } private ArrayList> compute(ProgressLogger pl) { final long n = this.graph.numNodes(); HashSet fingerprints = new HashSet<>(); ArrayList> clusters = new ArrayList<>(); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting topological sort..."); for (long i = 0; i < n; i++) { if (isBaseRevision(i)) { ArrayList currentCluster = dfsAt(i); String clusterFp = fingerprint(currentCluster); if (!fingerprints.contains(clusterFp)) { fingerprints.add(clusterFp); clusters.add(currentCluster); } } pl.update(); } pl.done(); return clusters; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } } private void parseWhitelist(String path) { System.err.println("Loading whitelist " + path + " ..."); this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); Scanner scanner; try { scanner = new Scanner(new File(path)); while (scanner.hasNextLong()) { whitelist.set(scanner.nextLong()); } System.err.println("Whitelist loaded."); } catch (FileNotFoundException e) { e.printStackTrace(); } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String whitelistPath = config.getString("whitelistPath"); String outdirPath = config.getString("outdir"); ForkCliques forkCliques = new ForkCliques(); try { forkCliques.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } if (whitelistPath != null) { forkCliques.parseWhitelist(whitelistPath); } Logger rootLogger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME); rootLogger.setLevel(Level.DEBUG); ProgressLogger logger = new ProgressLogger(rootLogger); ArrayList> components = forkCliques.compute(logger); // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt")); printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt")); } catch (FileNotFoundException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java index 332a908..8389962 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java @@ -1,88 +1,88 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import java.io.IOException; import java.util.ArrayList; public class ListEmptyOrigins { - private Graph graph; + private SwhBidirectionalGraph graph; private Long emptySnapshot; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ListEmptyOrigins.class.getName(), "", new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); ListEmptyOrigins leo = new ListEmptyOrigins(); try { leo.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } ArrayList badlist = leo.compute(leo.graph); for (Long bad : badlist) { System.out.println(bad); } } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); System.err.println("Graph loaded."); this.emptySnapshot = null; } private boolean nodeIsEmptySnapshot(Long node) { System.err.println(this.graph.getNodeType(node) + " " + this.graph.outdegree(node) + " " + node); if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private ArrayList compute(ImmutableGraph graph) { final long n = graph.numNodes(); ArrayList bad = new ArrayList<>(); for (long i = 0; i < n; i++) { Node.Type nt = this.graph.getNodeType(i); if (nt != Node.Type.ORI) continue; final LazyLongIterator iterator = graph.successors(i); long succ; boolean found = false; while ((succ = iterator.nextLong()) != -1) { if (this.graph.outdegree(succ) > 0) { found = true; } } if (!found) bad.add(i); } return bad; } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java index 89fd675..e3fdb3b 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java @@ -1,130 +1,130 @@ package org.softwareheritage.graph.experiments.multiplicationfactor; import com.martiansoftware.jsap.*; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.Traversal; import org.softwareheritage.graph.benchmark.utils.Timing; import java.io.IOException; import java.util.Scanner; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class GenDistribution { - private Graph graph; + private SwhBidirectionalGraph graph; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(GenDistribution.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's', "srctype", "Source node type"), new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd', "dsttype", "Destination node type"), new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', "edges", "Edges constraints"), new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't', "numthreads", "Number of threads"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); Node.Type srcType = Node.Type.fromStr(config.getString("srcType")); Node.Type dstType = Node.Type.fromStr(config.getString("dstType")); String edgesFmt = config.getString("edgesFmt"); int numThreads = config.getInt("numThreads"); GenDistribution tp = new GenDistribution(); try { tp.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } final long END_OF_QUEUE = -1L; ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); service.submit(() -> { try { Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long node = input.nextLong(); if (tp.graph.getNodeType(node) == srcType) { queue.put(node); } } } catch (InterruptedException e) { e.printStackTrace(); } finally { for (int i = 0; i < numThreads; ++i) { try { queue.put(END_OF_QUEUE); } catch (InterruptedException e) { e.printStackTrace(); } } } }); for (int i = 0; i < numThreads; ++i) { service.submit(() -> { - Graph thread_graph = tp.graph.copy(); + SwhBidirectionalGraph thread_graph = tp.graph.copy(); long startTime; double totalTime; while (true) { Long node = null; try { node = queue.take(); } catch (InterruptedException e) { e.printStackTrace(); } if (node == null || node == END_OF_QUEUE) { return; } Traversal t = new Traversal(thread_graph, "backward", edgesFmt); int[] count = {0}; startTime = Timing.start(); t.visitNodesVisitor(node, (curnode) -> { if (tp.graph.getNodeType(curnode) == dstType) { count[0]++; } }); totalTime = Timing.stop(startTime); System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(), t.getNbEdgesAccessed(), totalTime); } }); } service.shutdown(); } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); System.err.println("Graph loaded."); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java index ad7eadf..53bcc49 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java @@ -1,188 +1,188 @@ package org.softwareheritage.graph.experiments.topology; import com.martiansoftware.jsap.*; import it.unimi.dsi.Util; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.util.XoRoShiRo128PlusRandom; import org.softwareheritage.graph.*; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.*; import java.util.concurrent.*; public class AveragePaths { - private final Graph graph; + private final SwhBidirectionalGraph graph; private final Subgraph subgraph; private final ConcurrentHashMap result; private final String outdir; public AveragePaths(String graphBasename, String allowedNodes, String outdir) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = Graph.loadMapped(graphBasename); + this.graph = SwhBidirectionalGraph.loadMapped(graphBasename); this.subgraph = new Subgraph(this.graph, new AllowedNodes(allowedNodes)); this.outdir = outdir; System.err.println("Graph loaded."); result = new ConcurrentHashMap<>(); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(AveragePaths.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("nodeTypes", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's', "nodetypes", "Node type constraints"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "outdir", "Directory where to put the results"), new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "32", JSAP.NOT_REQUIRED, 't', "numthreads", "Number of threads"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private void run(int numThreads) throws InterruptedException { final long END_OF_QUEUE = -1L; ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); service.submit(() -> { try { - Graph thread_graph = graph.copy(); + SwhBidirectionalGraph thread_graph = graph.copy(); Subgraph thread_subgraph = subgraph.copy(); long[][] randomPerm = Util.identity(thread_graph.numNodes()); LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); long n = thread_graph.numNodes(); ProgressLogger pl = new ProgressLogger(); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Filling processor queue..."); for (long j = 0; j < n; ++j) { long node = BigArrays.get(randomPerm, j); if (thread_subgraph.nodeExists(node) && thread_subgraph.outdegree(node) == 0) { queue.put(node); } if (j % 10000 == 0) { printResult(); } pl.update(); } } catch (Exception e) { e.printStackTrace(); } finally { for (int i = 0; i < numThreads; ++i) { try { queue.put(END_OF_QUEUE); } catch (InterruptedException e) { e.printStackTrace(); } } } }); for (int i = 0; i < numThreads; ++i) { service.submit(() -> { try { Subgraph thread_subgraph = subgraph.copy(); while (true) { Long node = null; try { node = queue.take(); } catch (InterruptedException e) { e.printStackTrace(); } if (node == null || node == END_OF_QUEUE) { return; } bfsAt(thread_subgraph, node); } } catch (Exception e) { e.printStackTrace(); } }); } service.shutdown(); service.awaitTermination(365, TimeUnit.DAYS); } private void bfsAt(Subgraph graph, long srcNodeId) { ArrayDeque queue = new ArrayDeque<>(); HashSet visited = new HashSet<>(); long FRONTIER_MARKER = -1; queue.addLast(srcNodeId); visited.add(srcNodeId); long distance = 0; queue.addLast(FRONTIER_MARKER); while (!queue.isEmpty()) { long currentNodeId = queue.removeFirst(); // System.err.println("curr: " + currentNodeId); if (currentNodeId == FRONTIER_MARKER) { if (queue.isEmpty()) // avoid infinite loops break; ++distance; queue.addLast(FRONTIER_MARKER); continue; } if (graph.indegree(currentNodeId) == 0) { result.merge(distance, 1L, Long::sum); } LazyLongIterator it = graph.predecessors(currentNodeId); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { queue.addLast(neighborNodeId); visited.add(neighborNodeId); } } } } public void printResult() throws IOException { new File(outdir).mkdirs(); PrintWriter f = new PrintWriter(new FileWriter(outdir + "/distribution.txt")); TreeMap sortedDistribution = new TreeMap<>(result); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } public static void main(String[] args) throws IOException, InterruptedException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outdir = config.getString("outdir"); String allowedNodes = config.getString("nodeTypes"); int numThreads = config.getInt("numThreads"); AveragePaths tp = new AveragePaths(graphPath, allowedNodes, outdir); tp.run(numThreads); tp.printResult(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java index 2e6fa0c..0564463 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java @@ -1,325 +1,325 @@ package org.softwareheritage.graph.experiments.topology; import com.martiansoftware.jsap.*; import it.unimi.dsi.Util; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.util.XoRoShiRo128PlusRandom; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import java.io.*; import java.util.*; import java.util.concurrent.*; public class ClusteringCoefficient { - private final Graph graph; + private final SwhBidirectionalGraph graph; private final String outdirPath; private final ConcurrentHashMap result_full; private final ConcurrentHashMap result_dircnt; private final ConcurrentHashMap result_rev; private final ConcurrentHashMap result_revrel; private final ConcurrentHashMap result_orisnp; public ClusteringCoefficient(String graphBasename, String outdirPath) throws IOException { this.outdirPath = outdirPath; System.err.println("Loading graph " + graphBasename + " ..."); - Graph directedGraph = Graph.loadMapped(graphBasename); + SwhBidirectionalGraph directedGraph = SwhBidirectionalGraph.loadMapped(graphBasename); this.graph = directedGraph.symmetrize(); System.err.println("Graph loaded."); result_full = new ConcurrentHashMap<>(); result_dircnt = new ConcurrentHashMap<>(); result_rev = new ConcurrentHashMap<>(); result_revrel = new ConcurrentHashMap<>(); result_orisnp = new ConcurrentHashMap<>(); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(AveragePaths.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "outdir", "Directory where to put the results"), new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "32", JSAP.NOT_REQUIRED, 't', "numthreads", "Number of threads"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private void run(int numThreads) throws InterruptedException { final long END_OF_QUEUE = -1L; ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); service.submit(() -> { try { - Graph thread_graph = graph.copy(); + SwhBidirectionalGraph thread_graph = graph.copy(); long[][] randomPerm = Util.identity(thread_graph.numNodes()); LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); long n = thread_graph.numNodes(); ProgressLogger pl = new ProgressLogger(); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Filling processor queue..."); for (long j = 0; j < n; ++j) { long node = BigArrays.get(randomPerm, j); queue.put(node); if (j % 10000 == 0) { printResult(); } pl.update(); } } catch (Exception e) { e.printStackTrace(); } finally { for (int i = 0; i < numThreads; ++i) { try { queue.put(END_OF_QUEUE); } catch (InterruptedException e) { e.printStackTrace(); } } } }); for (int i = 0; i < numThreads; ++i) { service.submit(() -> { try { - Graph thread_graph = graph.copy(); + SwhBidirectionalGraph thread_graph = graph.copy(); while (true) { Long node = null; try { node = queue.take(); } catch (InterruptedException e) { e.printStackTrace(); } if (node == null || node == END_OF_QUEUE) { return; } computeAt(thread_graph, node); } } catch (Exception e) { e.printStackTrace(); } }); } service.shutdown(); service.awaitTermination(365, TimeUnit.DAYS); } - private void computeAt(Graph graph, long node) { + private void computeAt(SwhBidirectionalGraph graph, long node) { long d = graph.outdegree(node); if (d < 2) { return; } Node.Type nodeType = graph.getNodeType(node); HashSet neighborhood = new HashSet<>(); long succ; final LazyLongIterator iterator = graph.successors(node); while ((succ = iterator.nextLong()) != -1) { neighborhood.add(succ); } long triangles_full = 0; long triangles_dircnt = 0; long triangles_rev = 0; long triangles_revrel = 0; long triangles_orisnp = 0; for (Long neighbor : neighborhood) { Node.Type neighborNodeType = graph.getNodeType(neighbor); final LazyLongIterator it = graph.successors(neighbor); while ((succ = it.nextLong()) != -1) { if (neighborhood.contains(succ)) { Node.Type succNodeType = graph.getNodeType(succ); triangles_full++; if ((nodeType == Node.Type.DIR || nodeType == Node.Type.CNT) && (neighborNodeType == Node.Type.DIR || neighborNodeType == Node.Type.CNT) && (succNodeType == Node.Type.DIR || succNodeType == Node.Type.CNT)) { triangles_dircnt++; } else if ((nodeType == Node.Type.REV || nodeType == Node.Type.REL) && (neighborNodeType == Node.Type.REV || neighborNodeType == Node.Type.REL) && (succNodeType == Node.Type.REV || succNodeType == Node.Type.REL)) { triangles_revrel++; if (nodeType == Node.Type.REV && neighborNodeType == Node.Type.REV && succNodeType == Node.Type.REV) triangles_rev++; } else if ((nodeType == Node.Type.ORI || nodeType == Node.Type.SNP) && (neighborNodeType == Node.Type.ORI || neighborNodeType == Node.Type.SNP) && (succNodeType == Node.Type.ORI || succNodeType == Node.Type.SNP)) { triangles_orisnp++; } } } } result_full.merge(triangles_full, 1L, Long::sum); result_dircnt.merge(triangles_dircnt, 1L, Long::sum); result_rev.merge(triangles_rev, 1L, Long::sum); result_revrel.merge(triangles_revrel, 1L, Long::sum); result_orisnp.merge(triangles_orisnp, 1L, Long::sum); } public void printSortedDistribution(String distribPath, Map distrib) throws IOException { PrintWriter f = new PrintWriter(new FileWriter(distribPath)); TreeMap sortedDistribution = new TreeMap<>(distrib); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } public void printResult() throws IOException { new File(outdirPath).mkdirs(); printSortedDistribution(outdirPath + "/distribution-full.txt", result_full); printSortedDistribution(outdirPath + "/distribution-dircnt.txt", result_dircnt); printSortedDistribution(outdirPath + "/distribution-rev.txt", result_rev); printSortedDistribution(outdirPath + "/distribution-relrev.txt", result_revrel); printSortedDistribution(outdirPath + "/distribution-orisnp.txt", result_orisnp); } public static void main(String[] args) throws IOException, InterruptedException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outdir = config.getString("outdir"); int numThreads = config.getInt("numThreads"); ClusteringCoefficient cc = new ClusteringCoefficient(graphPath, outdir); cc.run(numThreads); cc.printResult(); } // Old unused functions private long oppositeEdges(ImmutableGraph graph, long node) { HashSet neighborhood = new HashSet<>(); long succ; final LazyLongIterator iterator = graph.successors(node); while ((succ = iterator.nextLong()) != -1) { neighborhood.add(succ); } long closed_triplets = 0; for (Long neighbor : neighborhood) { final LazyLongIterator it = graph.successors(neighbor); while ((succ = it.nextLong()) != -1) { if (neighborhood.contains(succ)) { closed_triplets++; } } } return closed_triplets; } public void compute(ProgressLogger pl, Formatter out_local, Formatter out_global) { final long n = this.graph.numNodes(); pl.expectedUpdates = n; pl.itemsName = "nodes"; long nodes_d2 = 0; double cum_lcc = 0; double cum_lcc_c0 = 0; double cum_lcc_c1 = 0; HashMap distribution = new HashMap<>(); for (long node = 0; node < n; node++) { long d = graph.outdegree(node); if (d >= 2) { double t = (d * (d - 1)); double m = oppositeEdges(graph, node); double lcc = m / t; distribution.merge(lcc, 1L, Long::sum); cum_lcc += lcc; nodes_d2++; } else { cum_lcc_c1++; } pl.update(); } pl.done(); for (Map.Entry entry : distribution.entrySet()) { out_local.format("%f %d\n", entry.getKey(), entry.getValue()); } double gC = cum_lcc / nodes_d2; double gC0 = cum_lcc_c0 / n; double gC1 = cum_lcc_c1 / n; out_global.format("C: %f\n", gC); out_global.format("C0: %f\n", gC0); out_global.format("C1: %f\n", gC1); } public void compute_approx(Formatter out_global) { final long n = this.graph.numNodes(); long trials = 0; long triangles = 0; while (true) { long node = ThreadLocalRandom.current().nextLong(0, n); long d = graph.outdegree(node); if (d >= 2) { Long u = null; Long v = null; long u_idx = ThreadLocalRandom.current().nextLong(0, d); long v_idx = ThreadLocalRandom.current().nextLong(0, d - 1); if (v_idx >= u_idx) { v_idx++; } long succ; final LazyLongIterator node_iterator = graph.successors(node); for (long succ_idx = 0; (succ = node_iterator.nextLong()) != -1; succ_idx++) { if (succ_idx == u_idx) { u = succ; } if (succ_idx == v_idx) { v = succ; } } final LazyLongIterator u_iterator = graph.successors(u); while ((succ = u_iterator.nextLong()) != -1) { if (succ == v) triangles++; } } trials++; if (trials % 100 == 0 || true) { double gC = (double) triangles / (double) trials; out_global.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials); System.out.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials); } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java index 4dc4c7d..b351869 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java @@ -1,200 +1,200 @@ package org.softwareheritage.graph.experiments.topology; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.AllowedNodes; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.Subgraph; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.*; public class ConnectedComponents { private Subgraph graph; private void load_graph(String graphBasename, String nodeTypes) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - var underlyingGraph = Graph.loadMapped(graphBasename); + var underlyingGraph = SwhBidirectionalGraph.loadMapped(graphBasename); var underlyingGraphSym = underlyingGraph.symmetrize(); graph = new Subgraph(underlyingGraphSym, new AllowedNodes(nodeTypes)); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ConnectedComponents.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "outdir", "Directory where to put the results"), new Switch("byOrigins", JSAP.NO_SHORTFLAG, "by-origins", "Compute size of components by number of origins"), new FlaggedOption("nodeTypes", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', "nodetypes", "Allowed node types (comma-separated)"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private HashMap /* ArrayList> */ compute(ProgressLogger pl, boolean byOrigin) throws IOException { final long n = graph.numNodes(); final long maxN = graph.maxNodeNumber(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * maxN); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 LongArrayBitVector visited = LongArrayBitVector.ofLength(maxN); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting connected components visit..."); // ArrayList> components = new ArrayList<>(); HashMap componentDistribution = new HashMap<>(); var it = graph.nodeIterator(); while (it.hasNext()) { long i = it.nextLong(); if (visited.getBoolean(i)) continue; // ArrayList component = new ArrayList<>(); long componentNodes = 0; queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); // component.add(currentNode); if (!byOrigin || graph.getNodeType(currentNode) == Node.Type.ORI) componentNodes += 1; final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (visited.getBoolean(succ)) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } /* * if (component.size() > 0) { components.add(component); } */ if (componentNodes > 0) componentDistribution.merge(componentNodes, 1L, Long::sum); } pl.done(); // return components; return componentDistribution; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } out.close(); } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } out.close(); } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } out.close(); } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outdirPath = config.getString("outdir"); String nodeTypes = config.getString("nodeTypes"); boolean byOrigin = config.getBoolean("byOrigins"); ConnectedComponents connectedComponents = new ConnectedComponents(); try { connectedComponents.load_graph(graphPath, nodeTypes); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } ProgressLogger logger = new ProgressLogger(); // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { // ArrayList> components = connectedComponents.compute(logger); // components.sort(Comparator.comparing(ArrayList::size).reversed()); // printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); // printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt")); // printAllComponents(components, new Formatter(outdirPath + "/all_components.txt")); HashMap componentDistribution = connectedComponents.compute(logger, byOrigin); PrintWriter f = new PrintWriter(new FileWriter(outdirPath + "/distribution.txt")); TreeMap sortedDistribution = new TreeMap<>(componentDistribution); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } catch (IOException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java index 2d6ebdb..a74a31b 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java @@ -1,239 +1,239 @@ package org.softwareheritage.graph.experiments.topology; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.lang.reflect.InvocationTargetException; import java.util.*; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; import it.unimi.dsi.logging.ProgressLogger; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; public class InOutDegree { private InOutDegree() { } private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1; private static final int TYPE_ALL = Node.Type.values().length; private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT); private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR); private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV); private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL); private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP); private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI); - public static long[] outdegreeTypes(final Graph graph, long node) { + public static long[] outdegreeTypes(final SwhBidirectionalGraph graph, long node) { long[] out = new long[NODE_ARRAY_SIZE]; var successors = graph.successors(node); long neighbor; while ((neighbor = successors.nextLong()) != -1) { out[Node.Type.toInt(graph.getNodeType(neighbor))]++; out[TYPE_ALL]++; } return out; } - public static long[] indegreeTypes(final Graph graph, long node) { + public static long[] indegreeTypes(final SwhBidirectionalGraph graph, long node) { return outdegreeTypes(graph.transpose(), node); } public static void writeDistribution(HashMap distribution, String filename) throws IOException { PrintWriter f = new PrintWriter(new FileWriter(filename)); TreeMap sortedDistribution = new TreeMap<>(distribution); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } - public static void run(final Graph graph, String resultsDir) throws IOException { + public static void run(final SwhBidirectionalGraph graph, String resultsDir) throws IOException { // Per-type var cnt_in_dir = new HashMap(); var dir_in_dir = new HashMap(); var dir_in_rev = new HashMap(); var dir_in_all = new HashMap(); var dir_out_all = new HashMap(); var dir_out_dir = new HashMap(); var dir_out_cnt = new HashMap(); var dir_out_rev = new HashMap(); var rev_in_dir = new HashMap(); var rev_in_rel = new HashMap(); var rev_in_rev = new HashMap(); var rev_in_snp = new HashMap(); var rev_in_all = new HashMap(); var rev_out_rev = new HashMap(); var rel_in_snp = new HashMap(); var snp_in_ori = new HashMap(); var snp_out_all = new HashMap(); var snp_out_rel = new HashMap(); var snp_out_rev = new HashMap(); var ori_out_snp = new HashMap(); // Aggregated per layer var full_in = new HashMap(); var full_out = new HashMap(); var dircnt_in = new HashMap(); var dircnt_out = new HashMap(); var orisnp_in = new HashMap(); var orisnp_out = new HashMap(); var relrev_in = new HashMap(); var relrev_out = new HashMap(); var rev_in = rev_in_rev; // alias for single-type layer var rev_out = rev_out_rev; final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "nodes"; pl.expectedUpdates = graph.numNodes(); pl.start("Scanning..."); long[] in; long[] out; for (long i = graph.numNodes(); i-- != 0;) { long d_in = graph.indegree(i); long d_out = graph.outdegree(i); full_in.merge(d_in, 1L, Long::sum); full_out.merge(d_out, 1L, Long::sum); switch (graph.getNodeType(i)) { case CNT: cnt_in_dir.merge(d_in, 1L, Long::sum); dircnt_in.merge(d_in, 1L, Long::sum); dircnt_out.merge(0L, 1L, Long::sum); break; case DIR: in = indegreeTypes(graph, i); out = outdegreeTypes(graph, i); dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum); dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum); dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum); dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum); dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum); dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum); dircnt_in.merge(in[TYPE_DIR] + in[TYPE_CNT], 1L, Long::sum); dircnt_out.merge(out[TYPE_DIR] + out[TYPE_CNT], 1L, Long::sum); break; case REV: in = indegreeTypes(graph, i); out = outdegreeTypes(graph, i); rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum); rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum); rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum); rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum); rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum); relrev_in.merge(in[TYPE_REL] + in[TYPE_REV], 1L, Long::sum); relrev_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum); break; case REL: rel_in_snp.merge(d_in, 1L, Long::sum); relrev_in.merge(0L, 1L, Long::sum); relrev_out.merge(d_out, 1L, Long::sum); break; case SNP: out = outdegreeTypes(graph, i); snp_in_ori.merge(d_in, 1L, Long::sum); snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum); snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum); snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum); orisnp_in.merge(d_in, 1L, Long::sum); orisnp_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum); break; case ORI: ori_out_snp.merge(d_out, 1L, Long::sum); orisnp_in.merge(0L, 1L, Long::sum); orisnp_out.merge(d_out, 1L, Long::sum); break; default : pl.logger().warn("Invalid node type at pos {}", i); break; } pl.update(); } pl.done(); (new File(resultsDir)).mkdir(); writeDistribution(full_in, resultsDir + "/full_in.txt"); writeDistribution(full_out, resultsDir + "/full_out.txt"); writeDistribution(dircnt_in, resultsDir + "/dir+cnt_in.txt"); writeDistribution(dircnt_out, resultsDir + "/dir+cnt_out.txt"); writeDistribution(relrev_in, resultsDir + "/rel+rev_in.txt"); writeDistribution(relrev_out, resultsDir + "/rel+rev_out.txt"); writeDistribution(orisnp_in, resultsDir + "/ori+snp_in.txt"); writeDistribution(orisnp_out, resultsDir + "/ori+snp_out.txt"); writeDistribution(rev_in, resultsDir + "/rev_in.txt"); writeDistribution(rev_out, resultsDir + "/rev_out.txt"); String resultsTypeDir = resultsDir + "/per_type"; (new File(resultsTypeDir)).mkdir(); writeDistribution(cnt_in_dir, resultsTypeDir + "/cnt_in_dir.txt"); writeDistribution(dir_in_dir, resultsTypeDir + "/dir_in_dir.txt"); writeDistribution(dir_in_rev, resultsTypeDir + "/dir_in_rev.txt"); writeDistribution(dir_in_all, resultsTypeDir + "/dir_in_all.txt"); writeDistribution(dir_out_all, resultsTypeDir + "/dir_out_all.txt"); writeDistribution(dir_out_dir, resultsTypeDir + "/dir_out_dir.txt"); writeDistribution(dir_out_cnt, resultsTypeDir + "/dir_out_cnt.txt"); writeDistribution(dir_out_rev, resultsTypeDir + "/dir_out_rev.txt"); writeDistribution(rev_in_dir, resultsTypeDir + "/rev_in_dir.txt"); writeDistribution(rev_in_rel, resultsTypeDir + "/rev_in_rel.txt"); writeDistribution(rev_in_rev, resultsTypeDir + "/rev_in_rev.txt"); writeDistribution(rev_in_snp, resultsTypeDir + "/rev_in_snp.txt"); writeDistribution(rev_in_all, resultsTypeDir + "/rev_in_all.txt"); writeDistribution(rev_out_rev, resultsTypeDir + "/rev_out_rev.txt"); writeDistribution(rel_in_snp, resultsTypeDir + "/rel_in_snp.txt"); writeDistribution(snp_in_ori, resultsTypeDir + "/snp_in_ori.txt"); writeDistribution(snp_out_all, resultsTypeDir + "/snp_out_all.txt"); writeDistribution(snp_out_rel, resultsTypeDir + "/snp_out_rel.txt"); writeDistribution(snp_out_rev, resultsTypeDir + "/snp_out_rev.txt"); writeDistribution(ori_out_snp, resultsTypeDir + "/ori_out_snp.txt"); } static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, JSAPException, IOException, ClassNotFoundException { final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(), "Computes in and out degrees of the given SWHGraph", new Parameter[]{ new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."), new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The directory of the resulting files."),}); final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1); final String basename = jsapResult.getString("basename"); final String resultsDir = jsapResult.userSpecified("resultsDir") ? jsapResult.getString("resultsDir") : basename; final ProgressLogger pl = new ProgressLogger(); - Graph graph = Graph.loadMapped(basename); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(basename); run(graph, resultsDir); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java index f897e00..3632d32 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java @@ -1,98 +1,98 @@ package org.softwareheritage.graph.experiments.topology; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.Util; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.util.XoRoShiRo128PlusRandom; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.experiments.forks.ForkCC; import java.io.*; public class SubdatasetSizeFunction { private SubdatasetSizeFunction() { } - public static void run(final Graph graph) throws IOException { + public static void run(final SwhBidirectionalGraph graph) throws IOException { final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "nodes"; pl.expectedUpdates = graph.numNodes(); long n = graph.numNodes(); LongArrayBitVector visited = LongArrayBitVector.ofLength(n); int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; long[][] randomPerm = Util.identity(graph.numNodes()); LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); long visitedNodes = 0; long visitedEdges = 0; long visitedOrigins = 0; long visitedContents = 0; pl.start("Running traversal starting from origins..."); for (long j = 0; j < n; ++j) { long i = BigArrays.get(randomPerm, j); if (visited.getBoolean(i) || graph.getNodeType(i) != Node.Type.ORI) { continue; } visitedOrigins++; queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); visitedNodes++; if (graph.getNodeType(currentNode) == Node.Type.CNT) visitedContents++; final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { visitedEdges++; if (visited.getBoolean(succ)) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } if (visitedOrigins % 10000 == 0) System.out.println(visitedNodes + " " + visitedEdges + " " + visitedContents); } pl.done(); } static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, JSAPException, IOException { final SimpleJSAP jsap = new SimpleJSAP(SubdatasetSizeFunction.class.getName(), "Computes subdataset size functions using a random uniform order", new Parameter[]{new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."),}); final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1); final String basename = jsapResult.getString("basename"); - Graph graph = Graph.loadMapped(basename); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(basename); run(graph); } } diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java index 04bde71..0ace4bd 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java @@ -1,527 +1,527 @@ package org.softwareheritage.graph.maps; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.bytes.ByteArrays; import it.unimi.dsi.fastutil.io.FastBufferedInputStream; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.OutputBitStream; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.NodeIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.labels.DirEntry; import org.softwareheritage.graph.labels.SwhLabel; import java.io.*; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.TimeUnit; public class LabelMapBuilder { final static String SORT_BUFFER_SIZE = "40%"; final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class); String graphPath; String outputGraphPath; String debugPath; String tmpDir; ImmutableGraph graph; long numNodes; long numArcs; NodeIdMap nodeIdMap; Object2LongFunction filenameMph; long numFilenames; int totalLabelWidth; public LabelMapBuilder(String graphPath, String debugPath, String outputGraphPath, String tmpDir) throws IOException { this.graphPath = graphPath; if (outputGraphPath == null) { this.outputGraphPath = graphPath; } else { this.outputGraphPath = outputGraphPath; } this.debugPath = debugPath; this.tmpDir = tmpDir; // Load the graph in offline mode to retrieve the number of nodes/edges, // then immediately destroy it. XXX: not even needed? // ImmutableGraph graphOffline = BVGraph.loadMapped(graphPath); graph = BVGraph.loadMapped(graphPath); numArcs = graph.numArcs(); numNodes = graph.numNodes(); - nodeIdMap = new NodeIdMap(graphPath, numNodes); + nodeIdMap = new NodeIdMap(graphPath); filenameMph = NodeIdMap.loadMph(graphPath + "-labels.mph"); numFilenames = getMPHSize(filenameMph); totalLabelWidth = DirEntry.labelWidth(numFilenames); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "debug-path", "Store the intermediate representation here for debug"), new FlaggedOption("outputGraphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "output-graph", "Basename of the output graph, same as --graph if not specified"), new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp", "Temporary directory path"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) throws IOException, InterruptedException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outputGraphPath = config.getString("outputGraphPath"); String tmpDir = config.getString("tmpDir"); String debugPath = config.getString("debugPath"); LabelMapBuilder builder = new LabelMapBuilder(graphPath, debugPath, outputGraphPath, tmpDir); logger.info("Loading graph and MPH functions..."); builder.computeLabelMap(); } static long getMPHSize(Object2LongFunction mph) { return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size(); } void computeLabelMap() throws IOException, InterruptedException { this.loadGraph(); // this.computeLabelMapSort(); this.computeLabelMapBsort(); } void computeLabelMapSort() throws IOException { // Pass the intermediate representation to sort(1) so that we see the labels in the order they will // appear in the label file. ProcessBuilder processBuilder = new ProcessBuilder(); processBuilder.command("sort", "-k1,1n", "-k2,2n", // Numerical sort "--numeric-sort", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir); Process sort = processBuilder.start(); BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream()); // BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream()); FastBufferedInputStream sort_stdout = new FastBufferedInputStream(sort.getInputStream()); final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in); hashLabelStream(fbis, new EdgeLabelLineWriter() { @Override public void writeLine(long src, long dst, long filenameId, int permission) throws IOException { sort_stdin.write((src + "\t" + dst + "\t" + filenameId + "\t" + permission + "\n") .getBytes(StandardCharsets.US_ASCII)); } }); sort_stdin.close(); EdgeLabelLineIterator mapLines = new TextualEdgeLabelLineIterator(sort_stdout); writeLabels(mapLines); logger.info("Done"); } void computeLabelMapBsort() throws IOException, InterruptedException { // Pass the intermediate representation to bsort(1) so that we see the labels in the order they will // appear in the label file. String tmpFile = tmpDir + "/labelsToSort.bin"; final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in); final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tmpFile))); // Number of bytes to represent a node. final int nodeBytes = (Long.SIZE - Long.numberOfLeadingZeros(graph.numNodes())) / 8 + 1; ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES); logger.info("Writing labels to a packed binary files (node bytes: {})", nodeBytes); hashLabelStream(fbis, new EdgeLabelLineWriter() { @Override public void writeLine(long src, long dst, long filenameId, int permission) throws IOException { buffer.putLong(0, src); out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes); buffer.putLong(0, dst); out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes); out.writeLong(filenameId); out.writeInt(permission); } }); ProcessBuilder processBuilder = new ProcessBuilder(); processBuilder.command("/home/seirl/bsort/src/bsort", "-v", "-r", String.valueOf(nodeBytes * 2 + Long.BYTES + Integer.BYTES), "-k", String.valueOf(nodeBytes * 2), tmpFile); Process sort = processBuilder.start(); sort.waitFor(); final DataInputStream sortedLabels = new DataInputStream(new BufferedInputStream(new FileInputStream(tmpFile))); BinaryEdgeLabelLineIterator mapLines = new BinaryEdgeLabelLineIterator(sortedLabels, nodeBytes); writeLabels(mapLines); logger.info("Done"); } void loadGraph() throws IOException { } void hashLabelStream(FastBufferedInputStream input, EdgeLabelLineWriter writer) throws IOException { // Compute intermediate representation and write it on : // "

* Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The * dsiutils ByteBufferInputStream is used to overcome * this Java limit. * * @author The Software Heritage developers */ public class MapFile { /** Memory-mapped file buffer */ ByteBufferInputStream bufferMap; /** Fixed line length of the mmap()-ed file */ int lineLength; /** * Constructor. * * @param path file path to mmap() * @param lineLength fixed length of a line in the file */ public MapFile(String path, int lineLength) throws IOException { this.bufferMap = null; this.lineLength = lineLength; try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { FileChannel fileChannel = mapFile.getChannel(); bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); } } /** * Returns a specific line in the file. * * @param lineIndex line number in the file * @return the line at the specified position */ public byte[] readAtLine(long lineIndex) { byte[] buffer = new byte[lineLength]; long position = lineIndex * (long) lineLength; bufferMap.position(position); bufferMap.read(buffer, 0, lineLength); return buffer; } + public long size() { + return bufferMap.length() / (long) lineLength; + } + /** * Closes the mmap()-ed file. */ public void close() throws IOException { bufferMap.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java index 2a2c50f..7db9c03 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java @@ -1,186 +1,182 @@ package org.softwareheritage.graph.maps; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.util.ByteBufferLongBigList; import org.softwareheritage.graph.SWHID; import java.io.FileInputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; /** * Mapping between internal long node id and external SWHID. *

* The SWHID -> node mapping is obtained from hashing the SWHID with a MPH, then permuting it using * an mmap()-ed .order file containing the graph permutation. * * The node -> SWHID reverse mapping is pre-computed and dumped on disk in the * {@link NodeMapBuilder} class, then it is loaded here using mmap(). * * @author The Software Heritage developers * @see NodeMapBuilder */ public class NodeIdMap { /** Fixed length of binary SWHID buffer */ public static final int SWHID_BIN_SIZE = 22; /** File extension for the long node id to SWHID map */ public static final String NODE_TO_SWHID = ".node2swhid.bin"; /** Graph path and basename */ String graphPath; - /** Number of ids to map */ - long nbIds; /** mmap()-ed NODE_TO_SWHID file */ MapFile nodeToSwhMap; /** Minimal perfect hash (MPH) function SWHID -> initial order */ Object2LongFunction mph; /** mmap()-ed long list with the permutation initial order -> graph order */ LongBigList orderMap; /** FileInputStream containing the permutation */ FileInputStream orderInputStream; /** * Constructor. * * @param graphPath full graph path - * @param nbNodes number of nodes in the graph */ - public NodeIdMap(String graphPath, long nbNodes) throws IOException { + public NodeIdMap(String graphPath) throws IOException { this.graphPath = graphPath; - this.nbIds = nbNodes; // node -> SWHID this.nodeToSwhMap = new MapFile(graphPath + NODE_TO_SWHID, SWHID_BIN_SIZE); // SWHID -> node this.mph = loadMph(graphPath + ".mph"); this.orderInputStream = new FileInputStream(graphPath + ".order"); this.orderMap = ByteBufferLongBigList.map(orderInputStream.getChannel()); } @SuppressWarnings("unchecked") public static Object2LongFunction loadMph(String path) throws IOException { Object obj; try { obj = BinIO.loadObject(path); } catch (ClassNotFoundException e) { throw new IOException(e.getMessage()); } Object2LongFunction res = (Object2LongFunction) obj; // Backward-compatibility for old maps parametrized with . // New maps should be parametrized with , which is faster. try { // Try to call it with bytes, will fail if it's a O2LF. res.getLong("42".getBytes(StandardCharsets.UTF_8)); } catch (ClassCastException e) { class StringCompatibleByteFunction implements Object2LongFunction, Size64 { private final Object2LongFunction legacyFunction; public StringCompatibleByteFunction(Object2LongFunction legacyFunction) { this.legacyFunction = legacyFunction; } @Override public long getLong(Object o) { byte[] bi = (byte[]) o; return legacyFunction.getLong(new String(bi, StandardCharsets.UTF_8)); } @Override public int size() { return legacyFunction.size(); } @Override public long size64() { return (legacyFunction instanceof Size64) ? ((Size64) legacyFunction).size64() : legacyFunction.size(); } } Object2LongFunction mphLegacy = (Object2LongFunction) obj; return new StringCompatibleByteFunction(mphLegacy); } // End of backward-compatibility block return res; } /** * Converts byte-form SWHID to corresponding long node id. Low-level function, does not check if the * SWHID is valid. * * @param swhid node represented as bytes * @return corresponding node as a long id */ public long getNodeId(byte[] swhid) { // 1. Hash the SWHID with the MPH to get its original ID long origNodeId = mph.getLong(swhid); // 2. Use the order permutation to get the position in the permuted graph return this.orderMap.getLong(origNodeId); } /** * Converts SWHID to corresponding long node id. * * @param swhid node represented as a {@link SWHID} * @param checkExists if true, error if the SWHID is not present in the graph, if false the check * will be skipped and invalid data will be returned for non-existing SWHIDs. * @return corresponding node as a long id * @see SWHID */ public long getNodeId(SWHID swhid, boolean checkExists) { // Convert the SWHID to bytes and call getNodeId() long nodeId = getNodeId(swhid.toString().getBytes(StandardCharsets.US_ASCII)); // Check that the position effectively corresponds to a real node using the reverse map. // This is necessary because the MPH makes no guarantees on whether the input SWHID is valid. if (!checkExists || getSWHID(nodeId).equals(swhid)) { return nodeId; } else { throw new IllegalArgumentException("Unknown SWHID: " + swhid); } } public long getNodeId(SWHID swhid) { return getNodeId(swhid, true); } /** * Converts a node long id to corresponding SWHID. * * @param nodeId node as a long id * @return corresponding node as a {@link SWHID} * @see SWHID */ public SWHID getSWHID(long nodeId) { /* * Each line in NODE_TO_SWHID is formatted as: swhid The file is ordered by nodeId, meaning node0's * swhid is at line 0, hence we can read the nodeId-th line to get corresponding swhid */ - if (nodeId < 0 || nodeId >= nbIds) { - throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); + if (nodeId < 0 || nodeId >= nodeToSwhMap.size()) { + throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nodeToSwhMap.size()); } return SWHID.fromBytes(nodeToSwhMap.readAtLine(nodeId)); } /** * Closes the mapping files. */ public void close() throws IOException { orderInputStream.close(); nodeToSwhMap.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/server/App.java b/java/src/main/java/org/softwareheritage/graph/server/App.java index bb2ce5b..9849310 100644 --- a/java/src/main/java/org/softwareheritage/graph/server/App.java +++ b/java/src/main/java/org/softwareheritage/graph/server/App.java @@ -1,196 +1,196 @@ package org.softwareheritage.graph.server; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.*; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.Stats; import org.softwareheritage.graph.SWHID; import java.io.IOException; import java.util.List; import java.util.Map; /** * Web framework of the swh-graph server RPC API. * * @author The Software Heritage developers */ public class App { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), "Server to load and query a compressed graph representation of Software Heritage archive.", new Parameter[]{ new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", "Binding port of the server."), new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new Switch("timings", 't', "timings", "Show timings in API result metadata."),}); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } String graphPath = config.getString("graphPath"); int port = config.getInt("port"); boolean showTimings = config.getBoolean("timings"); startServer(graphPath, port, showTimings); } /** * Loads compressed graph and starts the web server to query it. * * @param graphPath basename of the compressed graph * @param port binding port of the server * @param showTimings true if timings should be in results metadata, false otherwise */ private static void startServer(String graphPath, int port, boolean showTimings) throws IOException { - Graph graph = Graph.loadMapped(graphPath); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(graphPath); Stats stats = new Stats(graphPath); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { - graph.cleanUp(); + graph.close(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(port); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/neighbors/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/nodes/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/paths/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/walk/:src/:dst", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } /** * Checks query strings names provided to the RPC API. * * @param ctx Javalin HTTP request context * @param allowedFmt a regular expression describing allowed query strings names * @throws IllegalArgumentException unknown query string provided */ private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } /** * Formats endpoint result into final JSON for the RPC API. *

* Removes unwanted information if necessary, such as timings (to prevent use of side channels * attacks). * * @param output endpoint operation output which needs formatting * @param showTimings true if timings should be in results metadata, false otherwise * @return final Object with desired JSON format */ private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { if (showTimings) { return output; } else { Map metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed); Map outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings); return outputNoTimings; } } } diff --git a/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java index 97664d6..d1055e4 100644 --- a/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java +++ b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java @@ -1,309 +1,309 @@ package org.softwareheritage.graph.server; import org.softwareheritage.graph.*; import org.softwareheritage.graph.benchmark.utils.Timing; import java.util.ArrayList; /** * RPC API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author The Software Heritage developers * @see Traversal */ public class Endpoint { /** Graph where traversal endpoint is performed */ - Graph graph; + SwhBidirectionalGraph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed * edges */ - public Endpoint(Graph graph, String direction, String edgesFmt) { + public Endpoint(SwhBidirectionalGraph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWHIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWHIDs */ private ArrayList convertNodesToSWHIDs(ArrayList nodeIds) { ArrayList swhids = new ArrayList<>(); for (long nodeId : nodeIds) { swhids.add(graph.getSWHID(nodeId)); } return swhids; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSWHID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSWHIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SWHID} from endpoint call and operation metadata * @see SWHID * @see Traversal#leaves(long) */ public Output leaves(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSWHIDs(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SWHID} from endpoint call and operation metadata * @see SWHID * @see Traversal#neighbors(long) */ public Output neighbors(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSWHIDs(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see SWHID * @see org.softwareheritage.graph.SwhPath * @see Traversal#walk */ public Output walk(Input input) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWHID or a node type try { SWHID dstSWHID = new SWHID(input.dstFmt); long dstNodeId = graph.getNodeId(dstSWHID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(input.dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored2) { } } output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SWHID} from endpoint call and operation metadata * @see SWHID * @see Traversal#visitNodes(long) */ public Output visitNodes(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSWHIDs(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see SWHID * @see org.softwareheritage.graph.SwhPath * @see Traversal#visitPaths(long) */ public Output visitPaths(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertPathsToSWHIDs(paths); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * Wrapper class to unify traversal methods input signatures. */ public static class Input { /** Source node of endpoint call specified as a {@link SWHID} */ public SWHID src; /** * Destination formatted string as described in the * API */ public String dstFmt; /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public String algorithm; public Input(SWHID src) { this.src = src; } public Input(SWHID src, String dstFmt, String algorithm) { this.src = src; this.dstFmt = dstFmt; this.algorithm = algorithm; } } /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public static class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** * Endpoint result metadata. */ public class Meta { /** Operations timings */ public Timings timings; /** Number of edges accessed during traversal */ public long nbEdgesAccessed; public Meta() { this.timings = new Timings(); this.nbEdgesAccessed = 0; } /** * Wrapper class for JSON output format. */ public class Timings { /** Time in seconds to do the traversal */ public double traversal; /** Time in seconds to convert input SWHID to node id */ public double swhid2node; /** Time in seconds to convert output node ids to SWHIDs */ public double node2swhid; } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java b/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java index b88f8b6..0f09ccd 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java @@ -1,76 +1,76 @@ package org.softwareheritage.graph.utils; import com.google.common.primitives.Longs; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; -import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhBidirectionalGraph; import org.softwareheritage.graph.SWHID; import org.softwareheritage.graph.experiments.topology.ConnectedComponents; import org.softwareheritage.graph.maps.NodeIdMap; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; public class ExportSubdataset { public static void main(String[] args) throws IOException, ClassNotFoundException { System.err.print("Loading everything..."); String graphPath = args[0]; - Graph graph = Graph.loadMapped(graphPath); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(graphPath); Object2LongFunction mphMap = NodeIdMap.loadMph(graphPath + ".mph"); System.err.println(" done."); final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 LongArrayBitVector visited = LongArrayBitVector.ofLength(n); FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(System.in, StandardCharsets.US_ASCII)); LineIterator lineIterator = new LineIterator(buffer); while (lineIterator.hasNext()) { String line = lineIterator.next().toString(); long i; try { // i = mphMap.getLong(line.getBytes(StandardCharsets.UTF_8)); i = graph.getNodeId(new SWHID(line)); } catch (IllegalArgumentException e) { continue; } queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); SWHID currentNodeSWHID = graph.getSWHID(currentNode); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { System.out.format("%s %s\n", currentNodeSWHID, graph.getSWHID(succ)); if (visited.getBoolean(succ)) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java index 71379f2..b7461a3 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java @@ -1,116 +1,113 @@ package org.softwareheritage.graph.utils; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.io.BinIO; -import org.softwareheritage.graph.AllowedEdges; -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.Node; -import org.softwareheritage.graph.SWHID; +import org.softwareheritage.graph.*; import java.io.IOException; import java.time.Duration; import java.util.HashSet; import java.util.Scanner; import java.util.Stack; /* sample invocation on granet.internal.softwareheritage.org for benchmarking * purposes, with the main swh-graph service already running: * * $ java -cp ~/swh-environment/swh-graph/java/target/swh-graph-0.3.0.jar -Xmx300G -XX:PretenureSizeThreshold=512M -XX:MaxNewSize=4G -XX:+UseLargePages -XX:+UseTransparentHugePages -XX:+UseNUMA -XX:+UseTLAB -XX:+ResizeTLAB org.softwareheritage.graph.utils.FindEarliestRevision --timing /dev/shm/swh-graph/default/graph * */ public class FindEarliestRevision { public static void main(String[] args) throws IOException, ClassNotFoundException { String graphPath = args[0]; boolean timing = false; long ts, elapsedNanos; Duration elapsed; if (args.length >= 2 && (args[0].equals("-t") || args[0].equals("--timing"))) { timing = true; graphPath = args[1]; System.err.println("started with timing option, will keep track of elapsed time"); } System.err.println("loading transposed graph..."); ts = System.nanoTime(); - Graph graph = Graph.loadMapped(graphPath).transpose(); + SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(graphPath).transpose(); elapsed = Duration.ofNanos(System.nanoTime() - ts); System.err.println(String.format("transposed graph loaded (duration: %s).", elapsed)); System.err.println("loading revision timestamps..."); ts = System.nanoTime(); long[][] committerTimestamps = BinIO.loadLongsBig(graphPath + "-rev_committer_timestamps.bin"); elapsed = Duration.ofNanos(System.nanoTime() - ts); System.err.println(String.format("revision timestamps loaded (duration: %s).", elapsed)); Scanner stdin = new Scanner(System.in); AllowedEdges edges = new AllowedEdges("cnt:dir,dir:dir,dir:rev"); String rawSWHID = null; SWHID srcSWHID = null; long lineCount = 0; long srcNodeId = -1; if (timing) { System.err.println("starting SWHID processing..."); elapsed = Duration.ZERO; } while (stdin.hasNextLine()) { if (timing) ts = System.nanoTime(); rawSWHID = stdin.nextLine().strip(); lineCount++; try { srcSWHID = new SWHID(rawSWHID); srcNodeId = graph.getNodeId(srcSWHID); } catch (IllegalArgumentException e) { System.err .println(String.format("skipping invalid or unknown SWHID %s on line %d", rawSWHID, lineCount)); continue; } if (timing) System.err.println("starting traversal for: " + srcSWHID.toString()); Stack stack = new Stack<>(); HashSet visited = new HashSet<>(); stack.push(srcNodeId); visited.add(srcNodeId); long minRevId = -1; long minTimestamp = Long.MAX_VALUE; while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (graph.getNodeType(currentNodeId) == Node.Type.REV) { long committerTs = BigArrays.get(committerTimestamps, currentNodeId); if (committerTs < minTimestamp) { minRevId = currentNodeId; minTimestamp = committerTs; } } - LazyLongIterator it = graph.successors(currentNodeId, edges); + LazyLongIterator it = Traversal.filterSuccessors(graph, currentNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } } if (minRevId == -1) { System.err.println("no revision found containing: " + srcSWHID.toString()); } else { System.out.println(srcSWHID.toString() + "\t" + graph.getSWHID(minRevId).toString()); } if (timing) { elapsedNanos = System.nanoTime() - ts; // processing time for current SWHID elapsed = elapsed.plus(Duration.ofNanos(elapsedNanos)); // cumulative processing time for all SWHIDs System.err.println(String.format("visit time (s):\t%.6f", (double) elapsedNanos / 1_000_000_000)); } } if (timing) System.err.println(String.format("processed %d SWHIDs in %s (%s avg)", lineCount, elapsed, elapsed.dividedBy(lineCount))); } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java b/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java index 545dc8f..ace8c94 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java @@ -1,27 +1,27 @@ package org.softwareheritage.graph.utils; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.NodeIterator; import org.softwareheritage.graph.maps.NodeIdMap; import java.io.IOException; public class ReadGraph { public static void main(String[] args) throws IOException { String graphPath = args[0]; ImmutableGraph graph = ImmutableGraph.load(graphPath); - NodeIdMap nodeMap = new NodeIdMap(graphPath, graph.numNodes()); + NodeIdMap nodeMap = new NodeIdMap(graphPath); NodeIterator it = graph.nodeIterator(); while (it.hasNext()) { long srcNode = it.nextLong(); var s = it.successors(); long dstNode; while ((dstNode = s.nextLong()) >= 0) { System.out.format("%s %s\n", nodeMap.getSWHID(srcNode), nodeMap.getSWHID(dstNode)); } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java index 4b04992..179edf1 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java @@ -1,40 +1,40 @@ package org.softwareheritage.graph.utils; import it.unimi.dsi.big.util.FrontCodedStringBigList; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator; import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; import it.unimi.dsi.fastutil.io.BinIO; import org.softwareheritage.graph.labels.DirEntry; import org.softwareheritage.graph.maps.NodeIdMap; import java.io.IOException; public class ReadLabelledGraph { public static void main(String[] args) throws IOException, ClassNotFoundException { String graphPath = args[0]; ArcLabelledImmutableGraph graph = BitStreamArcLabelledImmutableGraph.loadOffline(graphPath + "-labelled"); - NodeIdMap nodeMap = new NodeIdMap(graphPath, graph.numNodes()); + NodeIdMap nodeMap = new NodeIdMap(graphPath); FrontCodedStringBigList filenameMap = (FrontCodedStringBigList) BinIO.loadObject(graphPath + "-labels.fcl"); ArcLabelledNodeIterator it = graph.nodeIterator(); while (it.hasNext()) { long srcNode = it.nextLong(); ArcLabelledNodeIterator.LabelledArcIterator s = it.successors(); long dstNode; while ((dstNode = s.nextLong()) >= 0) { DirEntry[] labels = (DirEntry[]) s.label().get(); if (labels.length > 0) { for (DirEntry label : labels) { System.out.format("%s %s %s %d\n", nodeMap.getSWHID(srcNode), nodeMap.getSWHID(dstNode), filenameMap.get(label.filenameId), label.permission); } } else { System.out.format("%s %s\n", nodeMap.getSWHID(srcNode), nodeMap.getSWHID(dstNode)); } } } } } diff --git a/java/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/src/test/java/org/softwareheritage/graph/GraphTest.java index fba8ed8..42c84a2 100644 --- a/java/src/test/java/org/softwareheritage/graph/GraphTest.java +++ b/java/src/test/java/org/softwareheritage/graph/GraphTest.java @@ -1,44 +1,44 @@ package org.softwareheritage.graph; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.LazyLongIterators; import org.hamcrest.MatcherAssert; import org.junit.jupiter.api.BeforeAll; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; public class GraphTest { - static Graph graph; + static SwhBidirectionalGraph graph; @BeforeAll public static void setUp() throws IOException { Path graphPath = Paths.get("..", "swh", "graph", "tests", "dataset", "output", "example"); - graph = Graph.loadMapped(graphPath.toString()); + graph = SwhBidirectionalGraph.loadMapped(graphPath.toString()); } - public Graph getGraph() { + public SwhBidirectionalGraph getGraph() { return graph; } public static SWHID fakeSWHID(String type, int num) { return new SWHID(String.format("swh:1:%s:%040d", type, num)); } public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) { MatcherAssert.assertThat(expecteds, containsInAnyOrder(actuals.toArray())); } public static ArrayList lazyLongIteratorToList(LazyLongIterator input) { ArrayList inputList = new ArrayList<>(); Iterator inputIt = LazyLongIterators.eager(input); inputIt.forEachRemaining(inputList::add); return inputList; } } diff --git a/java/src/test/java/org/softwareheritage/graph/LeavesTest.java b/java/src/test/java/org/softwareheritage/graph/LeavesTest.java index a288d03..ae8765b 100644 --- a/java/src/test/java/org/softwareheritage/graph/LeavesTest.java +++ b/java/src/test/java/org/softwareheritage/graph/LeavesTest.java @@ -1,107 +1,107 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.server.Endpoint; // Avoid warnings concerning Endpoint.Output.result manual cast @SuppressWarnings("unchecked") public class LeavesTest extends GraphTest { @Test public void forwardFromSnp() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint = new Endpoint(graph, "forward", "*"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void forwardFromRel() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:rel:0000000000000000000000000000000000000019"); Endpoint endpoint = new Endpoint(graph, "forward", "*"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000015")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000014")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000011")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void backwardFromLeaf() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); SWHID src1 = new SWHID("swh:1:cnt:0000000000000000000000000000000000000015"); ArrayList expectedLeaves1 = new ArrayList<>(); expectedLeaves1.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000019")); ArrayList actualLeaves1 = (ArrayList) endpoint1.leaves(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves1, actualLeaves1); Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); SWHID src2 = new SWHID("swh:1:cnt:0000000000000000000000000000000000000004"); ArrayList expectedLeaves2 = new ArrayList<>(); expectedLeaves2.add(new SWHID("swh:1:ori:0000000000000000000000000000000000000021")); expectedLeaves2.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000019")); ArrayList actualLeaves2 = (ArrayList) endpoint2.leaves(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves2, actualLeaves2); } @Test public void forwardRevToRevOnly() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000018"); Endpoint endpoint = new Endpoint(graph, "forward", "rev:rev"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000003")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void forwardDirToAll() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint = new Endpoint(graph, "forward", "dir:*"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void backwardCntToDirDirToDir() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:cnt:0000000000000000000000000000000000000005"); Endpoint endpoint = new Endpoint(graph, "backward", "cnt:dir,dir:dir"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000012")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } } diff --git a/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java b/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java index cf41aa4..86bbda9 100644 --- a/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java +++ b/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java @@ -1,141 +1,141 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.server.Endpoint; // Avoid warnings concerning Endpoint.Output.result manual cast @SuppressWarnings("unchecked") public class NeighborsTest extends GraphTest { @Test public void zeroNeighbor() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); ArrayList expectedNodes = new ArrayList<>(); SWHID src1 = new SWHID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals1); SWHID src2 = new SWHID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals2); SWHID src3 = new SWHID("swh:1:cnt:0000000000000000000000000000000000000015"); Endpoint endpoint3 = new Endpoint(graph, "forward", "*"); ArrayList actuals3 = (ArrayList) endpoint3.neighbors(new Endpoint.Input(src3)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals3); SWHID src4 = new SWHID("swh:1:rel:0000000000000000000000000000000000000019"); Endpoint endpoint4 = new Endpoint(graph, "backward", "*"); ArrayList actuals4 = (ArrayList) endpoint4.neighbors(new Endpoint.Input(src4)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals4); SWHID src5 = new SWHID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint5 = new Endpoint(graph, "forward", "snp:*,rev:*,rel:*"); ArrayList actuals5 = (ArrayList) endpoint5.neighbors(new Endpoint.Input(src5)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals5); } @Test public void oneNeighbor() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src1 = new SWHID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList expectedNodes1 = new ArrayList<>(); expectedNodes1.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000002")); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes1, actuals1); SWHID src2 = new SWHID("swh:1:dir:0000000000000000000000000000000000000017"); Endpoint endpoint2 = new Endpoint(graph, "forward", "dir:cnt"); ArrayList expectedNodes2 = new ArrayList<>(); expectedNodes2.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000014")); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes2, actuals2); SWHID src3 = new SWHID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint3 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes3 = new ArrayList<>(); expectedNodes3.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000013")); ArrayList actuals3 = (ArrayList) endpoint3.neighbors(new Endpoint.Input(src3)).result; GraphTest.assertEqualsAnyOrder(expectedNodes3, actuals3); SWHID src4 = new SWHID("swh:1:rev:0000000000000000000000000000000000000009"); Endpoint endpoint4 = new Endpoint(graph, "backward", "rev:rev"); ArrayList expectedNodes4 = new ArrayList<>(); expectedNodes4.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000013")); ArrayList actuals4 = (ArrayList) endpoint4.neighbors(new Endpoint.Input(src4)).result; GraphTest.assertEqualsAnyOrder(expectedNodes4, actuals4); SWHID src5 = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint5 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes5 = new ArrayList<>(); expectedNodes5.add(new SWHID("swh:1:ori:0000000000000000000000000000000000000021")); ArrayList actuals5 = (ArrayList) endpoint5.neighbors(new Endpoint.Input(src5)).result; GraphTest.assertEqualsAnyOrder(expectedNodes5, actuals5); } @Test public void twoNeighbors() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src1 = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList expectedNodes1 = new ArrayList<>(); expectedNodes1.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes1.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000009")); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes1, actuals1); SWHID src2 = new SWHID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint2 = new Endpoint(graph, "forward", "dir:cnt"); ArrayList expectedNodes2 = new ArrayList<>(); expectedNodes2.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedNodes2.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes2, actuals2); SWHID src3 = new SWHID("swh:1:cnt:0000000000000000000000000000000000000001"); Endpoint endpoint3 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes3 = new ArrayList<>(); expectedNodes3.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000008")); expectedNodes3.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000002")); ArrayList actuals3 = (ArrayList) endpoint3.neighbors(new Endpoint.Input(src3)).result; GraphTest.assertEqualsAnyOrder(expectedNodes3, actuals3); SWHID src4 = new SWHID("swh:1:rev:0000000000000000000000000000000000000009"); Endpoint endpoint4 = new Endpoint(graph, "backward", "rev:snp,rev:rel"); ArrayList expectedNodes4 = new ArrayList<>(); expectedNodes4.add(new SWHID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes4.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000010")); ArrayList actuals4 = (ArrayList) endpoint4.neighbors(new Endpoint.Input(src4)).result; GraphTest.assertEqualsAnyOrder(expectedNodes4, actuals4); } @Test public void threeNeighbors() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src1 = new SWHID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList expectedNodes1 = new ArrayList<>(); expectedNodes1.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000006")); expectedNodes1.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedNodes1.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes1, actuals1); SWHID src2 = new SWHID("swh:1:rev:0000000000000000000000000000000000000009"); Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes2 = new ArrayList<>(); expectedNodes2.add(new SWHID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes2.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes2.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000013")); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes2, actuals2); } } diff --git a/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java index 1f95ebe..b561de9 100644 --- a/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java +++ b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java @@ -1,85 +1,85 @@ package org.softwareheritage.graph; import java.util.*; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class SubgraphTest extends GraphTest { @Test public void noFilter() { - Graph g = getGraph(); + SwhBidirectionalGraph g = getGraph(); Subgraph sg = new Subgraph(g, new AllowedNodes("*")); for (long i = 0; i < g.numNodes(); ++i) { Assertions.assertEquals(g.outdegree(i), sg.outdegree(i)); } } @Test public void missingNode() { - Graph g = getGraph(); + SwhBidirectionalGraph g = getGraph(); Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); SWHID rev1 = fakeSWHID("rev", 18); Assertions.assertThrows(IllegalArgumentException.class, () -> { sg.outdegree(sg.getNodeId(rev1)); }); Assertions.assertThrows(IllegalArgumentException.class, () -> { sg.successors(sg.getNodeId(rev1)); }); } @Test public void outdegreeOnlyDirOri() { - Graph g = getGraph(); + SwhBidirectionalGraph g = getGraph(); Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); SWHID dir1 = fakeSWHID("dir", 17); Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir1))); Assertions.assertEquals(1, sg.outdegree(sg.getNodeId(dir1))); SWHID dir2 = fakeSWHID("dir", 6); Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir2))); Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(dir2))); SWHID ori1 = fakeSWHID("ori", 21); Assertions.assertEquals(1, g.outdegree(g.getNodeId(ori1))); Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(ori1))); } @Test public void successorsOnlyDirOri() { - Graph g = getGraph(); + SwhBidirectionalGraph g = getGraph(); Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); SWHID dir1 = fakeSWHID("dir", 17); assertEqualsAnyOrder(Collections.singletonList(sg.getNodeId(fakeSWHID("dir", 16))), lazyLongIteratorToList(sg.successors(sg.getNodeId(dir1)))); SWHID dir2 = fakeSWHID("dir", 6); assertEqualsAnyOrder(Collections.emptyList(), lazyLongIteratorToList(sg.successors(sg.getNodeId(dir2)))); SWHID ori1 = fakeSWHID("ori", 21); assertEqualsAnyOrder(Collections.emptyList(), lazyLongIteratorToList(sg.successors(sg.getNodeId(ori1)))); } @Test public void nodeIteratorOnlyOriDir() { - Graph g = getGraph(); + SwhBidirectionalGraph g = getGraph(); Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); ArrayList nodeList = new ArrayList<>(); Iterator nodeIt = sg.nodeIterator(); nodeIt.forEachRemaining(nodeList::add); assertEqualsAnyOrder(Arrays.asList(sg.getNodeId(fakeSWHID("ori", 21)), sg.getNodeId(fakeSWHID("dir", 2)), sg.getNodeId(fakeSWHID("dir", 6)), sg.getNodeId(fakeSWHID("dir", 8)), sg.getNodeId(fakeSWHID("dir", 12)), sg.getNodeId(fakeSWHID("dir", 16)), sg.getNodeId(fakeSWHID("dir", 17))), nodeList); sg = new Subgraph(g, new AllowedNodes("snp,rel")); nodeList = new ArrayList<>(); nodeIt = sg.nodeIterator(); nodeIt.forEachRemaining(nodeList::add); assertEqualsAnyOrder(Arrays.asList(sg.getNodeId(fakeSWHID("snp", 20)), sg.getNodeId(fakeSWHID("rel", 10)), sg.getNodeId(fakeSWHID("rel", 19))), nodeList); } } diff --git a/java/src/test/java/org/softwareheritage/graph/VisitTest.java b/java/src/test/java/org/softwareheritage/graph/VisitTest.java index de5e8af..6144890 100644 --- a/java/src/test/java/org/softwareheritage/graph/VisitTest.java +++ b/java/src/test/java/org/softwareheritage/graph/VisitTest.java @@ -1,420 +1,420 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.util.Set; import java.util.HashSet; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.server.Endpoint; // Avoid warnings concerning Endpoint.Output.result manual cast @SuppressWarnings("unchecked") public class VisitTest extends GraphTest { private void assertSameNodesFromPaths(ArrayList paths, ArrayList nodes) { Set expectedNodes = new HashSet(); for (SwhPath path : paths) { expectedNodes.addAll(path.getPath()); } GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } @Test public void forwardFromRoot() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005")); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromMiddle() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007")); expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001")); expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004")); expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005")); expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:cnt:0000000000000000000000000000000000000011")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromLeaf() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromRoot() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromMiddle() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromLeaf() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019")); expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019")); expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:ori:0000000000000000000000000000000000000021")); expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:ori:0000000000000000000000000000000000000021")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardSnpToRev() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "snp:rev"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "snp:rev"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardRelToRevRevToRev() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rel:0000000000000000000000000000000000000010"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rel:rev,rev:rev"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rel:rev,rev:rev"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardRevToAllDirToAll() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rev:0000000000000000000000000000000000000013"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:*,dir:*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:*,dir:*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:cnt:0000000000000000000000000000000000000011")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardSnpToAllRevToAll() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "snp:*,rev:*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "snp:*,rev:*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002")); expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008")); expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardNoEdges() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", ""); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", ""); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardRevToRevRevToRel() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint1 = new Endpoint(graph, "backward", "rev:rev,rev:rel"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "rev:rev,rev:rel"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019")); expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromRootNodesOnly() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedNodes = new ArrayList(); expectedNodes.add(new SWHID("swh:1:ori:0000000000000000000000000000000000000021")); expectedNodes.add(new SWHID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000009")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000003")); expectedNodes.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000002")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedNodes.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000008")); expectedNodes.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000006")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } @Test public void backwardRevToAllNodesOnly() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint = new Endpoint(graph, "backward", "rev:*"); ArrayList nodes = (ArrayList) endpoint.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedNodes = new ArrayList(); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000003")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000009")); expectedNodes.add(new SWHID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000013")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000018")); expectedNodes.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000019")); GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } } diff --git a/java/src/test/java/org/softwareheritage/graph/WalkTest.java b/java/src/test/java/org/softwareheritage/graph/WalkTest.java index 8ddd0f9..57dc600 100644 --- a/java/src/test/java/org/softwareheritage/graph/WalkTest.java +++ b/java/src/test/java/org/softwareheritage/graph/WalkTest.java @@ -1,187 +1,187 @@ package org.softwareheritage.graph; import java.util.Arrays; import java.util.List; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.server.Endpoint; public class WalkTest extends GraphTest { @Test public void forwardRootToLeaf() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); String dstFmt = "swh:1:cnt:0000000000000000000000000000000000000005"; SwhPath solution1 = new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005"); SwhPath solution2 = new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2); Assertions.assertTrue(possibleSolutions.contains(dfsPath)); Assertions.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void forwardLeafToLeaf() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:cnt:0000000000000000000000000000000000000007"); String dstFmt = "cnt"; SwhPath expectedPath = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000007"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } @Test public void forwardRevToRev() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000018"); String dstFmt = "swh:1:rev:0000000000000000000000000000000000000003"; SwhPath expectedPath = new SwhPath("swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:rev"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:rev"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } @Test public void backwardRevToRev() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000003"); String dstFmt = "swh:1:rev:0000000000000000000000000000000000000018"; SwhPath expectedPath = new SwhPath("swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018"); Endpoint endpoint1 = new Endpoint(graph, "backward", "rev:rev"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "rev:rev"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } @Test public void backwardCntToFirstSnp() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:cnt:0000000000000000000000000000000000000001"); String dstFmt = "snp"; SwhPath solution1 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:snp:0000000000000000000000000000000000000020"); SwhPath solution2 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:snp:0000000000000000000000000000000000000020"); SwhPath solution3 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:snp:0000000000000000000000000000000000000020"); SwhPath solution4 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2, solution3, solution4); Assertions.assertTrue(possibleSolutions.contains(dfsPath)); Assertions.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void forwardRevToFirstCnt() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000009"); String dstFmt = "cnt"; SwhPath solution1 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007"); SwhPath solution2 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005"); SwhPath solution3 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004"); SwhPath solution4 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001"); SwhPath solution5 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:*,dir:*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:*,dir:*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2, solution3, solution4, solution5); Assertions.assertTrue(possibleSolutions.contains(dfsPath)); Assertions.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void backwardDirToFirstRel() { - Graph graph = getGraph(); + SwhBidirectionalGraph graph = getGraph(); SWHID src = new SWHID("swh:1:dir:0000000000000000000000000000000000000016"); String dstFmt = "rel"; SwhPath expectedPath = new SwhPath("swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019"); Endpoint endpoint1 = new Endpoint(graph, "backward", "dir:dir,dir:rev,rev:*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "dir:dir,dir:rev,rev:*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } }