diff --git a/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java b/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java new file mode 100644 index 0000000..40f473a --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java @@ -0,0 +1,50 @@ +package org.softwareheritage.graph; + +/** + * TODO + * + * @author The Software Heritage developers + */ + +public class AllowedNodes { + public boolean[] restrictedTo; + + /** + * Constructor. + * + * @param nodesFmt a formatted string describing allowed nodes + */ + public AllowedNodes(String nodesFmt) { + int nbNodeTypes = Node.Type.values().length; + this.restrictedTo = new boolean[nbNodeTypes]; + // Special values (null, empty, "*") + if (nodesFmt == null || nodesFmt.isEmpty()) { + return; + } + if (nodesFmt.equals("*")) { + // Allows for quick bypass (with simple null check) when no node restriction + restrictedTo = null; + return; + } + + // Format: "nodeType1,nodeType2,[...]" + String[] nodeTypesStr = nodesFmt.split(","); + for (String nodeTypeStr : nodeTypesStr) { + for (Node.Type nodeType : Node.Type.parse(nodeTypeStr)) { + this.restrictedTo[Node.Type.toInt(nodeType)] = true; + } + } + } + + /** + * Checks if a given node type is allowed. 
+ * + * @param nodeType node type to check + * @return true if allowed and false otherwise + */ + public boolean isAllowed(Node.Type nodeType) { + if (restrictedTo == null) + return true; + return restrictedTo[Node.Type.toInt(nodeType)]; + } +} diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java index c911581..0db1bb6 100644 --- a/java/src/main/java/org/softwareheritage/graph/Entry.java +++ b/java/src/main/java/org/softwareheritage/graph/Entry.java @@ -1,196 +1,196 @@ package org.softwareheritage.graph; import java.io.DataOutputStream; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; public class Entry { private final long PATH_SEPARATOR_ID = -1; private Graph graph; public void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename); + this.graph = Graph.loadMapped(graphBasename); System.err.println("Graph loaded."); } public Graph get_graph() { return graph.copy(); } public String stats() { try { Stats stats = new Stats(graph.getPath()); ObjectMapper objectMapper = new ObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); return objectMapper.writeValueAsString(stats); } catch (IOException e) { throw new RuntimeException("Cannot read stats: " + e); } } private int count_visitor(NodeCountVisitor f, long srcNodeId) { int[] count = {0}; f.accept(srcNodeId, (node) -> { count[0]++; }); return count[0]; } public int count_leaves(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::leavesVisitor, srcNodeId); } public int count_neighbors(String direction, String edgesFmt, long srcNodeId) { Traversal t = new 
Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::neighborsVisitor, srcNodeId); } public int count_visit_nodes(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::visitNodesVisitor, srcNodeId); } public QueryHandler get_handler(String clientFIFO) { return new QueryHandler(this.graph.copy(), clientFIFO); } private interface NodeCountVisitor { void accept(long nodeId, Traversal.NodeIdConsumer consumer); } public class QueryHandler { Graph graph; DataOutputStream out; String clientFIFO; public QueryHandler(Graph graph, String clientFIFO) { this.graph = graph; this.clientFIFO = clientFIFO; this.out = null; } public void writeNode(long nodeId) { try { out.writeLong(nodeId); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void writeEdge(long srcId, long dstId) { writeNode(srcId); writeNode(dstId); } public void writePath(ArrayList path) { for (Long nodeId : path) { writeNode(nodeId); } writeNode(PATH_SEPARATOR_ID); } public void open() { try { FileOutputStream file = new FileOutputStream(this.clientFIFO); this.out = new DataOutputStream(file); } catch (IOException e) { throw new RuntimeException("Cannot open client FIFO: " + e); } } public void close() { try { out.close(); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void leaves(String direction, String edgesFmt, long srcNodeId, long maxEdges, String returnTypes) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.leaves(srcNodeId)) { writeNode(nodeId); } close(); } public void neighbors(String direction, String edgesFmt, long srcNodeId, long maxEdges, String returnTypes) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.neighbors(srcNodeId)) { 
writeNode(nodeId); } close(); } public void visit_nodes(String direction, String edgesFmt, long srcNodeId, long maxEdges, String returnTypes) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges, returnTypes); for (Long nodeId : t.visitNodes(srcNodeId)) { writeNode(nodeId); } close(); } public void visit_edges(String direction, String edgesFmt, long srcNodeId, long maxEdges) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges); t.visitNodesVisitor(srcNodeId, null, this::writeEdge); close(); } public void visit_paths(String direction, String edgesFmt, long srcNodeId, long maxEdges) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt, maxEdges); t.visitPathsVisitor(srcNodeId, this::writePath); close(); } public void walk(String direction, String edgesFmt, String algorithm, long srcNodeId, long dstNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.walk(srcNodeId, dstNodeId, algorithm)) { writeNode(nodeId); } close(); } public void walk_type(String direction, String edgesFmt, String algorithm, long srcNodeId, String dst) { open(); Node.Type dstType = Node.Type.fromStr(dst); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.walk(srcNodeId, dstType, algorithm)) { writeNode(nodeId); } close(); } public void random_walk(String direction, String edgesFmt, int retries, long srcNodeId, long dstNodeId, String returnTypes) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt, 0, returnTypes); for (Long nodeId : t.randomWalk(srcNodeId, dstNodeId, retries)) { writeNode(nodeId); } close(); } public void random_walk_type(String direction, String edgesFmt, int retries, long srcNodeId, String dst, String returnTypes) { open(); Node.Type dstType = Node.Type.fromStr(dst); Traversal t = new Traversal(this.graph, direction, edgesFmt, 0, returnTypes); for (Long nodeId : t.randomWalk(srcNodeId, dstType, retries)) { 
writeNode(nodeId); } close(); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java index ad44a54..c5f5513 100644 --- a/java/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,257 +1,310 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.Transform; +import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.maps.NodeIdMap; import org.softwareheritage.graph.maps.NodeTypesMap; import java.io.IOException; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id * ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output * (convert back to SWHID for users results). However, since graph traversal can be restricted * depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored * as well to avoid a full SWHID lookup. * * @author The Software Heritage developers * @see org.softwareheritage.graph.AllowedEdges * @see org.softwareheritage.graph.maps.NodeIdMap * @see org.softwareheritage.graph.maps.NodeTypesMap */ public class Graph extends ImmutableGraph { /** File extension for the SWHID to long node id map */ public static final String SWHID_TO_NODE = ".swhid2node.bin"; /** File extension for the long node id to SWHID map */ public static final String NODE_TO_SWHID = ".node2swhid.bin"; /** File extension for the long node id to node type map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ ImmutableGraph graph; /** Transposed compressed graph (used for backward traversals) */ ImmutableGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWHIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. 
* * @param path path and basename of the compressed graph to load */ - public Graph(String path) throws IOException { + + private Graph(String path) throws IOException { + loadInternal(path, null, LoadMethod.MAPPED); + } + + /** + * Loading mechanisms + */ + + enum LoadMethod { + MEMORY, MAPPED, OFFLINE, + } + + protected Graph loadInternal(String path, ProgressLogger pl, LoadMethod method) throws IOException { this.path = path; - this.graph = ImmutableGraph.loadMapped(path); - this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed"); + if (method == LoadMethod.MEMORY) { + this.graph = ImmutableGraph.load(path, pl); + this.graphTransposed = ImmutableGraph.load(path + "-transposed", pl); + } else if (method == LoadMethod.MAPPED) { + this.graph = ImmutableGraph.loadMapped(path, pl); + this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed", pl); + } else if (method == LoadMethod.OFFLINE) { + this.graph = ImmutableGraph.loadOffline(path, pl); + this.graphTransposed = ImmutableGraph.loadOffline(path + "-transposed", pl); + } this.nodeTypesMap = new NodeTypesMap(path); this.nodeIdMap = new NodeIdMap(path, numNodes()); + return this; + } + + protected Graph() { + } + + public static Graph load(String path, ProgressLogger pl) throws IOException { + return new Graph().loadInternal(path, pl, LoadMethod.MEMORY); } + public static Graph loadMapped(String path, ProgressLogger pl) throws IOException { + return new Graph().loadInternal(path, pl, LoadMethod.MAPPED); + } + + public static Graph loadOffline(String path, ProgressLogger pl) throws IOException { + return new Graph().loadInternal(path, null, LoadMethod.OFFLINE); + } + + public static Graph load(String path) throws IOException { + return new Graph().loadInternal(path, null, LoadMethod.MEMORY); + } + + public static Graph loadMapped(String path) throws IOException { + return new Graph().loadInternal(path, null, LoadMethod.MAPPED); + } + + public static Graph loadOffline(String path) throws 
IOException { + return new Graph().loadInternal(path, null, LoadMethod.OFFLINE); + } + + /** + * Constructor used for copy() + */ protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) { this.graph = graph; this.graphTransposed = graphTransposed; this.path = path; this.nodeIdMap = nodeIdMap; this.nodeTypesMap = nodeTypesMap; } /** * Return a flyweight copy of the graph. */ @Override public Graph copy() { return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap); } @Override public boolean randomAccess() { return graph.randomAccess() && graphTransposed.randomAccess(); } /** * Return a transposed version of the graph. */ public Graph transpose() { return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap); } /** * Return a symmetric version of the graph. */ public Graph symmetrize() { ImmutableGraph symmetric = Transform.union(graph, graphTransposed); return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ @Override public long numNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ @Override public long numArcs() { return graph.numArcs(); } /** * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of successors of the node, specified as a * WebGraph LazyLongIterator */ @Override public LazyLongIterator successors(long nodeId) { return graph.successors(nodeId); } /** * Returns lazy iterator of successors of a node while following a specific set of edge types. 
* * @param nodeId node specified as a long id * @param allowedEdges the specification of which edges can be traversed * @return lazy iterator of successors of the node, specified as a * WebGraph LazyLongIterator */ public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) { if (allowedEdges.restrictedTo == null) { // All edges are allowed, bypass edge check return this.successors(nodeId); } else { LazyLongIterator allSuccessors = this.successors(nodeId); Graph thisGraph = this; return new LazyLongIterator() { @Override public long nextLong() { long neighbor; while ((neighbor = allSuccessors.nextLong()) != -1) { if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) { return neighbor; } } return -1; } @Override public long skip(final long n) { long i; for (i = 0; i < n && nextLong() != -1; i++) ; return i; } }; } } /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ @Override public long outdegree(long nodeId) { return graph.outdegree(nodeId); } /** * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of predecessors of the node, specified as a * WebGraph LazyLongIterator */ public LazyLongIterator predecessors(long nodeId) { return this.transpose().successors(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return this.transpose().outdegree(nodeId); } /** * Returns the underlying BVGraph. * * @return WebGraph BVGraph */ public ImmutableGraph getGraph() { return this.graph; } /** * Returns the graph full path. * * @return graph full path */ public String getPath() { return path; } /** * Converts {@link SWHID} node to long. 
* * @param swhid node specified as a {@link SWHID} * @return internal long node id * @see SWHID */ public long getNodeId(SWHID swhid) { return nodeIdMap.getNodeId(swhid); } /** * Converts long id node to {@link SWHID}. * * @param nodeId node specified as a long id * @return external SWHID * @see SWHID */ public SWHID getSWHID(long nodeId) { return nodeIdMap.getSWHID(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see org.softwareheritage.graph.Node.Type */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } } diff --git a/java/src/main/java/org/softwareheritage/graph/Subgraph.java b/java/src/main/java/org/softwareheritage/graph/Subgraph.java new file mode 100644 index 0000000..3e7e7fd --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/Subgraph.java @@ -0,0 +1,224 @@ +package org.softwareheritage.graph; + +import it.unimi.dsi.big.webgraph.ImmutableGraph; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.big.webgraph.NodeIterator; + +import java.util.NoSuchElementException; + +public class Subgraph extends ImmutableGraph { + private final Graph underlyingGraph; + public final AllowedNodes allowedNodeTypes; + + private long nodeCount = -1; + + /** + * Constructor. + * + */ + public Subgraph(Graph underlyingGraph, AllowedNodes allowedNodeTypes) { + this.underlyingGraph = underlyingGraph.copy(); + this.allowedNodeTypes = allowedNodeTypes; + } + + /** + * Return a flyweight copy of the graph. + */ + @Override + public Subgraph copy() { + return new Subgraph(this.underlyingGraph.copy(), allowedNodeTypes); + } + + @Override + public boolean randomAccess() { + return underlyingGraph.randomAccess(); + } + + /** + * Return a transposed version of the graph. + */ + public Subgraph transpose() { + return new Subgraph(underlyingGraph.transpose(), allowedNodeTypes); + } + + /** + * Return a symmetric version of the graph. 
+ */ + public Subgraph symmetrize() { + return new Subgraph(underlyingGraph.symmetrize(), allowedNodeTypes); + } + + /** + * Returns number of nodes in the graph. + * + * @return number of nodes in the graph + */ + @Override + public long numNodes() { + if (nodeCount == -1) { + for (long i = 0; i < underlyingGraph.numNodes(); ++i) { + if (nodeExists(i)) + ++nodeCount; + } + } + return nodeCount; + } + + /** + * Returns number of edges in the graph. + * + * @return number of edges in the graph + */ + @Override + public long numArcs() { + throw new UnsupportedOperationException("Cannot determine the number of arcs in a subgraph"); + } + + public long maxNodeNumber() { + return underlyingGraph.numNodes(); + } + + public boolean nodeExists(long node) { + return allowedNodeTypes.isAllowed(underlyingGraph.getNodeType(node)); + } + + /** + * Returns lazy iterator of successors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of successors of the node, specified as a + * WebGraph LazyLongIterator + */ + @Override + public LazyLongIterator successors(long nodeId) { + if (!nodeExists(nodeId)) { + throw new IllegalArgumentException("Node " + nodeId + " not in subgraph"); + } + LazyLongIterator allSuccessors = underlyingGraph.successors(nodeId); + return new LazyLongIterator() { + @Override + public long nextLong() { + long neighbor; + while ((neighbor = allSuccessors.nextLong()) != -1) { + if (nodeExists(neighbor)) { + return neighbor; + } + } + return -1; + } + + @Override + public long skip(final long n) { + long i; + for (i = 0; i < n && nextLong() != -1; i++) + ; + return i; + } + }; + } + + /** + * Returns the outdegree of a node. 
+ * + * @param nodeId node specified as a long id + * @return outdegree of a node + */ + @Override + public long outdegree(long nodeId) { + long deg = 0; + for (LazyLongIterator allSuccessors = successors(nodeId); allSuccessors.nextLong() != -1; ++deg) + ; + return deg; + } + + @Override + public NodeIterator nodeIterator() { + return new NodeIterator() { + final long n = numNodes(); + long i = -1; + long done = 0; + + @Override + public boolean hasNext() { + return done <= n; + } + + @Override + public long nextLong() { + if (!hasNext()) + throw new NoSuchElementException(); + do { + ++i; + if (i >= underlyingGraph.numNodes()) + throw new NoSuchElementException(); + } while (!nodeExists(i)); + ++done; + return i; + } + + @Override + public long outdegree() { + return Subgraph.this.outdegree(i); + } + + @Override + public LazyLongIterator successors() { + return Subgraph.this.successors(i); + } + }; + } + + /** + * Returns lazy iterator of predecessors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of predecessors of the node, specified as a + * WebGraph LazyLongIterator + */ + public LazyLongIterator predecessors(long nodeId) { + return this.transpose().successors(nodeId); + } + + /** + * Returns the indegree of a node. + * + * @param nodeId node specified as a long id + * @return indegree of a node + */ + public long indegree(long nodeId) { + return this.transpose().outdegree(nodeId); + } + + /** + * Converts {@link SWHID} node to long. + * + * @param swhid node specified as a {@link SWHID} + * @return internal long node id + * @see SWHID + */ + public long getNodeId(SWHID swhid) { + return underlyingGraph.getNodeId(swhid); + } + + /** + * Converts long id node to {@link SWHID}. + * + * @param nodeId node specified as a long id + * @return external SWHID + * @see SWHID + */ + public SWHID getSWHID(long nodeId) { + return underlyingGraph.getSWHID(nodeId); + } + + /** + * Returns node type. 
+ * + * @param nodeId node specified as a long id + * @return corresponding node type + * @see Node.Type + */ + public Node.Type getNodeType(long nodeId) { + return underlyingGraph.getNodeType(nodeId); + } +} diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index 038a3e6..9397de7 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,45 +1,45 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.benchmark.utils.Statistics; import org.softwareheritage.graph.benchmark.utils.Timing; import java.io.IOException; import java.util.ArrayList; /** * Benchmark to time edge access time. * * @author The Software Heritage developers */ public class AccessEdge { /** * Main entrypoint. 
* * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = new Graph(bench.args.graphPath); + Graph graph = Graph.loadMapped(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); ArrayList timings = new ArrayList<>(); for (long nodeId : nodeIds) { long startTime = Timing.start(); LazyLongIterator neighbors = graph.successors(nodeId); long firstNeighbor = neighbors.nextLong(); double duration = Timing.stop(startTime); timings.add(duration); } System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):"); Statistics stats = new Statistics(timings); stats.printAll(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java index 883264d..43aec2e 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java @@ -1,107 +1,107 @@ package org.softwareheritage.graph.benchmark; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import java.io.File; import java.io.IOException; public class BFS { private final static Logger LOGGER = LoggerFactory.getLogger(BFS.class); private final ImmutableGraph graph; public BFS(ImmutableGraph graph) { this.graph = graph; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(BFS.class.getName(), "", 
new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'T', "transposed", "Use transposed graph (default: false)"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) throws IOException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); boolean useTransposed = config.getBoolean("useTransposed"); System.err.println("Loading graph " + graphPath + " ..."); - Graph graph = new Graph(graphPath); + Graph graph = Graph.loadMapped(graphPath); System.err.println("Graph loaded."); if (useTransposed) graph = graph.transpose(); BFS bfs = new BFS(graph); bfs.bfsperm(); } // Partly inlined from it.unimi.dsi.law.big.graph.BFS private void bfsperm() throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 final LongArrayBitVector visited = LongArrayBitVector.ofLength(n); final ProgressLogger pl = new ProgressLogger(LOGGER); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting breadth-first visit..."); for (long i = 0; i < n; i++) { if (visited.getBoolean(i)) continue; queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); final LazyLongIterator iterator = 
graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!visited.getBoolean(succ)) { visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } } pl.update(); } } pl.done(); queue.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 1bfc2a7..6a0cf58 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,42 +1,42 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** * Benchmark Software Heritage * browsing * use-cases scenarios. * * @author The Software Heritage developers */ public class Browsing { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = new Graph(bench.args.graphPath); + Graph graph = Graph.loadMapped(bench.args.graphPath); long[] dirNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); long[] revNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors); bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths); bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes); } } diff --git 
a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java index 9ab04c3..9b3c4c9 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java @@ -1,45 +1,45 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** * Benchmark Software Heritage * provenance * use-cases scenarios. * * @author The Software Heritage developers */ public class Provenance { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = new Graph(bench.args.graphPath); + Graph graph = Graph.loadMapped(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); bench.timeEndpoint("commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); bench.timeEndpoint("complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); bench.timeEndpoint("origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); bench.timeEndpoint("origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); bench.timeEndpoint("complete origin provenance", graph, nodeIds, 
originProvenanceEndpoint::leaves); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java index c0cd7ec..c0e19f6 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java @@ -1,37 +1,37 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** * Benchmark Software Heritage * vault use-case * scenario. * * @author The Software Heritage developers */ public class Vault { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); - Graph graph = new Graph(bench.args.graphPath); + Graph graph = Graph.loadMapped(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint endpoint = new Endpoint(graph, "forward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java index 5dab92b..f36ce88 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java @@ -1,62 +1,62 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Traversal; import 
java.io.IOException; import java.util.Scanner; public class FindCommonAncestor { private Graph graph; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename); + this.graph = Graph.loadMapped(graphBasename); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(FindCommonAncestor.class.getName(), "", new Parameter[]{ new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', "edges", "Edges constraints"), new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String edgesFmt = config.getString("edgesFmt"); FindCommonAncestor fca = new FindCommonAncestor(); try { fca.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long lhsNode = input.nextLong(); long rhsNode = input.nextLong(); Traversal t = new Traversal(fca.graph.symmetrize(), "forward", edgesFmt); System.out.println(t.findCommonDescendant(lhsNode, rhsNode)); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java index f916a8e..2e5afd9 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java @@ -1,123 +1,123 @@ package 
org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.IOException; import java.util.*; public class FindPath { private Graph graph; private Long emptySnapshot; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename).symmetrize(); + this.graph = Graph.loadMapped(graphBasename).symmetrize(); System.err.println("Graph loaded."); this.emptySnapshot = null; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(FindPath.class.getName(), "", new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private boolean nodeIsEmptySnapshot(Long node) { if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private Boolean shouldVisit(Long node) { Node.Type nt = this.graph.getNodeType(node); if (nt != Node.Type.REV && nt != Node.Type.REL && nt != Node.Type.SNP && nt != Node.Type.ORI) { return false; } if (this.nodeIsEmptySnapshot(node)) return false; return true; } private ArrayList findPath(Long src, Long dst) { HashSet visited = new HashSet<>(); Queue queue = new ArrayDeque<>(); Map parentNode = new HashMap<>(); queue.add(src); visited.add(src); while (!queue.isEmpty()) { long currentNode = queue.poll(); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = 
iterator.nextLong()) != -1) { if (!shouldVisit(succ) || visited.contains(succ)) continue; visited.add(succ); queue.add(succ); parentNode.put(succ, currentNode); if (succ == dst) { ArrayList path = new ArrayList<>(); long n = dst; while (n != src) { path.add(n); n = parentNode.get(n); } path.add(src); Collections.reverse(path); return path; } } } return null; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); FindPath fpath = new FindPath(); try { fpath.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long lhsNode = input.nextLong(); long rhsNode = input.nextLong(); ArrayList path = fpath.findPath(lhsNode, rhsNode); if (path != null) { for (Long n : path) { System.out.format("%d ", n); } System.out.println(); } else { System.out.println("null"); } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java index 8dddf1d..714df2e 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java @@ -1,249 +1,249 @@ package org.softwareheritage.graph.experiments.forks; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; public class ForkCC { public Boolean includeRootDir; private Graph graph; private Long emptySnapshot; 
private LongArrayBitVector visited; private LongArrayBitVector whitelist; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ForkCC.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't', "whitelist", "Whitelist of origins"), new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'R', "includerootdir", "Include root directory (default: false)"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private static void printDistribution(ArrayList> components) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { System.out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { System.out.println(node); } } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename).symmetrize(); + this.graph = Graph.loadMapped(graphBasename).symmetrize(); System.err.println("Graph loaded."); this.emptySnapshot = null; this.whitelist = null; this.visited = null; this.includeRootDir = null; } private boolean 
nodeIsEmptySnapshot(Long node) { if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private Boolean shouldVisit(Long node) { Node.Type nt = this.graph.getNodeType(node); if (nt == Node.Type.CNT) { return false; } if (nt == Node.Type.DIR && !includeRootDir) return false; if (this.nodeIsEmptySnapshot(node)) return false; if (visited.getBoolean(node)) return false; return true; } private ArrayList> compute(ProgressLogger pl) throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 visited = LongArrayBitVector.ofLength(n); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting connected components visit..."); ArrayList> components = new ArrayList<>(); for (long i = 0; i < n; i++) { if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR) continue; ArrayList component = new ArrayList<>(); queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); Node.Type cur_nt = this.graph.getNodeType(currentNode); if (cur_nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) { // TODO: add a check that the origin has >=1 non-empty snapshot component.add(currentNode); } final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = 
iterator.nextLong()) != -1) { if (!shouldVisit(succ)) continue; if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } if (component.size() > 0) { components.add(component); } } pl.done(); queue.close(); return components; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } } private void parseWhitelist(String path) { System.err.println("Loading whitelist " + path + " ..."); this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); Scanner scanner; try { scanner = new Scanner(new File(path)); while (scanner.hasNextLong()) { whitelist.set(scanner.nextLong()); } System.err.println("Whitelist loaded."); } catch (FileNotFoundException e) { e.printStackTrace(); } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String whitelistPath = config.getString("whitelistPath"); boolean includeRootDir = config.getBoolean("includeRootDir"); String outdirPath = config.getString("outdir"); ForkCC forkCc = new ForkCC(); try { 
forkCc.load_graph(graphPath); forkCc.includeRootDir = includeRootDir; } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } if (whitelistPath != null) { forkCc.parseWhitelist(whitelistPath); } ProgressLogger logger = new ProgressLogger(); // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { ArrayList> components = forkCc.compute(logger); printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt")); printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt")); } catch (IOException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java index 8e6da02..4d749bd 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java @@ -1,223 +1,223 @@ package org.softwareheritage.graph.experiments.forks; import ch.qos.logback.classic.Level; import ch.qos.logback.classic.Logger; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.*; public class ForkCliques { private Graph graph; private LongArrayBitVector whitelist; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph 
= new Graph(graphBasename); + this.graph = Graph.loadMapped(graphBasename); System.err.println("Graph loaded."); this.whitelist = null; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ForkCliques.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't', "whitelist", "Whitelist of origins"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private ArrayList dfsAt(Long baseNode) { ArrayList res = new ArrayList<>(); final Deque stack = new ArrayDeque<>(); HashSet seen = new HashSet<>(); stack.push(baseNode); while (!stack.isEmpty()) { final Long currentNode = stack.pop(); final LazyLongIterator iterator = this.graph.predecessors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!seen.contains(succ)) { Node.Type nt = this.graph.getNodeType(succ); if (nt == Node.Type.DIR || nt == Node.Type.CNT) continue; if (nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(succ))) { res.add(succ); } else { stack.push(succ); seen.add(succ); } } } } Collections.sort(res); return res; } private boolean isBaseRevision(Long node) { if (this.graph.getNodeType(node) != Node.Type.REV) return false; final LazyLongIterator iterator = this.graph.successors(node); long succ; while ((succ = iterator.nextLong()) != -1) { if (this.graph.getNodeType(succ) == Node.Type.REV) return false; } return true; } static private String fingerprint(ArrayList cluster) { MessageDigest digest; try { digest = MessageDigest.getInstance("SHA-256"); } catch 
(NoSuchAlgorithmException e) { e.printStackTrace(); return null; } for (Long n : cluster) digest.update(Longs.toByteArray(n)); return new String(digest.digest()); } private ArrayList> compute(ProgressLogger pl) { final long n = this.graph.numNodes(); HashSet fingerprints = new HashSet<>(); ArrayList> clusters = new ArrayList<>(); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting topological sort..."); for (long i = 0; i < n; i++) { if (isBaseRevision(i)) { ArrayList currentCluster = dfsAt(i); String clusterFp = fingerprint(currentCluster); if (!fingerprints.contains(clusterFp)) { fingerprints.add(clusterFp); clusters.add(currentCluster); } } pl.update(); } pl.done(); return clusters; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } } private void parseWhitelist(String path) { System.err.println("Loading whitelist " + path + " ..."); this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); Scanner scanner; try { scanner = new Scanner(new File(path)); while (scanner.hasNextLong()) { whitelist.set(scanner.nextLong()); } System.err.println("Whitelist loaded."); } catch 
(FileNotFoundException e) { e.printStackTrace(); } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String whitelistPath = config.getString("whitelistPath"); String outdirPath = config.getString("outdir"); ForkCliques forkCliques = new ForkCliques(); try { forkCliques.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } if (whitelistPath != null) { forkCliques.parseWhitelist(whitelistPath); } Logger rootLogger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME); rootLogger.setLevel(Level.DEBUG); ProgressLogger logger = new ProgressLogger(rootLogger); ArrayList> components = forkCliques.compute(logger); // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt")); printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt")); } catch (FileNotFoundException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java index d99dc2c..332a908 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java @@ -1,88 +1,88 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.IOException; import java.util.ArrayList; public class ListEmptyOrigins { private Graph graph; 
private Long emptySnapshot; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ListEmptyOrigins.class.getName(), "", new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); ListEmptyOrigins leo = new ListEmptyOrigins(); try { leo.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } ArrayList badlist = leo.compute(leo.graph); for (Long bad : badlist) { System.out.println(bad); } } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename); + this.graph = Graph.loadMapped(graphBasename); System.err.println("Graph loaded."); this.emptySnapshot = null; } private boolean nodeIsEmptySnapshot(Long node) { System.err.println(this.graph.getNodeType(node) + " " + this.graph.outdegree(node) + " " + node); if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private ArrayList compute(ImmutableGraph graph) { final long n = graph.numNodes(); ArrayList bad = new ArrayList<>(); for (long i = 0; i < n; i++) { Node.Type nt = this.graph.getNodeType(i); if (nt != Node.Type.ORI) continue; final LazyLongIterator iterator = graph.successors(i); long succ; boolean found = false; while ((succ = iterator.nextLong()) != -1) { if (this.graph.outdegree(succ) > 0) { found = true; } } if (!found) bad.add(i); } 
return bad; } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java index 1681bd5..89fd675 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java @@ -1,130 +1,130 @@ package org.softwareheritage.graph.experiments.multiplicationfactor; import com.martiansoftware.jsap.*; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.Traversal; import org.softwareheritage.graph.benchmark.utils.Timing; import java.io.IOException; import java.util.Scanner; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class GenDistribution { private Graph graph; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(GenDistribution.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's', "srctype", "Source node type"), new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd', "dsttype", "Destination node type"), new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', "edges", "Edges constraints"), new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't', "numthreads", "Number of threads"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { 
JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); Node.Type srcType = Node.Type.fromStr(config.getString("srcType")); Node.Type dstType = Node.Type.fromStr(config.getString("dstType")); String edgesFmt = config.getString("edgesFmt"); int numThreads = config.getInt("numThreads"); GenDistribution tp = new GenDistribution(); try { tp.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } final long END_OF_QUEUE = -1L; ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); service.submit(() -> { try { Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long node = input.nextLong(); if (tp.graph.getNodeType(node) == srcType) { queue.put(node); } } } catch (InterruptedException e) { e.printStackTrace(); } finally { for (int i = 0; i < numThreads; ++i) { try { queue.put(END_OF_QUEUE); } catch (InterruptedException e) { e.printStackTrace(); } } } }); for (int i = 0; i < numThreads; ++i) { service.submit(() -> { Graph thread_graph = tp.graph.copy(); long startTime; double totalTime; while (true) { Long node = null; try { node = queue.take(); } catch (InterruptedException e) { e.printStackTrace(); } if (node == null || node == END_OF_QUEUE) { return; } Traversal t = new Traversal(thread_graph, "backward", edgesFmt); int[] count = {0}; startTime = Timing.start(); t.visitNodesVisitor(node, (curnode) -> { if (tp.graph.getNodeType(curnode) == dstType) { count[0]++; } }); totalTime = Timing.stop(startTime); System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(), t.getNbEdgesAccessed(), totalTime); } }); } service.shutdown(); } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename); + this.graph = Graph.loadMapped(graphBasename); 
System.err.println("Graph loaded."); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java new file mode 100644 index 0000000..ad7eadf --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java @@ -0,0 +1,188 @@ +package org.softwareheritage.graph.experiments.topology; + +import com.martiansoftware.jsap.*; +import it.unimi.dsi.Util; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.fastutil.BigArrays; +import it.unimi.dsi.fastutil.longs.LongBigArrays; +import it.unimi.dsi.logging.ProgressLogger; +import it.unimi.dsi.util.XoRoShiRo128PlusRandom; +import org.softwareheritage.graph.*; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.*; +import java.util.concurrent.*; + +public class AveragePaths { + private final Graph graph; + private final Subgraph subgraph; + private final ConcurrentHashMap result; + private final String outdir; + + public AveragePaths(String graphBasename, String allowedNodes, String outdir) throws IOException { + System.err.println("Loading graph " + graphBasename + " ..."); + this.graph = Graph.loadMapped(graphBasename); + this.subgraph = new Subgraph(this.graph, new AllowedNodes(allowedNodes)); + this.outdir = outdir; + System.err.println("Graph loaded."); + + result = new ConcurrentHashMap<>(); + } + + private static JSAPResult parse_args(String[] args) { + JSAPResult config = null; + try { + SimpleJSAP jsap = new SimpleJSAP(AveragePaths.class.getName(), "", + new Parameter[]{ + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), + new FlaggedOption("nodeTypes", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's', + "nodetypes", "Node type constraints"), + new 
FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', + "outdir", "Directory where to put the results"), + new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "32", JSAP.NOT_REQUIRED, 't', + "numthreads", "Number of threads"),}); + + config = jsap.parse(args); + if (jsap.messagePrinted()) { + System.exit(1); + } + } catch (JSAPException e) { + e.printStackTrace(); + } + return config; + } + + private void run(int numThreads) throws InterruptedException { + final long END_OF_QUEUE = -1L; + + ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); + ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); + + service.submit(() -> { + try { + Graph thread_graph = graph.copy(); + Subgraph thread_subgraph = subgraph.copy(); + + long[][] randomPerm = Util.identity(thread_graph.numNodes()); + LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); + long n = thread_graph.numNodes(); + + ProgressLogger pl = new ProgressLogger(); + pl.expectedUpdates = n; + pl.itemsName = "nodes"; + pl.start("Filling processor queue..."); + + for (long j = 0; j < n; ++j) { + long node = BigArrays.get(randomPerm, j); + if (thread_subgraph.nodeExists(node) && thread_subgraph.outdegree(node) == 0) { + queue.put(node); + } + if (j % 10000 == 0) { + printResult(); + } + pl.update(); + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + for (int i = 0; i < numThreads; ++i) { + try { + queue.put(END_OF_QUEUE); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + }); + + for (int i = 0; i < numThreads; ++i) { + service.submit(() -> { + try { + Subgraph thread_subgraph = subgraph.copy(); + while (true) { + Long node = null; + try { + node = queue.take(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + if (node == null || node == END_OF_QUEUE) { + return; + } + + bfsAt(thread_subgraph, node); + } + } catch (Exception e) { + e.printStackTrace(); + } + }); + } + + service.shutdown(); + 
service.awaitTermination(365, TimeUnit.DAYS); + } + + private void bfsAt(Subgraph graph, long srcNodeId) { + ArrayDeque queue = new ArrayDeque<>(); + HashSet visited = new HashSet<>(); + + long FRONTIER_MARKER = -1; + + queue.addLast(srcNodeId); + visited.add(srcNodeId); + + long distance = 0; + queue.addLast(FRONTIER_MARKER); + + while (!queue.isEmpty()) { + long currentNodeId = queue.removeFirst(); + // System.err.println("curr: " + currentNodeId); + if (currentNodeId == FRONTIER_MARKER) { + if (queue.isEmpty()) // avoid infinite loops + break; + ++distance; + queue.addLast(FRONTIER_MARKER); + continue; + } + if (graph.indegree(currentNodeId) == 0) { + result.merge(distance, 1L, Long::sum); + } + + LazyLongIterator it = graph.predecessors(currentNodeId); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { + if (!visited.contains(neighborNodeId)) { + queue.addLast(neighborNodeId); + visited.add(neighborNodeId); + } + } + } + } + + public void printResult() throws IOException { + new File(outdir).mkdirs(); + + PrintWriter f = new PrintWriter(new FileWriter(outdir + "/distribution.txt")); + TreeMap sortedDistribution = new TreeMap<>(result); + for (Map.Entry entry : sortedDistribution.entrySet()) { + f.println(entry.getKey() + " " + entry.getValue()); + } + f.close(); + } + + public static void main(String[] args) throws IOException, InterruptedException { + JSAPResult config = parse_args(args); + + String graphPath = config.getString("graphPath"); + String outdir = config.getString("outdir"); + String allowedNodes = config.getString("nodeTypes"); + int numThreads = config.getInt("numThreads"); + + AveragePaths tp = new AveragePaths(graphPath, allowedNodes, outdir); + tp.run(numThreads); + tp.printResult(); + } +} diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java index 9064ef2..2e6fa0c 100644 --- 
a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java @@ -1,189 +1,325 @@ package org.softwareheritage.graph.experiments.topology; -import ch.qos.logback.classic.Level; -import ch.qos.logback.classic.Logger; import com.martiansoftware.jsap.*; +import it.unimi.dsi.Util; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.fastutil.BigArrays; +import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.logging.ProgressLogger; -import org.slf4j.LoggerFactory; +import it.unimi.dsi.util.XoRoShiRo128PlusRandom; import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Node; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; +import java.io.*; import java.util.*; -import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.*; public class ClusteringCoefficient { - private Graph graph; + private final Graph graph; + private final String outdirPath; + private final ConcurrentHashMap result_full; + private final ConcurrentHashMap result_dircnt; + private final ConcurrentHashMap result_rev; + private final ConcurrentHashMap result_revrel; + private final ConcurrentHashMap result_orisnp; - private void load_graph(String graphBasename) throws IOException { + public ClusteringCoefficient(String graphBasename, String outdirPath) throws IOException { + this.outdirPath = outdirPath; System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename).symmetrize(); + Graph directedGraph = Graph.loadMapped(graphBasename); + this.graph = directedGraph.symmetrize(); System.err.println("Graph loaded."); + + result_full = new ConcurrentHashMap<>(); + result_dircnt = new ConcurrentHashMap<>(); + result_rev = new ConcurrentHashMap<>(); + result_revrel = new 
ConcurrentHashMap<>(); + result_orisnp = new ConcurrentHashMap<>(); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP(ClusteringCoefficient.class.getName(), "", + SimpleJSAP jsap = new SimpleJSAP(ClusteringCoefficient.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', - "outdir", "Directory where to put the results"),}); + "outdir", "Directory where to put the results"), + new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "32", JSAP.NOT_REQUIRED, 't', + "numthreads", "Number of threads"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } + private void run(int numThreads) throws InterruptedException { + final long END_OF_QUEUE = -1L; + + ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); + ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); + + service.submit(() -> { + try { + Graph thread_graph = graph.copy(); + + long[][] randomPerm = Util.identity(thread_graph.numNodes()); + LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); + long n = thread_graph.numNodes(); + + ProgressLogger pl = new ProgressLogger(); + pl.expectedUpdates = n; + pl.itemsName = "nodes"; + pl.start("Filling processor queue..."); + + for (long j = 0; j < n; ++j) { + long node = BigArrays.get(randomPerm, j); + queue.put(node); + if (j % 10000 == 0) { + printResult(); + } + pl.update(); + } + } catch (Exception e) { + e.printStackTrace(); + } finally { + for (int i = 0; i < numThreads; ++i) { + try { + queue.put(END_OF_QUEUE); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + }); + + for (int i = 0; i < numThreads; ++i) { + service.submit(() -> { + try { + Graph
thread_graph = graph.copy(); + while (true) { + Long node = null; + try { + node = queue.take(); + } catch (InterruptedException e) { + e.printStackTrace(); + } + if (node == null || node == END_OF_QUEUE) { + return; + } + + computeAt(thread_graph, node); + } + } catch (Exception e) { + e.printStackTrace(); + } + }); + } + + service.shutdown(); + service.awaitTermination(365, TimeUnit.DAYS); + } + + private void computeAt(Graph graph, long node) { + long d = graph.outdegree(node); + if (d < 2) { + return; + } + Node.Type nodeType = graph.getNodeType(node); + + HashSet neighborhood = new HashSet<>(); + long succ; + final LazyLongIterator iterator = graph.successors(node); + while ((succ = iterator.nextLong()) != -1) { + neighborhood.add(succ); + } + + long triangles_full = 0; + long triangles_dircnt = 0; + long triangles_rev = 0; + long triangles_revrel = 0; + long triangles_orisnp = 0; + + for (Long neighbor : neighborhood) { + Node.Type neighborNodeType = graph.getNodeType(neighbor); + final LazyLongIterator it = graph.successors(neighbor); + while ((succ = it.nextLong()) != -1) { + if (neighborhood.contains(succ)) { + Node.Type succNodeType = graph.getNodeType(succ); + triangles_full++; + if ((nodeType == Node.Type.DIR || nodeType == Node.Type.CNT) + && (neighborNodeType == Node.Type.DIR || neighborNodeType == Node.Type.CNT) + && (succNodeType == Node.Type.DIR || succNodeType == Node.Type.CNT)) { + triangles_dircnt++; + } else if ((nodeType == Node.Type.REV || nodeType == Node.Type.REL) + && (neighborNodeType == Node.Type.REV || neighborNodeType == Node.Type.REL) + && (succNodeType == Node.Type.REV || succNodeType == Node.Type.REL)) { + triangles_revrel++; + if (nodeType == Node.Type.REV && neighborNodeType == Node.Type.REV + && succNodeType == Node.Type.REV) + triangles_rev++; + } else if ((nodeType == Node.Type.ORI || nodeType == Node.Type.SNP) + && (neighborNodeType == Node.Type.ORI || neighborNodeType == Node.Type.SNP) + && (succNodeType == Node.Type.ORI || 
succNodeType == Node.Type.SNP)) { + triangles_orisnp++; + } + } + } + } + + result_full.merge(triangles_full, 1L, Long::sum); + result_dircnt.merge(triangles_dircnt, 1L, Long::sum); + result_rev.merge(triangles_rev, 1L, Long::sum); + result_revrel.merge(triangles_revrel, 1L, Long::sum); + result_orisnp.merge(triangles_orisnp, 1L, Long::sum); + } + + public void printSortedDistribution(String distribPath, Map distrib) throws IOException { + PrintWriter f = new PrintWriter(new FileWriter(distribPath)); + TreeMap sortedDistribution = new TreeMap<>(distrib); + for (Map.Entry entry : sortedDistribution.entrySet()) { + f.println(entry.getKey() + " " + entry.getValue()); + } + f.close(); + } + + public void printResult() throws IOException { + new File(outdirPath).mkdirs(); + + printSortedDistribution(outdirPath + "/distribution-full.txt", result_full); + printSortedDistribution(outdirPath + "/distribution-dircnt.txt", result_dircnt); + printSortedDistribution(outdirPath + "/distribution-rev.txt", result_rev); + printSortedDistribution(outdirPath + "/distribution-relrev.txt", result_revrel); + printSortedDistribution(outdirPath + "/distribution-orisnp.txt", result_orisnp); + } + + public static void main(String[] args) throws IOException, InterruptedException { + JSAPResult config = parse_args(args); + + String graphPath = config.getString("graphPath"); + String outdir = config.getString("outdir"); + int numThreads = config.getInt("numThreads"); + + ClusteringCoefficient cc = new ClusteringCoefficient(graphPath, outdir); + cc.run(numThreads); + cc.printResult(); + } + + // Old unused functions + private long oppositeEdges(ImmutableGraph graph, long node) { - System.out.format("%d\n", node); HashSet neighborhood = new HashSet<>(); long succ; final LazyLongIterator iterator = graph.successors(node); while ((succ = iterator.nextLong()) != -1) { - System.out.format("%d neighbor add %d\n", node, succ); neighborhood.add(succ); } long closed_triplets = 0; for (Long neighbor : 
neighborhood) { - System.out.format("%d neighbor visit %d\n", node, neighbor); final LazyLongIterator it = graph.successors(neighbor); while ((succ = it.nextLong()) != -1) { - System.out.format("%d neighbor visit %d succ %d\n", node, neighbor, succ); if (neighborhood.contains(succ)) { closed_triplets++; } } } return closed_triplets; } public void compute(ProgressLogger pl, Formatter out_local, Formatter out_global) { final long n = this.graph.numNodes(); pl.expectedUpdates = n; pl.itemsName = "nodes"; long nodes_d2 = 0; double cum_lcc = 0; double cum_lcc_c0 = 0; double cum_lcc_c1 = 0; HashMap distribution = new HashMap<>(); for (long node = 0; node < n; node++) { long d = graph.outdegree(node); if (d >= 2) { double t = (d * (d - 1)); double m = oppositeEdges(graph, node); double lcc = m / t; distribution.merge(lcc, 1L, Long::sum); cum_lcc += lcc; nodes_d2++; } else { cum_lcc_c1++; } pl.update(); } pl.done(); for (Map.Entry entry : distribution.entrySet()) { out_local.format("%f %d\n", entry.getKey(), entry.getValue()); } double gC = cum_lcc / nodes_d2; double gC0 = cum_lcc_c0 / n; double gC1 = cum_lcc_c1 / n; out_global.format("C: %f\n", gC); out_global.format("C0: %f\n", gC0); out_global.format("C1: %f\n", gC1); } public void compute_approx(Formatter out_global) { final long n = this.graph.numNodes(); long trials = 0; long triangles = 0; while (true) { long node = ThreadLocalRandom.current().nextLong(0, n); long d = graph.outdegree(node); if (d >= 2) { Long u = null; Long v = null; long u_idx = ThreadLocalRandom.current().nextLong(0, d); long v_idx = ThreadLocalRandom.current().nextLong(0, d - 1); if (v_idx >= u_idx) { v_idx++; } long succ; final LazyLongIterator node_iterator = graph.successors(node); for (long succ_idx = 0; (succ = node_iterator.nextLong()) != -1; succ_idx++) { if (succ_idx == u_idx) { u = succ; } if (succ_idx == v_idx) { v = succ; } } final LazyLongIterator u_iterator = graph.successors(u); while ((succ = u_iterator.nextLong()) != -1) { if 
(succ == v) triangles++; } } trials++; if (trials % 100 == 0 || true) { double gC = (double) triangles / (double) trials; out_global.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials); System.out.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials); } } } - - public static void main(String[] args) { - JSAPResult config = parse_args(args); - - String graphPath = config.getString("graphPath"); - String outdirPath = config.getString("outdir"); - - ClusteringCoefficient ccoef = new ClusteringCoefficient(); - try { - ccoef.load_graph(graphPath); - } catch (IOException e) { - System.out.println("Could not load graph: " + e); - System.exit(2); - } - - Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME); - rootLogger.setLevel(Level.DEBUG); - - new File(outdirPath).mkdirs(); - - try { - ccoef.compute_approx(new Formatter(outdirPath + "/local.txt")); - /* - * ccoef.compute( symmetric, new ProgressLogger(rootLogger), new Formatter(outdirPath + - * "/local.txt"), new Formatter(outdirPath + "/global.txt") ); - */ - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java index ae573fd..43fd11b 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java @@ -1,179 +1,193 @@ package org.softwareheritage.graph.experiments.topology; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; +import 
org.softwareheritage.graph.AllowedNodes; import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Subgraph; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.*; public class ConnectedComponents { - private Graph graph; + private Subgraph graph; - private void load_graph(String graphBasename) throws IOException { + private void load_graph(String graphBasename, String nodeTypes) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); - this.graph = new Graph(graphBasename).symmetrize(); + var underlyingGraph = Graph.loadMapped(graphBasename); + var underlyingGraphSym = underlyingGraph.symmetrize(); + graph = new Subgraph(underlyingGraphSym, new AllowedNodes(nodeTypes)); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP(ConnectedComponents.class.getName(), "", new Parameter[]{ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', - "outdir", "Directory where to put the results"),}); + "outdir", "Directory where to put the results"), + new FlaggedOption("nodeTypes", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', + "nodetypes", "Allowed node types (comma-separated)"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private HashMap /* ArrayList> */ compute(ProgressLogger pl) throws IOException { final long n = graph.numNodes(); + final long maxN = graph.maxNodeNumber(); // Allow enough memory to behave like in-memory queue - int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); + int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * maxN); // Use a disk 
based queue to store BFS frontier final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 - LongArrayBitVector visited = LongArrayBitVector.ofLength(n); + LongArrayBitVector visited = LongArrayBitVector.ofLength(maxN); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting connected components visit..."); // ArrayList> components = new ArrayList<>(); HashMap componentDistribution = new HashMap<>(); - for (long i = 0; i < n; i++) { + var it = graph.nodeIterator(); + while (it.hasNext()) { + long i = it.nextLong(); + + if (visited.getBoolean(i)) + continue; + // ArrayList component = new ArrayList<>(); long componentNodes = 0; queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); // component.add(currentNode); componentNodes += 1; final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (visited.getBoolean(succ)) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } /* * if (component.size() > 0) { components.add(component); } */ if (componentNodes > 0) componentDistribution.merge(componentNodes, 1L, Long::sum); } pl.done(); // return components; return componentDistribution; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } out.close(); } private static void printLargestComponent(ArrayList> components, Formatter out) { 
int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } out.close(); } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } out.close(); } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outdirPath = config.getString("outdir"); + String nodeTypes = config.getString("nodeTypes"); ConnectedComponents connectedComponents = new ConnectedComponents(); try { - connectedComponents.load_graph(graphPath); + connectedComponents.load_graph(graphPath, nodeTypes); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } ProgressLogger logger = new ProgressLogger(); // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { // ArrayList> components = connectedComponents.compute(logger); // components.sort(Comparator.comparing(ArrayList::size).reversed()); // printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); // printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt")); // printAllComponents(components, new Formatter(outdirPath + "/all_components.txt")); HashMap componentDistribution = connectedComponents.compute(logger); PrintWriter f = new PrintWriter(new FileWriter(outdirPath + "/distribution.txt")); TreeMap sortedDistribution = new TreeMap<>(componentDistribution); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } catch (IOException e) { e.printStackTrace(); } logger.done(); } } diff --git 
a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java index c82d4ea..2d6ebdb 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java @@ -1,174 +1,239 @@ package org.softwareheritage.graph.experiments.topology; +import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.lang.reflect.InvocationTargetException; import java.util.*; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; public class InOutDegree { private InOutDegree() { } private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1; private static final int TYPE_ALL = Node.Type.values().length; private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT); private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR); private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV); private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL); private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP); private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI); public static long[] outdegreeTypes(final Graph graph, long node) { long[] out = new long[NODE_ARRAY_SIZE]; var successors = graph.successors(node); long neighbor; while ((neighbor = successors.nextLong()) != -1) { out[Node.Type.toInt(graph.getNodeType(neighbor))]++; out[TYPE_ALL]++; } return out; } public static long[] indegreeTypes(final Graph graph, long node) { return 
outdegreeTypes(graph.transpose(), node); } public static void writeDistribution(HashMap distribution, String filename) throws IOException { PrintWriter f = new PrintWriter(new FileWriter(filename)); TreeMap sortedDistribution = new TreeMap<>(distribution); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } public static void run(final Graph graph, String resultsDir) throws IOException { + // Per-type var cnt_in_dir = new HashMap(); var dir_in_dir = new HashMap(); var dir_in_rev = new HashMap(); var dir_in_all = new HashMap(); var dir_out_all = new HashMap(); var dir_out_dir = new HashMap(); var dir_out_cnt = new HashMap(); var dir_out_rev = new HashMap(); var rev_in_dir = new HashMap(); var rev_in_rel = new HashMap(); var rev_in_rev = new HashMap(); var rev_in_snp = new HashMap(); var rev_in_all = new HashMap(); var rev_out_rev = new HashMap(); var rel_in_snp = new HashMap(); var snp_in_ori = new HashMap(); var snp_out_all = new HashMap(); var snp_out_rel = new HashMap(); var snp_out_rev = new HashMap(); var ori_out_snp = new HashMap(); + // Aggregated per layer + var full_in = new HashMap(); + var full_out = new HashMap(); + var dircnt_in = new HashMap(); + var dircnt_out = new HashMap(); + var orisnp_in = new HashMap(); + var orisnp_out = new HashMap(); + var relrev_in = new HashMap(); + var relrev_out = new HashMap(); + var rev_in = rev_in_rev; // alias for single-type layer + var rev_out = rev_out_rev; + final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "nodes"; pl.expectedUpdates = graph.numNodes(); pl.start("Scanning..."); long[] in; long[] out; for (long i = graph.numNodes(); i-- != 0;) { + long d_in = graph.indegree(i); + long d_out = graph.outdegree(i); + + full_in.merge(d_in, 1L, Long::sum); + full_out.merge(d_out, 1L, Long::sum); + switch (graph.getNodeType(i)) { case CNT: - cnt_in_dir.merge(graph.indegree(i), 1L, Long::sum); + cnt_in_dir.merge(d_in, 1L, Long::sum); + + 
dircnt_in.merge(d_in, 1L, Long::sum); + dircnt_out.merge(0L, 1L, Long::sum); + break; case DIR: in = indegreeTypes(graph, i); out = outdegreeTypes(graph, i); + dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum); dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum); dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum); dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum); dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum); dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum); + + dircnt_in.merge(in[TYPE_DIR] + in[TYPE_CNT], 1L, Long::sum); + dircnt_out.merge(out[TYPE_DIR] + out[TYPE_CNT], 1L, Long::sum); + break; case REV: in = indegreeTypes(graph, i); out = outdegreeTypes(graph, i); + rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum); rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum); rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum); rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum); rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum); + + relrev_in.merge(in[TYPE_REL] + in[TYPE_REV], 1L, Long::sum); + relrev_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum); + break; case REL: - rel_in_snp.merge(graph.indegree(i), 1L, Long::sum); + rel_in_snp.merge(d_in, 1L, Long::sum); + + relrev_in.merge(0L, 1L, Long::sum); + relrev_out.merge(d_out, 1L, Long::sum); + break; case SNP: out = outdegreeTypes(graph, i); - snp_in_ori.merge(graph.indegree(i), 1L, Long::sum); + + snp_in_ori.merge(d_in, 1L, Long::sum); snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum); snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum); snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum); + + orisnp_in.merge(d_in, 1L, Long::sum); + orisnp_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum); + break; case ORI: - ori_out_snp.merge(graph.outdegree(i), 1L, Long::sum); + ori_out_snp.merge(d_out, 1L, Long::sum); + + orisnp_in.merge(0L, 1L, Long::sum); + orisnp_out.merge(d_out, 1L, Long::sum); + break; + default : + pl.logger().warn("Invalid node type at pos {}", i); 
+ break; } pl.update(); } pl.done(); - writeDistribution(cnt_in_dir, resultsDir + "/cnt_in_dir.txt"); - writeDistribution(dir_in_dir, resultsDir + "/dir_in_dir.txt"); - writeDistribution(dir_in_rev, resultsDir + "/dir_in_rev.txt"); - writeDistribution(dir_in_all, resultsDir + "/dir_in_all.txt"); - writeDistribution(dir_out_all, resultsDir + "/dir_out_all.txt"); - writeDistribution(dir_out_dir, resultsDir + "/dir_out_dir.txt"); - writeDistribution(dir_out_cnt, resultsDir + "/dir_out_cnt.txt"); - writeDistribution(dir_out_rev, resultsDir + "/dir_out_rev.txt"); - writeDistribution(rev_in_dir, resultsDir + "/rev_in_dir.txt"); - writeDistribution(rev_in_rel, resultsDir + "/rev_in_rel.txt"); - writeDistribution(rev_in_rev, resultsDir + "/rev_in_rev.txt"); - writeDistribution(rev_in_snp, resultsDir + "/rev_in_snp.txt"); - writeDistribution(rev_in_all, resultsDir + "/rev_in_all.txt"); - writeDistribution(rev_out_rev, resultsDir + "/rev_out_rev.txt"); - writeDistribution(rel_in_snp, resultsDir + "/rel_in_snp.txt"); - writeDistribution(snp_in_ori, resultsDir + "/snp_in_ori.txt"); - writeDistribution(snp_out_all, resultsDir + "/snp_out_all.txt"); - writeDistribution(snp_out_rel, resultsDir + "/snp_out_rel.txt"); - writeDistribution(snp_out_rev, resultsDir + "/snp_out_rev.txt"); - writeDistribution(ori_out_snp, resultsDir + "/ori_out_snp.txt"); + (new File(resultsDir)).mkdir(); + writeDistribution(full_in, resultsDir + "/full_in.txt"); + writeDistribution(full_out, resultsDir + "/full_out.txt"); + writeDistribution(dircnt_in, resultsDir + "/dir+cnt_in.txt"); + writeDistribution(dircnt_out, resultsDir + "/dir+cnt_out.txt"); + writeDistribution(relrev_in, resultsDir + "/rel+rev_in.txt"); + writeDistribution(relrev_out, resultsDir + "/rel+rev_out.txt"); + writeDistribution(orisnp_in, resultsDir + "/ori+snp_in.txt"); + writeDistribution(orisnp_out, resultsDir + "/ori+snp_out.txt"); + writeDistribution(rev_in, resultsDir + "/rev_in.txt"); + writeDistribution(rev_out, resultsDir + 
"/rev_out.txt"); + + String resultsTypeDir = resultsDir + "/per_type"; + (new File(resultsTypeDir)).mkdir(); + writeDistribution(cnt_in_dir, resultsTypeDir + "/cnt_in_dir.txt"); + writeDistribution(dir_in_dir, resultsTypeDir + "/dir_in_dir.txt"); + writeDistribution(dir_in_rev, resultsTypeDir + "/dir_in_rev.txt"); + writeDistribution(dir_in_all, resultsTypeDir + "/dir_in_all.txt"); + writeDistribution(dir_out_all, resultsTypeDir + "/dir_out_all.txt"); + writeDistribution(dir_out_dir, resultsTypeDir + "/dir_out_dir.txt"); + writeDistribution(dir_out_cnt, resultsTypeDir + "/dir_out_cnt.txt"); + writeDistribution(dir_out_rev, resultsTypeDir + "/dir_out_rev.txt"); + writeDistribution(rev_in_dir, resultsTypeDir + "/rev_in_dir.txt"); + writeDistribution(rev_in_rel, resultsTypeDir + "/rev_in_rel.txt"); + writeDistribution(rev_in_rev, resultsTypeDir + "/rev_in_rev.txt"); + writeDistribution(rev_in_snp, resultsTypeDir + "/rev_in_snp.txt"); + writeDistribution(rev_in_all, resultsTypeDir + "/rev_in_all.txt"); + writeDistribution(rev_out_rev, resultsTypeDir + "/rev_out_rev.txt"); + writeDistribution(rel_in_snp, resultsTypeDir + "/rel_in_snp.txt"); + writeDistribution(snp_in_ori, resultsTypeDir + "/snp_in_ori.txt"); + writeDistribution(snp_out_all, resultsTypeDir + "/snp_out_all.txt"); + writeDistribution(snp_out_rel, resultsTypeDir + "/snp_out_rel.txt"); + writeDistribution(snp_out_rev, resultsTypeDir + "/snp_out_rev.txt"); + writeDistribution(ori_out_snp, resultsTypeDir + "/ori_out_snp.txt"); + } static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, JSAPException, IOException, ClassNotFoundException { final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(), "Computes in and out degrees of the given SWHGraph", new Parameter[]{ new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the 
graph."), new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The directory of the resulting files."),}); final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1); final String basename = jsapResult.getString("basename"); final String resultsDir = jsapResult.userSpecified("resultsDir") ? jsapResult.getString("resultsDir") : basename; final ProgressLogger pl = new ProgressLogger(); - Graph graph = new Graph(basename); + Graph graph = Graph.loadMapped(basename); run(graph, resultsDir); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java index d1be386..bb6779e 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java @@ -1,90 +1,90 @@ package org.softwareheritage.graph.experiments.topology; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.Util; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.util.XoRoShiRo128PlusRandom; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.experiments.forks.ForkCC; import java.io.*; public class SubdatasetSizeFunction { private SubdatasetSizeFunction() { } public static void run(final Graph graph) throws IOException { final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "nodes"; pl.expectedUpdates = graph.numNodes(); long n = graph.numNodes(); 
LongArrayBitVector visited = LongArrayBitVector.ofLength(n); int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; long[][] randomPerm = Util.identity(graph.numNodes()); LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); long visitedSize = 0; pl.start("Running traversal starting from origins..."); for (long j = 0; j < n; ++j) { long i = BigArrays.get(randomPerm, j); if (visited.getBoolean(i) || graph.getNodeType(i) != Node.Type.ORI) { continue; } queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); visitedSize++; final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (visited.getBoolean(succ)) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } System.out.println(visitedSize); } pl.done(); } static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, JSAPException, IOException { final SimpleJSAP jsap = new SimpleJSAP(SubdatasetSizeFunction.class.getName(), "Computes in and out degrees of the given SWHGraph", new Parameter[]{new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."),}); final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1); final String basename = jsapResult.getString("basename"); - Graph graph = new Graph(basename); + Graph graph = Graph.loadMapped(basename); run(graph); } } diff --git a/java/src/main/java/org/softwareheritage/graph/server/App.java b/java/src/main/java/org/softwareheritage/graph/server/App.java index 2cf1cf8..bb2ce5b 100644 --- 
a/java/src/main/java/org/softwareheritage/graph/server/App.java +++ b/java/src/main/java/org/softwareheritage/graph/server/App.java @@ -1,196 +1,196 @@ package org.softwareheritage.graph.server; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.*; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Stats; import org.softwareheritage.graph.SWHID; import java.io.IOException; import java.util.List; import java.util.Map; /** * Web framework of the swh-graph server RPC API. * * @author The Software Heritage developers */ public class App { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), "Server to load and query a compressed graph representation of Software Heritage archive.", new Parameter[]{ new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", "Binding port of the server."), new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new Switch("timings", 't', "timings", "Show timings in API result metadata."),}); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } String graphPath = config.getString("graphPath"); int port = config.getInt("port"); boolean showTimings = config.getBoolean("timings"); startServer(graphPath, port, showTimings); } /** * Loads compressed graph and starts the web server to query it. 
* * @param graphPath basename of the compressed graph * @param port binding port of the server * @param showTimings true if timings should be in results metadata, false otherwise */ private static void startServer(String graphPath, int port, boolean showTimings) throws IOException { - Graph graph = new Graph(graphPath); + Graph graph = Graph.loadMapped(graphPath); Stats stats = new Stats(graphPath); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(port); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/neighbors/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint 
endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/nodes/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/paths/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/walk/:src/:dst", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } /** * Checks query strings names provided to the RPC API. 
* * @param ctx Javalin HTTP request context * @param allowedFmt a regular expression describing allowed query strings names * @throws IllegalArgumentException unknown query string provided */ private static void checkQueryStrings(Context ctx, String allowedFmt) { Map<String, List<String>> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } /** * Formats endpoint result into final JSON for the RPC API. *

* Removes unwanted information if necessary, such as timings (to prevent use of side channels * attacks). * * @param output endpoint operation output which needs formatting * @param showTimings true if timings should be in results metadata, false otherwise * @return final Object with desired JSON format */ private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { if (showTimings) { return output; } else { Map<String, Object> metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed); Map<String, Object> outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings); return outputNoTimings; } } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java b/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java index 3471ed5..b88f8b6 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java @@ -1,76 +1,76 @@ package org.softwareheritage.graph.utils; import com.google.common.primitives.Longs; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SWHID; import org.softwareheritage.graph.experiments.topology.ConnectedComponents; import org.softwareheritage.graph.maps.NodeIdMap; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; public class ExportSubdataset { public static void main(String[] args) throws IOException, ClassNotFoundException { System.err.print("Loading everything..."); String graphPath = args[0]; - Graph graph = new Graph(graphPath); + Graph graph = Graph.loadMapped(graphPath);
Object2LongFunction<byte[]> mphMap = NodeIdMap.loadMph(graphPath + ".mph"); System.err.println(" done."); final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 LongArrayBitVector visited = LongArrayBitVector.ofLength(n); FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(System.in, StandardCharsets.US_ASCII)); LineIterator lineIterator = new LineIterator(buffer); while (lineIterator.hasNext()) { String line = lineIterator.next().toString(); long i; try { // i = mphMap.getLong(line.getBytes(StandardCharsets.UTF_8)); i = graph.getNodeId(new SWHID(line)); } catch (IllegalArgumentException e) { continue; } queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); SWHID currentNodeSWHID = graph.getSWHID(currentNode); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { System.out.format("%s %s\n", currentNodeSWHID, graph.getSWHID(succ)); if (visited.getBoolean(succ)) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java index 1f0f320..5697121 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java @@ -1,93 +1,93 @@
package org.softwareheritage.graph.utils; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.io.BinIO; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SWHID; import java.io.IOException; import java.time.Duration; import java.util.HashSet; import java.util.Scanner; import java.util.Stack; public class FindEarliestRevision { public static void main(String[] args) throws IOException, ClassNotFoundException { String graphPath = args[0]; long ts, elapsedNanos; Duration elapsed; System.err.println("loading transposed graph..."); ts = System.nanoTime(); - Graph graph = new Graph(graphPath).transpose(); + Graph graph = Graph.loadMapped(graphPath).transpose(); elapsed = Duration.ofNanos(System.nanoTime() - ts); System.err.println(String.format("transposed graph loaded (duration: %s).", elapsed)); System.err.println("loading revision timestamps..."); ts = System.nanoTime(); long[][] committerTimestamps = BinIO.loadLongsBig(graphPath + "-rev_committer_timestamps.bin"); elapsed = Duration.ofNanos(System.nanoTime() - ts); System.err.println(String.format("revision timestamps loaded (duration: %s).", elapsed)); Scanner stdin = new Scanner(System.in); AllowedEdges edges = new AllowedEdges("cnt:dir,dir:dir,dir:rev"); String rawSWHID = null; SWHID srcSWHID = null; long lineCount = 0; System.err.println("starting SWHID processing..."); elapsed = Duration.ZERO; while (stdin.hasNextLine()) { ts = System.nanoTime(); rawSWHID = stdin.nextLine().strip(); lineCount++; try { srcSWHID = new SWHID(rawSWHID); } catch (IllegalArgumentException e) { System.err.println(String.format("skipping invalid SWHID %s on line %d", rawSWHID, lineCount)); continue; } long srcNodeId = graph.getNodeId(srcSWHID); System.err.println("starting traversal for: " + srcSWHID.toString()); Stack<Long> stack = new Stack<>(); HashSet<Long> visited =
new HashSet<>(); stack.push(srcNodeId); visited.add(srcNodeId); long minRevId = -1; long minTimestamp = Long.MAX_VALUE; while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (graph.getNodeType(currentNodeId) == Node.Type.REV) { long committerTs = BigArrays.get(committerTimestamps, currentNodeId); if (committerTs < minTimestamp) { minRevId = currentNodeId; minTimestamp = committerTs; } } LazyLongIterator it = graph.successors(currentNodeId, edges); for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } } if (minRevId == -1) { System.err.println("no revision found containing: " + srcSWHID.toString()); } else { System.out.println(srcSWHID.toString() + "\t" + graph.getSWHID(minRevId).toString()); } elapsedNanos = System.nanoTime() - ts; // processing time for current SWHID elapsed = elapsed.plus(Duration.ofNanos(elapsedNanos)); // cumulative processing time for all SWHIDs System.err.println(String.format("visit time (s):\t%.6f", (double) elapsedNanos / 1_000_000_000)); } System.err.println( String.format("processed %d SWHIDs in %s (%s avg)", lineCount, elapsed, elapsed.dividedBy(lineCount))); } } diff --git a/java/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/src/test/java/org/softwareheritage/graph/GraphTest.java index df1afb7..fba8ed8 100644 --- a/java/src/test/java/org/softwareheritage/graph/GraphTest.java +++ b/java/src/test/java/org/softwareheritage/graph/GraphTest.java @@ -1,44 +1,44 @@ package org.softwareheritage.graph; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.LazyLongIterators; import org.hamcrest.MatcherAssert; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import static 
org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; public class GraphTest { static Graph graph; - public static <T> void assertEqualsAnyOrder(Collection<T> expecteds, Collection<T> actuals) { - MatcherAssert.assertThat(expecteds, containsInAnyOrder(actuals.toArray())); - } - - public static void assertLazyLongIteratorsEqual(LazyLongIterator expected, LazyLongIterator actual) { - ArrayList<Long> expectedList = new ArrayList<>(); - ArrayList<Long> actualList = new ArrayList<>(); - Iterator<Long> expectedIt = LazyLongIterators.eager(expected); - Iterator<Long> actualIt = LazyLongIterators.eager(actual); - expectedIt.forEachRemaining(expectedList::add); - actualIt.forEachRemaining(actualList::add); - Assertions.assertArrayEquals(expectedList.toArray(), actualList.toArray()); - } - @BeforeAll public static void setUp() throws IOException { Path graphPath = Paths.get("..", "swh", "graph", "tests", "dataset", "output", "example"); - graph = new Graph(graphPath.toString()); + graph = Graph.loadMapped(graphPath.toString()); } public Graph getGraph() { return graph; } + + public static SWHID fakeSWHID(String type, int num) { + return new SWHID(String.format("swh:1:%s:%040d", type, num)); + } + + public static <T> void assertEqualsAnyOrder(Collection<T> expecteds, Collection<T> actuals) { + MatcherAssert.assertThat(expecteds, containsInAnyOrder(actuals.toArray())); + } + + public static ArrayList<Long> lazyLongIteratorToList(LazyLongIterator input) { + ArrayList<Long> inputList = new ArrayList<>(); + Iterator<Long> inputIt = LazyLongIterators.eager(input); + inputIt.forEachRemaining(inputList::add); + return inputList; + } } diff --git a/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java new file mode 100644 index 0000000..1f95ebe --- /dev/null +++ b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java @@ -0,0 +1,85 @@ +package org.softwareheritage.graph; + +import java.util.*; + +import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test; + +public class SubgraphTest extends GraphTest { + @Test + public void noFilter() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("*")); + + for (long i = 0; i < g.numNodes(); ++i) { + Assertions.assertEquals(g.outdegree(i), sg.outdegree(i)); + } + } + + @Test + public void missingNode() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + + SWHID rev1 = fakeSWHID("rev", 18); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + sg.outdegree(sg.getNodeId(rev1)); + }); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + sg.successors(sg.getNodeId(rev1)); + }); + } + + @Test + public void outdegreeOnlyDirOri() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + + SWHID dir1 = fakeSWHID("dir", 17); + Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir1))); + Assertions.assertEquals(1, sg.outdegree(sg.getNodeId(dir1))); + + SWHID dir2 = fakeSWHID("dir", 6); + Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir2))); + Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(dir2))); + + SWHID ori1 = fakeSWHID("ori", 21); + Assertions.assertEquals(1, g.outdegree(g.getNodeId(ori1))); + Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(ori1))); + } + + @Test + public void successorsOnlyDirOri() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + + SWHID dir1 = fakeSWHID("dir", 17); + assertEqualsAnyOrder(Collections.singletonList(sg.getNodeId(fakeSWHID("dir", 16))), + lazyLongIteratorToList(sg.successors(sg.getNodeId(dir1)))); + + SWHID dir2 = fakeSWHID("dir", 6); + assertEqualsAnyOrder(Collections.emptyList(), lazyLongIteratorToList(sg.successors(sg.getNodeId(dir2)))); + + SWHID ori1 = fakeSWHID("ori", 21); + assertEqualsAnyOrder(Collections.emptyList(), lazyLongIteratorToList(sg.successors(sg.getNodeId(ori1)))); + } + + @Test + public void nodeIteratorOnlyOriDir() 
{ + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + ArrayList<Long> nodeList = new ArrayList<>(); + Iterator<Long> nodeIt = sg.nodeIterator(); + nodeIt.forEachRemaining(nodeList::add); + assertEqualsAnyOrder(Arrays.asList(sg.getNodeId(fakeSWHID("ori", 21)), sg.getNodeId(fakeSWHID("dir", 2)), + sg.getNodeId(fakeSWHID("dir", 6)), sg.getNodeId(fakeSWHID("dir", 8)), + sg.getNodeId(fakeSWHID("dir", 12)), sg.getNodeId(fakeSWHID("dir", 16)), + sg.getNodeId(fakeSWHID("dir", 17))), nodeList); + sg = new Subgraph(g, new AllowedNodes("snp,rel")); + nodeList = new ArrayList<>(); + nodeIt = sg.nodeIterator(); + nodeIt.forEachRemaining(nodeList::add); + assertEqualsAnyOrder(Arrays.asList(sg.getNodeId(fakeSWHID("snp", 20)), sg.getNodeId(fakeSWHID("rel", 10)), + sg.getNodeId(fakeSWHID("rel", 19))), nodeList); + } +}