diff --git a/java/pom.xml b/java/pom.xml
index 26f0ade..c59405c 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -1,274 +1,341 @@
4.0.0org.softwareheritage.graphswh-graph${git.closest.tag.name}swh-graphhttps://forge.softwareheritage.org/source/swh-graph/UTF-811ch.qos.logbacklogback-classic1.2.3org.junit.jupiterjunit-jupiter-api5.7.0test
+
+ org.junit.vintage
+ junit-vintage-engine
+ 5.7.0
+
+
+ junit
+ junit
+ 4.12
+ org.junit.jupiterjunit-jupiter-engine5.7.0testorg.hamcresthamcrest2.2testio.javalinjavalin3.0.0org.slf4jslf4j-simple1.7.26com.fasterxml.jackson.corejackson-databind2.13.0it.unimi.dsiwebgraph-big3.6.6it.unimi.dsifastutil8.5.6it.unimi.dsidsiutils2.6.17it.unimi.dsisux4j5.2.3it.unimi.dsilaw2.7.2org.apache.hadoophadoop-commonorg.umlgraphumlgraphorg.eclipse.jetty.aggregatejetty-allit.unimi.dimg4jit.unimi.dimg4j-bigcom.martiansoftwarejsap2.1net.sf.py4jpy4j0.10.9.3commons-codeccommons-codec1.15maven-clean-plugin3.1.0maven-resources-plugin3.0.2maven-compiler-plugin3.8.01111-verbose-Xlint:allmaven-surefire-plugin2.22.2maven-failsafe-plugin2.22.2maven-jar-plugin3.0.2maven-install-plugin2.5.2maven-deploy-plugin2.8.2maven-site-plugin3.7.1maven-project-info-reports-plugin3.0.0maven-assembly-plugin3.3.0org.softwareheritage.graph.server.Appjar-with-dependenciesfalsemake-assemblypackagesinglecom.diffplug.spotlessspotless-maven-plugin2.4.1*.md.gitignoretrue44.16.0.coding-style.xmlpl.project13.mavengit-commit-id-plugin3.0.1get-the-git-infosrevisioninitializetruetruetruetruev*git.closest.tag.name^vtrue
-
-
-
-
+
+ maven-source-plugin
+ 2.1.1
+
+
+ bundle-sources
+ package
+
+ jar-no-fork
+ test-jar-no-fork
+
+
+
+ org.apache.maven.pluginsmaven-javadoc-plugin
- 3.1.1
+ 3.3.1
+
+
+ resource-bundles
+ package
+
+
+ resource-bundle
+
+
+ test-resource-bundle
+
+
+ false
+
+
+
+ javadoc-jar
+ package
+
+ jar
+
+
+
+
+ true
+
+ it.unimi.dsi:webgraph-big:*
+
+
+ https://webgraph.di.unimi.it/docs-big/
+ https://dsiutils.di.unimi.it/docs/
+ https://fastutil.di.unimi.it/docs/
+ https://law.di.unimi.it/software/law-docs/
+
+
+
+ implSpec
+ a
+ Implementation Requirements:
+
+
+ implNote
+ a
+ Implementation Note:
+
+
+
-
+
diff --git a/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java b/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java
index be19956..815b426 100644
--- a/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java
+++ b/java/src/main/java/org/softwareheritage/graph/BidirectionalImmutableGraph.java
@@ -1,128 +1,128 @@
package org.softwareheritage.graph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.Transform;
import it.unimi.dsi.fastutil.longs.LongIterator;
/**
* A directed immutable graph which can be iterated in both directions (forward and backward). It
* exposes the backward equivalents of the ImmutableGraph primitives (indegree() and
* predecessors()). This is implemented by passing two graphs, one in the forward and one in the
* backward direction.
*/
public class BidirectionalImmutableGraph extends ImmutableGraph {
private final ImmutableGraph forwardGraph;
private final ImmutableGraph backwardGraph;
/**
* Creates a bidirectional immutable graph
*
* @param forwardGraph The graph in the forward direction
* @param backwardGraph The graph in the backward direction
*/
- protected BidirectionalImmutableGraph(ImmutableGraph forwardGraph, ImmutableGraph backwardGraph) {
+ public BidirectionalImmutableGraph(ImmutableGraph forwardGraph, ImmutableGraph backwardGraph) {
this.forwardGraph = forwardGraph;
this.backwardGraph = backwardGraph;
}
@Override
public long numNodes() {
assert forwardGraph.numNodes() == backwardGraph.numNodes();
return this.forwardGraph.numNodes();
}
@Override
public long numArcs() {
assert forwardGraph.numArcs() == backwardGraph.numArcs();
return this.forwardGraph.numArcs();
}
@Override
public boolean randomAccess() {
return this.forwardGraph.randomAccess() && this.backwardGraph.randomAccess();
}
@Override
public boolean hasCopiableIterators() {
return forwardGraph.hasCopiableIterators() && backwardGraph.hasCopiableIterators();
}
@Override
public BidirectionalImmutableGraph copy() {
return new BidirectionalImmutableGraph(this.forwardGraph.copy(), this.backwardGraph.copy());
}
/**
* Returns the transposed version of the bidirectional graph. Successors become predecessors, and
* vice-versa.
*/
public BidirectionalImmutableGraph transpose() {
return new BidirectionalImmutableGraph(backwardGraph, forwardGraph);
}
/**
* Returns the symmetric version of the bidirectional graph. It returns the (lazy) union of the
* forward graph and the backward graph. This is equivalent to removing the directionality of the
* edges: the successors of a node are also its predecessors.
*
* @return a symmetric, undirected BidirectionalImmutableGraph.
*/
public BidirectionalImmutableGraph symmetrize() {
ImmutableGraph symmetric = Transform.union(forwardGraph, backwardGraph);
return new BidirectionalImmutableGraph(symmetric, symmetric);
}
/**
* Returns the simplified version of the bidirectional graph. Works like symmetrize(), but also
* removes the loop edges.
*
* @return a simplified (loopless and symmetric) BidirectionalImmutableGraph
*/
public BidirectionalImmutableGraph simplify() {
ImmutableGraph simplified = Transform.simplify(forwardGraph, backwardGraph);
return new BidirectionalImmutableGraph(simplified, simplified);
}
/** Returns the outdegree of a node */
@Override
public long outdegree(long l) {
return forwardGraph.outdegree(l);
}
/** Returns the indegree of a node */
public long indegree(long l) {
return backwardGraph.outdegree(l);
}
/** Returns a lazy iterator over the successors of a given node. */
@Override
public LazyLongIterator successors(long nodeId) {
return forwardGraph.successors(nodeId);
}
/** Returns a lazy iterator over the predecessors of a given node. */
public LazyLongIterator predecessors(long nodeId) {
return backwardGraph.successors(nodeId);
}
/** Returns a reference to an array containing the predecessors of a given node. */
public long[][] predecessorBigArray(long x) {
return backwardGraph.successorBigArray(x);
}
/** Returns an iterator enumerating the indegrees of the nodes of this graph. */
public LongIterator indegrees() {
return backwardGraph.outdegrees();
}
/** Returns the underlying ImmutableGraph in the forward direction. */
public ImmutableGraph getForwardGraph() {
return forwardGraph;
}
/** Returns the underlying ImmutableGraph in the backward direction. */
public ImmutableGraph getBackwardGraph() {
return backwardGraph;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java
index a2d3f5a..e6f4494 100644
--- a/java/src/main/java/org/softwareheritage/graph/Entry.java
+++ b/java/src/main/java/org/softwareheritage/graph/Entry.java
@@ -1,193 +1,193 @@
package org.softwareheritage.graph;
import java.io.*;
import java.util.ArrayList;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
public class Entry {
- private Graph graph;
+ private SwhBidirectionalGraph graph;
public void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename);
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
System.err.println("Graph loaded.");
}
- public Graph get_graph() {
+ public SwhBidirectionalGraph get_graph() {
return graph.copy();
}
public String stats() {
try {
Stats stats = new Stats(graph.getPath());
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
return objectMapper.writeValueAsString(stats);
} catch (IOException e) {
throw new RuntimeException("Cannot read stats: " + e);
}
}
public void check_swhid(String src) {
graph.getNodeId(new SWHID(src));
}
private int count_visitor(NodeCountVisitor f, long srcNodeId) {
int[] count = {0};
f.accept(srcNodeId, (node) -> {
count[0]++;
});
return count[0];
}
public int count_leaves(String direction, String edgesFmt, String src, long maxEdges) {
long srcNodeId = graph.getNodeId(new SWHID(src));
Traversal t = new Traversal(graph.copy(), direction, edgesFmt, maxEdges);
return count_visitor(t::leavesVisitor, srcNodeId);
}
public int count_neighbors(String direction, String edgesFmt, String src, long maxEdges) {
long srcNodeId = graph.getNodeId(new SWHID(src));
Traversal t = new Traversal(graph.copy(), direction, edgesFmt, maxEdges);
return count_visitor(t::neighborsVisitor, srcNodeId);
}
public int count_visit_nodes(String direction, String edgesFmt, String src, long maxEdges) {
long srcNodeId = graph.getNodeId(new SWHID(src));
Traversal t = new Traversal(graph.copy(), direction, edgesFmt, maxEdges);
return count_visitor(t::visitNodesVisitor, srcNodeId);
}
public QueryHandler get_handler(String clientFIFO) {
return new QueryHandler(graph.copy(), clientFIFO);
}
private interface NodeCountVisitor {
void accept(long nodeId, Traversal.NodeIdConsumer consumer);
}
public class QueryHandler {
- Graph graph;
+ SwhBidirectionalGraph graph;
BufferedWriter out;
String clientFIFO;
- public QueryHandler(Graph graph, String clientFIFO) {
+ public QueryHandler(SwhBidirectionalGraph graph, String clientFIFO) {
this.graph = graph;
this.clientFIFO = clientFIFO;
this.out = null;
}
public void writeNode(SWHID swhid) {
try {
out.write(swhid.toString() + "\n");
} catch (IOException e) {
throw new RuntimeException("Cannot write response to client: " + e);
}
}
public void writeEdge(SWHID src, SWHID dst) {
try {
out.write(src.toString() + " " + dst.toString() + "\n");
} catch (IOException e) {
throw new RuntimeException("Cannot write response to client: " + e);
}
}
public void open() {
try {
FileOutputStream file = new FileOutputStream(this.clientFIFO);
this.out = new BufferedWriter(new OutputStreamWriter(file));
} catch (IOException e) {
throw new RuntimeException("Cannot open client FIFO: " + e);
}
}
public void close() {
try {
out.close();
} catch (IOException e) {
throw new RuntimeException("Cannot write response to client: " + e);
}
}
public void leaves(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) {
long srcNodeId = graph.getNodeId(new SWHID(src));
open();
Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes);
for (Long nodeId : t.leaves(srcNodeId)) {
writeNode(graph.getSWHID(nodeId));
}
close();
}
public void neighbors(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) {
long srcNodeId = graph.getNodeId(new SWHID(src));
open();
Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes);
for (Long nodeId : t.neighbors(srcNodeId)) {
writeNode(graph.getSWHID(nodeId));
}
close();
}
public void visit_nodes(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) {
long srcNodeId = graph.getNodeId(new SWHID(src));
open();
Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes);
for (Long nodeId : t.visitNodes(srcNodeId)) {
writeNode(graph.getSWHID(nodeId));
}
close();
}
public void visit_edges(String direction, String edgesFmt, String src, long maxEdges, String returnTypes) {
long srcNodeId = graph.getNodeId(new SWHID(src));
open();
Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges);
t.visitNodesVisitor(srcNodeId, null, (srcId, dstId) -> {
writeEdge(graph.getSWHID(srcId), graph.getSWHID(dstId));
});
close();
}
public void walk(String direction, String edgesFmt, String algorithm, String src, String dst, long maxEdges,
String returnTypes) {
long srcNodeId = graph.getNodeId(new SWHID(src));
open();
ArrayList res;
Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes);
if (dst.matches("ori|snp|rel|rev|dir|cnt")) {
Node.Type dstType = Node.Type.fromStr(dst);
res = t.walk(srcNodeId, dstType, algorithm);
} else {
long dstNodeId = graph.getNodeId(new SWHID(dst));
res = t.walk(srcNodeId, dstNodeId, algorithm);
}
for (Long nodeId : res) {
writeNode(graph.getSWHID(nodeId));
}
close();
}
public void random_walk(String direction, String edgesFmt, int retries, String src, String dst, long maxEdges,
String returnTypes) {
long srcNodeId = graph.getNodeId(new SWHID(src));
open();
ArrayList res;
Traversal t = new Traversal(graph, direction, edgesFmt, maxEdges, returnTypes);
if (dst.matches("ori|snp|rel|rev|dir|cnt")) {
Node.Type dstType = Node.Type.fromStr(dst);
res = t.randomWalk(srcNodeId, dstType, retries);
} else {
long dstNodeId = graph.getNodeId(new SWHID(dst));
res = t.randomWalk(srcNodeId, dstNodeId, retries);
}
for (Long nodeId : res) {
writeNode(graph.getSWHID(nodeId));
}
close();
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java
deleted file mode 100644
index 8d9acf1..0000000
--- a/java/src/main/java/org/softwareheritage/graph/Graph.java
+++ /dev/null
@@ -1,304 +0,0 @@
-package org.softwareheritage.graph;
-
-import it.unimi.dsi.big.webgraph.ImmutableGraph;
-import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import it.unimi.dsi.logging.ProgressLogger;
-import org.softwareheritage.graph.maps.NodeIdMap;
-import org.softwareheritage.graph.maps.NodeTypesMap;
-
-import java.io.IOException;
-
-/**
- * Main class storing the compressed graph and node id mappings.
- *
- * The compressed graph is stored using the WebGraph
- * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent
- * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id
- * ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output
- * (convert back to SWHID for users results). However, since graph traversal can be restricted
- * depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored
- * as well to avoid a full SWHID lookup.
- *
- * @author The Software Heritage developers
- * @see org.softwareheritage.graph.AllowedEdges
- * @see org.softwareheritage.graph.maps.NodeIdMap
- * @see org.softwareheritage.graph.maps.NodeTypesMap
- */
-
-public class Graph extends ImmutableGraph {
- /**
- * Bidirectional graph containing two compressed {@link it.unimi.dsi.big.webgraph.BVGraph} one for
- * each direction
- */
- BidirectionalImmutableGraph graph;
-
- /** Path and basename of the compressed graph */
- String path;
- /** Mapping long id ↔ SWHIDs */
- NodeIdMap nodeIdMap;
- /** Mapping long id → node types */
- NodeTypesMap nodeTypesMap;
-
- /**
- * Constructor.
- *
- * @param path path and basename of the compressed graph to load
- */
-
- private Graph(String path) throws IOException {
- loadInternal(path, null, LoadMethod.MAPPED);
- }
-
- /**
- * Loading mechanisms
- */
-
- enum LoadMethod {
- MEMORY, MAPPED, OFFLINE,
- }
-
- protected Graph loadInternal(String path, ProgressLogger pl, LoadMethod method) throws IOException {
- this.path = path;
- ImmutableGraph direct = null;
- ImmutableGraph transposed = null;
- if (method == LoadMethod.MEMORY) {
- direct = ImmutableGraph.load(path, pl);
- transposed = ImmutableGraph.load(path + "-transposed", pl);
- } else if (method == LoadMethod.MAPPED) {
- direct = ImmutableGraph.load(path, pl);
- transposed = ImmutableGraph.loadMapped(path + "-transposed", pl);
- } else if (method == LoadMethod.OFFLINE) {
- direct = ImmutableGraph.loadOffline(path, pl);
- transposed = ImmutableGraph.loadOffline(path + "-transposed", pl);
- }
- this.graph = new BidirectionalImmutableGraph(direct, transposed);
- this.nodeTypesMap = new NodeTypesMap(path);
- this.nodeIdMap = new NodeIdMap(path, numNodes());
- return this;
- }
-
- protected Graph() {
- }
-
- public static Graph load(String path, ProgressLogger pl) throws IOException {
- return new Graph().loadInternal(path, pl, LoadMethod.MEMORY);
- }
-
- public static Graph loadMapped(String path, ProgressLogger pl) throws IOException {
- return new Graph().loadInternal(path, pl, LoadMethod.MAPPED);
- }
-
- public static Graph loadOffline(String path, ProgressLogger pl) throws IOException {
- return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
- }
-
- public static Graph load(String path) throws IOException {
- return new Graph().loadInternal(path, null, LoadMethod.MEMORY);
- }
-
- public static Graph loadMapped(String path) throws IOException {
- return new Graph().loadInternal(path, null, LoadMethod.MAPPED);
- }
-
- public static Graph loadOffline(String path) throws IOException {
- return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
- }
-
- /**
- * Constructor used for copy()
- */
- protected Graph(BidirectionalImmutableGraph graph, String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) {
- this.graph = graph;
- this.path = path;
- this.nodeIdMap = nodeIdMap;
- this.nodeTypesMap = nodeTypesMap;
- }
-
- /**
- * Return a flyweight copy of the graph.
- */
- @Override
- public Graph copy() {
- return new Graph(this.graph.copy(), this.path, this.nodeIdMap, this.nodeTypesMap);
- }
-
- @Override
- public boolean randomAccess() {
- return graph.randomAccess();
- }
-
- /**
- * Return a transposed version of the graph.
- */
- public Graph transpose() {
- return new Graph(this.graph.transpose(), this.path, this.nodeIdMap, this.nodeTypesMap);
- }
-
- /**
- * Return a symmetric version of the graph.
- */
- public Graph symmetrize() {
- return new Graph(this.graph.symmetrize(), this.path, this.nodeIdMap, this.nodeTypesMap);
- }
-
- /**
- * Cleans up graph resources after use.
- */
- public void cleanUp() throws IOException {
- nodeIdMap.close();
- }
-
- /**
- * Returns number of nodes in the graph.
- *
- * @return number of nodes in the graph
- */
- @Override
- public long numNodes() {
- return graph.numNodes();
- }
-
- /**
- * Returns number of edges in the graph.
- *
- * @return number of edges in the graph
- */
- @Override
- public long numArcs() {
- return graph.numArcs();
- }
-
- /**
- * Returns lazy iterator of successors of a node.
- *
- * @param nodeId node specified as a long id
- * @return lazy iterator of successors of the node, specified as a
- * WebGraph LazyLongIterator
- */
- @Override
- public LazyLongIterator successors(long nodeId) {
- return graph.successors(nodeId);
- }
-
- /**
- * Returns lazy iterator of successors of a node while following a specific set of edge types.
- *
- * @param nodeId node specified as a long id
- * @param allowedEdges the specification of which edges can be traversed
- * @return lazy iterator of successors of the node, specified as a
- * WebGraph LazyLongIterator
- */
- public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) {
- if (allowedEdges.restrictedTo == null) {
- // All edges are allowed, bypass edge check
- return this.successors(nodeId);
- } else {
- LazyLongIterator allSuccessors = this.successors(nodeId);
- Graph thisGraph = this;
- return new LazyLongIterator() {
- @Override
- public long nextLong() {
- long neighbor;
- while ((neighbor = allSuccessors.nextLong()) != -1) {
- if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) {
- return neighbor;
- }
- }
- return -1;
- }
-
- @Override
- public long skip(final long n) {
- long i;
- for (i = 0; i < n && nextLong() != -1; i++)
- ;
- return i;
- }
- };
- }
- }
-
- /**
- * Returns the outdegree of a node.
- *
- * @param nodeId node specified as a long id
- * @return outdegree of a node
- */
- @Override
- public long outdegree(long nodeId) {
- return graph.outdegree(nodeId);
- }
-
- /**
- * Returns lazy iterator of predecessors of a node.
- *
- * @param nodeId node specified as a long id
- * @return lazy iterator of predecessors of the node, specified as a
- * WebGraph LazyLongIterator
- */
- public LazyLongIterator predecessors(long nodeId) {
- return graph.predecessors(nodeId);
- }
-
- /**
- * Returns the indegree of a node.
- *
- * @param nodeId node specified as a long id
- * @return indegree of a node
- */
- public long indegree(long nodeId) {
- return graph.indegree(nodeId);
- }
-
- /**
- * Returns the underlying BidirectionalImmutableGraph.
- *
- * @return WebGraph ImmutableGraph
- */
- public ImmutableGraph getGraph() {
- return this.graph;
- }
-
- /**
- * Returns the graph full path.
- *
- * @return graph full path
- */
- public String getPath() {
- return path;
- }
-
- /**
- * Converts {@link SWHID} node to long.
- *
- * @param swhid node specified as a {@link SWHID}
- * @return internal long node id
- * @see SWHID
- */
- public long getNodeId(SWHID swhid) {
- return nodeIdMap.getNodeId(swhid);
- }
-
- /**
- * Converts long id node to {@link SWHID}.
- *
- * @param nodeId node specified as a long id
- * @return external SWHID
- * @see SWHID
- */
- public SWHID getSWHID(long nodeId) {
- return nodeIdMap.getSWHID(nodeId);
- }
-
- /**
- * Returns node type.
- *
- * @param nodeId node specified as a long id
- * @return corresponding node type
- * @see org.softwareheritage.graph.Node.Type
- */
- public Node.Type getNodeType(long nodeId) {
- return nodeTypesMap.getType(nodeId);
- }
-}
diff --git a/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java b/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java
index 3f3e7a3..18eb3b8 100644
--- a/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java
+++ b/java/src/main/java/org/softwareheritage/graph/NodesFiltering.java
@@ -1,107 +1,107 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
/**
- *
NodesFiltering
+ * NodesFiltering
*
* class that manages the filtering of nodes that have been returned after a visit of the graph.
* parameterized by a string that represents either no filtering (*) or a set of node types.
*
*
*
*
*
graph/visit/nodes/swh:1:rel:0000000000000000000000000000000000000010 return_types==rev will
* only return 'rev' nodes.
*
*
graph/visit/nodes/swh:1:rel:0000000000000000000000000000000000000010
* return_types==rev,snp,cnt will only return 'rev' 'snp' 'cnt' nodes.
*
*
graph/visit/nodes/swh:1:rel:0000000000000000000000000000000000000010 return_types==* will
* return all the nodes.
*
*
* How to use NodesFiltering :
*
*
* {@code
* Long id1 = .... // graph.getNodeType(id1) == CNT
* Long id2 = .... // graph.getNodeType(id2) == SNP
* Long id3 = .... // graph.getNodeType(id3) == ORI
 * ArrayList nodeIds = new ArrayList();
* nodeIds.add(id1); nodeIds.add(id2); nodeIds.add(id3);
*
* NodeFiltering nds = new NodesFiltering("snp,ori"); // we allow only snp node types to be shown
* System.out.println(nds.filterByNodeTypes(nodeIds,graph)); // will print id2, id3
*
 * nds = new NodesFiltering("*");
* System.out.println(nds.filterByNodeTypes(nodeIds,graph)); // will print id1, id2 id3
*
* }
*
*/
public class NodesFiltering {
boolean restricted;
ArrayList allowedNodesTypes;
/**
* Default constructor, in order to handle the * case (all types of nodes are allowed to be
 * returned). allowedNodesTypes will contain [SNP,CNT....] all types of nodes.
*
*/
public NodesFiltering() {
restricted = false;
allowedNodesTypes = Node.Type.parse("*");
}
/**
* Constructor
*
* @param strTypes a formatted string describing the types of nodes we want to allow to be shown.
*
 * NodesFiltering("cnt,snp") will set allowedNodesTypes to [CNT,SNP]
*
*/
public NodesFiltering(String strTypes) {
restricted = true;
allowedNodesTypes = new ArrayList();
String[] types = strTypes.split(",");
for (String type : types) {
allowedNodesTypes.add(Node.Type.fromStr(type));
}
}
/**
* Check if the type given in parameter is in the list of allowed types.
*
* @param typ the type of the node.
*/
public boolean typeIsAllowed(Node.Type typ) {
return this.allowedNodesTypes.contains(typ);
}
/**
*
* the function that filters the nodes returned, we browse the list of nodes found after a visit and
* we create a new list with only the nodes that have a type that is contained in the list of
* allowed types (allowedNodesTypes)
*
*
 * @param nodeIds the nodes found during the visit
* @param g the graph in order to find the types of nodes from their id in nodeIds
* @return a new list with the id of node which have a type in allowedTypes
*
*
*/
- public ArrayList filterByNodeTypes(ArrayList nodeIds, Graph g) {
+ public ArrayList filterByNodeTypes(ArrayList nodeIds, SwhBidirectionalGraph g) {
ArrayList filteredNodes = new ArrayList();
for (Long node : nodeIds) {
if (this.typeIsAllowed(g.getNodeType(node))) {
filteredNodes.add(node);
}
}
return filteredNodes;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Subgraph.java b/java/src/main/java/org/softwareheritage/graph/Subgraph.java
index 3e7e7fd..53ef937 100644
--- a/java/src/main/java/org/softwareheritage/graph/Subgraph.java
+++ b/java/src/main/java/org/softwareheritage/graph/Subgraph.java
@@ -1,224 +1,224 @@
package org.softwareheritage.graph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.NodeIterator;
import java.util.NoSuchElementException;
public class Subgraph extends ImmutableGraph {
- private final Graph underlyingGraph;
+ private final SwhBidirectionalGraph underlyingGraph;
public final AllowedNodes allowedNodeTypes;
private long nodeCount = -1;
/**
* Constructor.
*
*/
- public Subgraph(Graph underlyingGraph, AllowedNodes allowedNodeTypes) {
+ public Subgraph(SwhBidirectionalGraph underlyingGraph, AllowedNodes allowedNodeTypes) {
this.underlyingGraph = underlyingGraph.copy();
this.allowedNodeTypes = allowedNodeTypes;
}
/**
* Return a flyweight copy of the graph.
*/
@Override
public Subgraph copy() {
return new Subgraph(this.underlyingGraph.copy(), allowedNodeTypes);
}
@Override
public boolean randomAccess() {
return underlyingGraph.randomAccess();
}
/**
* Return a transposed version of the graph.
*/
public Subgraph transpose() {
return new Subgraph(underlyingGraph.transpose(), allowedNodeTypes);
}
/**
* Return a symmetric version of the graph.
*/
public Subgraph symmetrize() {
return new Subgraph(underlyingGraph.symmetrize(), allowedNodeTypes);
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
@Override
public long numNodes() {
if (nodeCount == -1) {
for (long i = 0; i < underlyingGraph.numNodes(); ++i) {
if (nodeExists(i))
++nodeCount;
}
}
return nodeCount;
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
@Override
public long numArcs() {
throw new UnsupportedOperationException("Cannot determine the number of arcs in a subgraph");
}
public long maxNodeNumber() {
return underlyingGraph.numNodes();
}
public boolean nodeExists(long node) {
return allowedNodeTypes.isAllowed(underlyingGraph.getNodeType(node));
}
/**
* Returns lazy iterator of successors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of successors of the node, specified as a
* WebGraph LazyLongIterator
*/
@Override
public LazyLongIterator successors(long nodeId) {
if (!nodeExists(nodeId)) {
throw new IllegalArgumentException("Node " + nodeId + " not in subgraph");
}
LazyLongIterator allSuccessors = underlyingGraph.successors(nodeId);
return new LazyLongIterator() {
@Override
public long nextLong() {
long neighbor;
while ((neighbor = allSuccessors.nextLong()) != -1) {
if (nodeExists(neighbor)) {
return neighbor;
}
}
return -1;
}
@Override
public long skip(final long n) {
long i;
for (i = 0; i < n && nextLong() != -1; i++)
;
return i;
}
};
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
@Override
public long outdegree(long nodeId) {
long deg = 0;
for (LazyLongIterator allSuccessors = successors(nodeId); allSuccessors.nextLong() != -1; ++deg)
;
return deg;
}
@Override
public NodeIterator nodeIterator() {
return new NodeIterator() {
final long n = numNodes();
long i = -1;
long done = 0;
@Override
public boolean hasNext() {
return done <= n;
}
@Override
public long nextLong() {
if (!hasNext())
throw new NoSuchElementException();
do {
++i;
if (i >= underlyingGraph.numNodes())
throw new NoSuchElementException();
} while (!nodeExists(i));
++done;
return i;
}
@Override
public long outdegree() {
return Subgraph.this.outdegree(i);
}
@Override
public LazyLongIterator successors() {
return Subgraph.this.successors(i);
}
};
}
/**
* Returns lazy iterator of predecessors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of predecessors of the node, specified as a
* WebGraph LazyLongIterator
*/
public LazyLongIterator predecessors(long nodeId) {
return this.transpose().successors(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return this.transpose().outdegree(nodeId);
}
/**
* Converts {@link SWHID} node to long.
*
* @param swhid node specified as a {@link SWHID}
* @return internal long node id
* @see SWHID
*/
public long getNodeId(SWHID swhid) {
return underlyingGraph.getNodeId(swhid);
}
/**
* Converts long id node to {@link SWHID}.
*
* @param nodeId node specified as a long id
* @return external SWHID
* @see SWHID
*/
public SWHID getSWHID(long nodeId) {
return underlyingGraph.getSWHID(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return underlyingGraph.getNodeType(nodeId);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java b/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java
new file mode 100644
index 0000000..df12875
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java
@@ -0,0 +1,166 @@
+package org.softwareheritage.graph;
+
+import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator;
+import it.unimi.dsi.logging.ProgressLogger;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Class representing the compressed Software Heritage graph in both directions (forward and
+ * backward).
+ *
+ * This class uses the {@link BidirectionalImmutableGraph} class internally to implement the
+ * backward equivalent of graph operations ({@link SwhBidirectionalGraph#indegree(long)},
+ * {@link SwhBidirectionalGraph#predecessors(long)}, etc.) by holding a reference to two
+ * {@link SwhUnidirectionalGraph} (a forward graph and a backward graph).
+ *
+ * Both graphs share their graph properties in memory by storing references to the same
+ * {@link SwhGraphProperties} object.
+ *
+ * @author The Software Heritage developers
+ * @see SwhUnidirectionalGraph
+ */
+
public class SwhBidirectionalGraph extends BidirectionalImmutableGraph implements SwhGraph {
    /** Property data of the graph (id/type mappings etc.), shared by both directions */
    private final SwhGraphProperties properties;

    /** The graph in its original (forward) direction */
    private final SwhUnidirectionalGraph forwardGraph;
    /** The graph in the transposed (backward) direction */
    private final SwhUnidirectionalGraph backwardGraph;

    public SwhBidirectionalGraph(SwhUnidirectionalGraph forwardGraph, SwhUnidirectionalGraph backwardGraph,
            SwhGraphProperties properties) {
        super(forwardGraph, backwardGraph);
        this.forwardGraph = forwardGraph;
        this.backwardGraph = backwardGraph;
        this.properties = properties;
    }

    private SwhBidirectionalGraph(BidirectionalImmutableGraph graph, SwhGraphProperties properties) {
        super(graph.getForwardGraph(), graph.getBackwardGraph());
        // The casts are safe here: this private constructor is only reached from
        // transpose()/symmetrize() below, whose inner graphs were built from
        // SwhUnidirectionalGraph instances.
        this.forwardGraph = (SwhUnidirectionalGraph) graph.getForwardGraph();
        this.backwardGraph = (SwhUnidirectionalGraph) graph.getBackwardGraph();
        this.properties = properties;
    }

    /**
     * Loads the forward graph and its "-transposed" companion, sharing a single
     * {@link SwhGraphProperties} instance between the two directions.
     */
    public static SwhBidirectionalGraph load(LoadMethod method, String path, InputStream is, ProgressLogger pl)
            throws IOException {
        SwhUnidirectionalGraph forward = SwhUnidirectionalGraph.loadGraphOnly(method, path, is, pl);
        SwhUnidirectionalGraph backward = SwhUnidirectionalGraph.loadGraphOnly(method, path + "-transposed", is, pl);
        SwhGraphProperties properties = SwhGraphProperties.load(path);
        forward.setProperties(properties);
        backward.setProperties(properties);
        return new SwhBidirectionalGraph(forward, backward, properties);
    }

    /**
     * Same as {@link #load(LoadMethod, String, InputStream, ProgressLogger)}, but
     * also loads arc labels in both directions.
     */
    public static SwhBidirectionalGraph loadLabelled(LoadMethod method, String path, InputStream is, ProgressLogger pl)
            throws IOException {
        SwhUnidirectionalGraph forward = SwhUnidirectionalGraph.loadLabelledGraphOnly(method, path, is, pl);
        SwhUnidirectionalGraph backward = SwhUnidirectionalGraph.loadLabelledGraphOnly(method, path + "-transposed", is,
                pl);
        SwhGraphProperties properties = SwhGraphProperties.load(path);
        forward.setProperties(properties);
        backward.setProperties(properties);
        return new SwhBidirectionalGraph(forward, backward, properties);
    }

    // loadXXX methods from ImmutableGraph
    public static SwhBidirectionalGraph load(String path, ProgressLogger pl) throws IOException {
        return load(LoadMethod.STANDARD, path, null, pl);
    }
    public static SwhBidirectionalGraph load(String path) throws IOException {
        return load(LoadMethod.STANDARD, path, null, null);
    }
    public static SwhBidirectionalGraph loadMapped(String path, ProgressLogger pl) throws IOException {
        return load(LoadMethod.MAPPED, path, null, pl);
    }
    public static SwhBidirectionalGraph loadMapped(String path) throws IOException {
        return load(LoadMethod.MAPPED, path, null, null);
    }
    public static SwhBidirectionalGraph loadOffline(String path, ProgressLogger pl) throws IOException {
        return load(LoadMethod.OFFLINE, path, null, pl);
    }
    public static SwhBidirectionalGraph loadOffline(String path) throws IOException {
        return load(LoadMethod.OFFLINE, path, null, null);
    }

    // Labelled versions of the loadXXX methods from ImmutableGraph
    public static SwhBidirectionalGraph loadLabelled(String path, ProgressLogger pl) throws IOException {
        return loadLabelled(LoadMethod.STANDARD, path, null, pl);
    }
    public static SwhBidirectionalGraph loadLabelled(String path) throws IOException {
        return loadLabelled(LoadMethod.STANDARD, path, null, null);
    }
    public static SwhBidirectionalGraph loadLabelledMapped(String path, ProgressLogger pl) throws IOException {
        return loadLabelled(LoadMethod.MAPPED, path, null, pl);
    }
    public static SwhBidirectionalGraph loadLabelledMapped(String path) throws IOException {
        return loadLabelled(LoadMethod.MAPPED, path, null, null);
    }
    public static SwhBidirectionalGraph loadLabelledOffline(String path, ProgressLogger pl) throws IOException {
        return loadLabelled(LoadMethod.OFFLINE, path, null, pl);
    }
    public static SwhBidirectionalGraph loadLabelledOffline(String path) throws IOException {
        return loadLabelled(LoadMethod.OFFLINE, path, null, null);
    }

    @Override
    public SwhBidirectionalGraph copy() {
        return new SwhBidirectionalGraph(forwardGraph, backwardGraph, this.properties);
    }

    @Override
    public SwhBidirectionalGraph transpose() {
        // Swaps the forward and backward graphs; properties are shared.
        return new SwhBidirectionalGraph(super.transpose(), this.properties);
    }

    @Override
    public SwhBidirectionalGraph symmetrize() {
        return new SwhBidirectionalGraph(super.symmetrize(), this.properties);
    }

    public SwhUnidirectionalGraph getForwardGraph() {
        return this.forwardGraph;
    }

    public SwhUnidirectionalGraph getBackwardGraph() {
        return this.backwardGraph;
    }

    /**
     * Returns a *labelled* lazy iterator over the successors of a given node. The iteration terminates
     * when -1 is returned.
     */
    public ArcLabelledNodeIterator.LabelledArcIterator labelledSuccessors(long x) {
        return forwardGraph.labelledSuccessors(x);
    }

    /**
     * Returns a *labelled* lazy iterator over the predecessors of a given node. The iteration
     * terminates when -1 is returned.
     */
    public ArcLabelledNodeIterator.LabelledArcIterator labelledPredecessors(long x) {
        // Predecessors are the successors of the backward (transposed) graph.
        return backwardGraph.labelledSuccessors(x);
    }

    public void close() throws IOException {
        // Properties are shared between the two directions, so closing them once
        // releases both.
        this.properties.close();
    }

    public String getPath() {
        return properties.getPath();
    }

    public long getNodeId(SWHID swhid) {
        return properties.getNodeId(swhid);
    }

    public SWHID getSWHID(long nodeId) {
        return properties.getSWHID(nodeId);
    }

    public Node.Type getNodeType(long nodeId) {
        return properties.getNodeType(nodeId);
    }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhGraph.java b/java/src/main/java/org/softwareheritage/graph/SwhGraph.java
new file mode 100644
index 0000000..b16e677
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhGraph.java
@@ -0,0 +1,47 @@
+package org.softwareheritage.graph;
+
+import java.io.IOException;
+
+/**
+ * Common interface for SWH graph classes.
+ */
public interface SwhGraph {
    /**
     * Cleans up graph resources after use.
     */
    void close() throws IOException;

    /**
     * Converts {@link SWHID} node to long.
     *
     * @param swhid node specified as a {@link SWHID}
     * @return internal long node id
     * @see SWHID
     */
    long getNodeId(SWHID swhid);

    /**
     * Converts long id node to {@link SWHID}.
     *
     * @param nodeId node specified as a long id
     * @return external SWHID
     * @see SWHID
     */
    SWHID getSWHID(long nodeId);

    /**
     * Returns node type.
     *
     * @param nodeId node specified as a long id
     * @return corresponding node type
     * @see Node.Type
     */
    Node.Type getNodeType(long nodeId);

    /**
     * Returns the graph full path.
     *
     * @return graph full path (path and basename of the compressed graph files)
     */
    String getPath();
}
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java b/java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java
new file mode 100644
index 0000000..ef5e304
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java
@@ -0,0 +1,85 @@
+package org.softwareheritage.graph;
+
+import org.softwareheritage.graph.maps.NodeIdMap;
+import org.softwareheritage.graph.maps.NodeTypesMap;
+
+import java.io.IOException;
+
+/**
+ * This objects contains SWH graph properties such as node labels.
+ *
+ * Some property mappings are necessary because Software Heritage uses string based persistent
+ * identifiers (SWHID) while WebGraph uses integers internally.
+ *
+ * The two node ID mappings (long id ↔ SWHID) are used for the input (users refer to the graph
+ * using SWHID) and the output (convert back to SWHID for users results).
+ *
+ * Since graph traversal can be restricted depending on the node type (see {@link AllowedEdges}), a
+ * long id → node type map is stored as well to avoid a full SWHID lookup.
+ *
+ * @see NodeIdMap
+ * @see NodeTypesMap
+ */
public class SwhGraphProperties {
    /** Path and basename of the compressed graph */
    String path;
    /** Mapping long id &harr; SWHIDs */
    NodeIdMap nodeIdMap;
    /** Mapping long id &rarr; node types */
    NodeTypesMap nodeTypesMap;

    protected SwhGraphProperties(String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) {
        this.path = path;
        this.nodeIdMap = nodeIdMap;
        this.nodeTypesMap = nodeTypesMap;
    }

    /**
     * Loads the id and type mappings stored alongside the compressed graph.
     *
     * @param path path and basename of the compressed graph
     */
    public static SwhGraphProperties load(String path) throws IOException {
        return new SwhGraphProperties(path, new NodeIdMap(path), new NodeTypesMap(path));
    }

    /**
     * Cleans up graph resources after use.
     */
    public void close() throws IOException {
        this.nodeIdMap.close();
    }

    /** Returns the path and basename of the compressed graph. */
    public String getPath() {
        return path;
    }

    /**
     * Converts {@link SWHID} node to long.
     *
     * @param swhid node specified as a {@link SWHID}
     * @return internal long node id
     * @see SWHID
     */
    public long getNodeId(SWHID swhid) {
        return nodeIdMap.getNodeId(swhid);
    }

    /**
     * Converts long id node to {@link SWHID}.
     *
     * @param nodeId node specified as a long id
     * @return external SWHID
     * @see SWHID
     */
    public SWHID getSWHID(long nodeId) {
        return nodeIdMap.getSWHID(nodeId);
    }

    /**
     * Returns node type.
     *
     * @param nodeId node specified as a long id
     * @return corresponding node type
     * @see Node.Type
     */
    public Node.Type getNodeType(long nodeId) {
        return nodeTypesMap.getType(nodeId);
    }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhUnidirectionalGraph.java b/java/src/main/java/org/softwareheritage/graph/SwhUnidirectionalGraph.java
new file mode 100644
index 0000000..35c35f7
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhUnidirectionalGraph.java
@@ -0,0 +1,221 @@
+package org.softwareheritage.graph;
+
+import it.unimi.dsi.big.webgraph.ImmutableGraph;
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator;
+import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
+import it.unimi.dsi.logging.ProgressLogger;
+import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Class representing the compressed Software Heritage graph in a single direction.
+ *
+ * The compressed graph is stored using the WebGraph
+ * framework. This class contains an {@link ImmutableGraph} representing the graph itself, as well
+ * as a reference to the object containing the graph properties (e.g. node labels). Optionally,
+ * arc labels (properties stored on the graph edges) can also be loaded with the
+ * loadLabelled...() function family.
+ *
+ * @author The Software Heritage developers
+ * @see SwhGraphProperties
+ * @see SwhUnidirectionalGraph
+ */
+
+public class SwhUnidirectionalGraph extends ImmutableGraph implements SwhGraph {
+ /** Underlying ImmutableGraph */
+ private final ImmutableGraph graph;
+
+ /** Labelled ImmutableGraph, null if labels are not loaded */
+ private ArcLabelledImmutableGraph labelledGraph;
+
+ /** Property data of the graph (id/type mappings etc.) */
+ private SwhGraphProperties properties;
+
+ protected SwhUnidirectionalGraph(ImmutableGraph graph, SwhGraphProperties properties) {
+ this.graph = graph;
+ this.properties = properties;
+ }
+
+ protected SwhUnidirectionalGraph(ArcLabelledImmutableGraph graph, SwhGraphProperties properties) {
+ this.graph = graph;
+ this.labelledGraph = graph;
+ this.properties = properties;
+ }
+
+ /**
+ * Load the (unlabelled) graph only, without the SWH properties.
+ */
+ public static SwhUnidirectionalGraph loadGraphOnly(LoadMethod method, String path, InputStream is,
+ ProgressLogger pl) throws IOException {
+ return new SwhUnidirectionalGraph(ImmutableGraph.load(method, path, is, pl), null);
+ }
+
+ /**
+ * Load the labelled graph only, without the SWH properties.
+ */
+ public static SwhUnidirectionalGraph loadLabelledGraphOnly(LoadMethod method, String path, InputStream is,
+ ProgressLogger pl) throws IOException {
+ BitStreamArcLabelledImmutableGraph g = (BitStreamArcLabelledImmutableGraph) BitStreamArcLabelledImmutableGraph
+ .load(method, path, is, pl);
+ return new SwhUnidirectionalGraph(g, null);
+ }
+
+ /**
+ * Load the SWH properties of the graph from a given path.
+ */
+ public void loadProperties(String path) throws IOException {
+ properties = SwhGraphProperties.load(path);
+ }
+
+ /**
+ * Setter for the SWH graph properties.
+ *
+ * @param properties The {@link SwhGraphProperties} object containing the graph properties
+ */
+ public void setProperties(SwhGraphProperties properties) {
+ this.properties = properties;
+ }
+
+ /**
+ * Load the unlabelled graph and its SWH properties.
+ */
+ public static SwhUnidirectionalGraph load(LoadMethod method, String path, InputStream is, ProgressLogger pl)
+ throws IOException {
+ SwhUnidirectionalGraph g = loadGraphOnly(method, path, is, pl);
+ g.loadProperties(path);
+ return g;
+ }
+
+ /**
+ * Load the labelled graph and its SWH properties.
+ */
+ public static SwhUnidirectionalGraph loadLabelled(LoadMethod method, String path, InputStream is, ProgressLogger pl)
+ throws IOException {
+ SwhUnidirectionalGraph g = loadLabelledGraphOnly(method, path, is, pl);
+ g.loadProperties(path);
+ return g;
+ }
+
+ // loadXXX methods of ImmutableGraph
+ public static SwhUnidirectionalGraph load(String path, ProgressLogger pl) throws IOException {
+ return load(LoadMethod.STANDARD, path, null, pl);
+ }
+ public static SwhUnidirectionalGraph load(String path) throws IOException {
+ return load(LoadMethod.STANDARD, path, null, null);
+ }
+ public static SwhUnidirectionalGraph loadMapped(String path, ProgressLogger pl) throws IOException {
+ return load(LoadMethod.MAPPED, path, null, pl);
+ }
+ public static SwhUnidirectionalGraph loadMapped(String path) throws IOException {
+ return load(LoadMethod.MAPPED, path, null, null);
+ }
+ public static SwhUnidirectionalGraph loadOffline(String path, ProgressLogger pl) throws IOException {
+ return load(LoadMethod.OFFLINE, path, null, pl);
+ }
+ public static SwhUnidirectionalGraph loadOffline(String path) throws IOException {
+ return load(LoadMethod.OFFLINE, path, null, null);
+ }
+
+ // Labelled versions of the loadXXX methods from ImmutableGraph
+ public static SwhUnidirectionalGraph loadLabelled(String path, ProgressLogger pl) throws IOException {
+ return loadLabelled(LoadMethod.STANDARD, path, null, pl);
+ }
+ public static SwhUnidirectionalGraph loadLabelled(String path) throws IOException {
+ return loadLabelled(LoadMethod.STANDARD, path, null, null);
+ }
+ public static SwhUnidirectionalGraph loadLabelledMapped(String path, ProgressLogger pl) throws IOException {
+ return loadLabelled(LoadMethod.MAPPED, path, null, pl);
+ }
+ public static SwhUnidirectionalGraph loadLabelledMapped(String path) throws IOException {
+ return loadLabelled(LoadMethod.MAPPED, path, null, null);
+ }
+ public static SwhUnidirectionalGraph loadLabelledOffline(String path, ProgressLogger pl) throws IOException {
+ return loadLabelled(LoadMethod.OFFLINE, path, null, pl);
+ }
+ public static SwhUnidirectionalGraph loadLabelledOffline(String path) throws IOException {
+ return loadLabelled(LoadMethod.OFFLINE, path, null, null);
+ }
+
+ @Override
+ public SwhUnidirectionalGraph copy() {
+ return new SwhUnidirectionalGraph(this.graph.copy(), this.properties);
+ }
+
+ @Override
+ public boolean randomAccess() {
+ return graph.randomAccess();
+ }
+
+ public void close() throws IOException {
+ this.properties.close();
+ }
+
+ @Override
+ public long numNodes() {
+ return graph.numNodes();
+ }
+
+ @Override
+ public long numArcs() {
+ return graph.numArcs();
+ }
+
+ @Override
+ public LazyLongIterator successors(long nodeId) {
+ return graph.successors(nodeId);
+ }
+
+ /**
+ * Returns a labelled node iterator for scanning the graph sequentially, starting from the
+ * first node.
+ */
+ public ArcLabelledNodeIterator labelledNodeIterator() {
+ if (labelledGraph == null) {
+ throw new RuntimeException("Calling labelledNodeIterator() but labels were not loaded.");
+ }
+ return labelledGraph.nodeIterator();
+ }
+
+ /**
+ * Returns a labelled node iterator for scanning the graph sequentially, starting from a
+ * given node.
+ */
+ public ArcLabelledNodeIterator labelledNodeIterator(long from) {
+ if (labelledGraph == null) {
+ throw new RuntimeException("Calling labelledNodeIterator() but labels were not loaded.");
+ }
+ return labelledGraph.nodeIterator(from);
+ }
+
+ /**
+ * Returns a labelled lazy iterator over the successors of a given node. The iteration
+ * terminates when -1 is returned.
+ */
+ public ArcLabelledNodeIterator.LabelledArcIterator labelledSuccessors(long x) {
+ if (labelledGraph == null) {
+ throw new RuntimeException("Calling labelledNodeIterator() but labels were not loaded.");
+ }
+ return labelledGraph.successors(x);
+ }
+
+ @Override
+ public long outdegree(long nodeId) {
+ return graph.outdegree(nodeId);
+ }
+
+ public String getPath() {
+ return properties.getPath();
+ }
+ public long getNodeId(SWHID swhid) {
+ return properties.getNodeId(swhid);
+ }
+ public SWHID getSWHID(long nodeId) {
+ return properties.getSWHID(nodeId);
+ }
+ public Node.Type getNodeType(long nodeId) {
+ return properties.getNodeType(nodeId);
+ }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java
index 4c8c669..71c4407 100644
--- a/java/src/main/java/org/softwareheritage/graph/Traversal.java
+++ b/java/src/main/java/org/softwareheritage/graph/Traversal.java
@@ -1,580 +1,623 @@
package org.softwareheritage.graph;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.Random;
import java.util.Stack;
import java.util.function.Consumer;
import java.util.function.LongConsumer;
import org.softwareheritage.graph.server.Endpoint;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
/**
* Traversal algorithms on the compressed graph.
*
* Internal implementation of the traversal API endpoints. These methods only input/output internal
* long ids, which are converted in the {@link Endpoint} higher-level class to {@link SWHID}.
*
* @author The Software Heritage developers
* @see Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
- Graph graph;
- /** Graph edge restriction */
+ SwhBidirectionalGraph graph;
+ /** Graph edge restrictions */
AllowedEdges edges;
/** Hash set storing if we have visited a node */
HashSet visited;
/** Hash map storing parent node id for each nodes during a traversal */
Map parentNode;
/** Number of edges accessed during traversal */
long nbEdgesAccessed;
/** The anti Dos limit of edges traversed while a visit */
long maxEdges;
/** The string represent the set of type restriction */
NodesFiltering ndsfilter;
/** random number generator, for random walks */
Random rng;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed
* edges
*/
- public Traversal(Graph graph, String direction, String edgesFmt) {
+ public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt) {
this(graph, direction, edgesFmt, 0);
}
- public Traversal(Graph graph, String direction, String edgesFmt, long maxEdges) {
+ public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges) {
this(graph, direction, edgesFmt, maxEdges, "*");
}
- public Traversal(Graph graph, String direction, String edgesFmt, long maxEdges, String returnTypes) {
+ public Traversal(SwhBidirectionalGraph graph, String direction, String edgesFmt, long maxEdges,
+ String returnTypes) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
if (direction.equals("backward")) {
this.graph = graph.transpose();
} else {
this.graph = graph;
}
this.edges = new AllowedEdges(edgesFmt);
this.visited = new HashSet<>();
this.parentNode = new HashMap<>();
this.nbEdgesAccessed = 0;
this.maxEdges = maxEdges;
this.rng = new Random();
if (returnTypes.equals("*")) {
this.ndsfilter = new NodesFiltering();
} else {
this.ndsfilter = new NodesFiltering(returnTypes);
}
}
/**
* Returns number of accessed edges during traversal.
*
* @return number of edges accessed in last traversal
*/
public long getNbEdgesAccessed() {
return nbEdgesAccessed;
}
/**
* Returns number of accessed nodes during traversal.
*
* @return number of nodes accessed in last traversal
*/
public long getNbNodesAccessed() {
return this.visited.size();
}
    /**
     * Returns lazy iterator of successors of a node while following a specific set of edge types.
     *
     * @param g input graph
     * @param nodeId node specified as a long id
     * @param allowedEdges the specification of which edges can be traversed
     * @return lazy iterator of successors of the node, specified as a
     *         WebGraph LazyLongIterator
     */
    public static LazyLongIterator filterSuccessors(SwhBidirectionalGraph g, long nodeId, AllowedEdges allowedEdges) {
        if (allowedEdges.restrictedTo == null) {
            // All edges are allowed, bypass edge check
            return g.successors(nodeId);
        } else {
            LazyLongIterator allSuccessors = g.successors(nodeId);
            return new LazyLongIterator() {
                @Override
                public long nextLong() {
                    long neighbor;
                    // Skip successors whose (source type, destination type)
                    // pair is not in the allowed edge set.
                    while ((neighbor = allSuccessors.nextLong()) != -1) {
                        if (allowedEdges.isAllowed(g.getNodeType(nodeId), g.getNodeType(neighbor))) {
                            return neighbor;
                        }
                    }
                    return -1;
                }

                @Override
                public long skip(final long n) {
                    // Skip by repeatedly consuming; returns how many elements
                    // were actually skipped (may be < n at end of iteration).
                    long i;
                    for (i = 0; i < n && nextLong() != -1; i++)
                        ;
                    return i;
                }
            };
        }
    }

    /** Instance shortcut for {@link #filterSuccessors(SwhBidirectionalGraph, long, AllowedEdges)}. */
    private LazyLongIterator filterSuccessors(long nodeId, AllowedEdges allowedEdges) {
        return filterSuccessors(graph, nodeId, allowedEdges);
    }
+
/**
* Push version of {@link #leaves} will fire passed callback for each leaf.
*/
public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
break;
}
}
- LazyLongIterator it = graph.successors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
neighborsCnt++;
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
if (neighborsCnt == 0) {
cb.accept(currentNodeId);
}
}
}
/**
* Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
* @return list of node ids corresponding to the leaves
*/
public ArrayList leaves(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
leavesVisitor(srcNodeId, nodeIds::add);
if (ndsfilter.restricted) {
return ndsfilter.filterByNodeTypes(nodeIds, graph);
}
return nodeIds;
}
/**
* Push version of {@link #neighbors}: will fire passed callback on each neighbor.
*/
public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) {
this.nbEdgesAccessed = graph.outdegree(srcNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
return;
}
}
- LazyLongIterator it = graph.successors(srcNodeId, edges);
+ LazyLongIterator it = filterSuccessors(srcNodeId, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
cb.accept(neighborNodeId);
}
}
/**
* Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
* @return list of node ids corresponding to the neighbors
*/
public ArrayList neighbors(long srcNodeId) {
ArrayList nodeIds = new ArrayList<>();
neighborsVisitor(srcNodeId, nodeIds::add);
if (ndsfilter.restricted) {
return ndsfilter.filterByNodeTypes(nodeIds, graph);
}
return nodeIds;
}
/**
* Push version of {@link #visitNodes}: will fire passed callback on each visited node.
*/
public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (nodeCb != null) {
nodeCb.accept(currentNodeId);
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
break;
}
}
- LazyLongIterator it = graph.successors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (edgeCb != null) {
edgeCb.accept(currentNodeId, neighborNodeId);
}
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
}
}
    /** One-argument version to handle callbacks properly (no edge callback) */
    public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) {
        visitNodesVisitor(srcNodeId, cb, null);
    }
/**
* Performs a graph traversal and returns explored nodes.
*
* @param srcNodeId source node
* @return list of explored node ids
*/
public ArrayList visitNodes(long srcNodeId) {
ArrayList nodeIds = new ArrayList<>();
visitNodesVisitor(srcNodeId, nodeIds::add);
if (ndsfilter.restricted) {
return ndsfilter.filterByNodeTypes(nodeIds, graph);
}
return nodeIds;
}
/**
* Push version of {@link #visitPaths}: will fire passed callback on each discovered (complete)
* path.
*/
public void visitPathsVisitor(long srcNodeId, PathConsumer cb) {
Stack currentPath = new Stack<>();
this.nbEdgesAccessed = 0;
visitPathsInternalVisitor(srcNodeId, currentPath, cb);
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList> visitPaths(long srcNodeId) {
ArrayList> paths = new ArrayList<>();
visitPathsVisitor(srcNodeId, paths::add);
return paths;
}
private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
if (this.maxEdges > 0) {
if (nbEdgesAccessed >= this.maxEdges) {
currentPath.pop();
return;
}
}
- LazyLongIterator it = graph.successors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
visitPathsInternalVisitor(neighborNodeId, currentPath, cb);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList path = new ArrayList<>(currentPath);
cb.accept(path);
}
currentPath.pop();
}
/**
* Performs a graph traversal with backtracking, and returns the first found path from source to
* destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public ArrayList walk(long srcNodeId, T dst, String visitOrder) {
long dstNodeId;
if (visitOrder.equals("dfs")) {
dstNodeId = walkInternalDFS(srcNodeId, dst);
} else if (visitOrder.equals("bfs")) {
dstNodeId = walkInternalBFS(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown visit order: " + visitOrder);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Cannot find destination: " + dst);
}
return backtracking(srcNodeId, dstNodeId);
}
/**
* Performs a random walk (picking a random successor at each step) from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids or an empty path to indicate that no suitable path have
* been found
*/
public ArrayList randomWalk(long srcNodeId, T dst) {
return randomWalk(srcNodeId, dst, 0);
}
/**
* Performs a stubborn random walk (picking a random successor at each step) from source to
* destination. The walk is "stubborn" in the sense that it will not give up the first time if a
* satisfying target node is found, but it will retry up to a limited amount of times.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @param retries number of times to retry; 0 means no retries (single walk)
* @return found path as a list of node ids or an empty path to indicate that no suitable path have
* been found
*/
public ArrayList randomWalk(long srcNodeId, T dst, int retries) {
long curNodeId = srcNodeId;
ArrayList path = new ArrayList<>();
this.nbEdgesAccessed = 0;
boolean found;
if (retries < 0) {
throw new IllegalArgumentException("Negative number of retries given: " + retries);
}
while (true) {
path.add(curNodeId);
- LazyLongIterator successors = graph.successors(curNodeId, edges);
+ LazyLongIterator successors = filterSuccessors(curNodeId, edges);
curNodeId = randomPick(successors);
if (curNodeId < 0) {
found = false;
break;
}
if (isDstNode(curNodeId, dst)) {
path.add(curNodeId);
found = true;
break;
}
}
if (found) {
if (ndsfilter.restricted) {
return ndsfilter.filterByNodeTypes(path, graph);
}
return path;
} else if (retries > 0) { // try again
return randomWalk(srcNodeId, dst, retries - 1);
} else { // not found and no retries left
path.clear();
return path;
}
}
/**
* Randomly choose an element from an iterator over Longs using reservoir sampling
*
* @param elements iterator over selection domain
* @return randomly chosen element or -1 if no suitable element was found
*/
private long randomPick(LazyLongIterator elements) {
long curPick = -1;
long seenCandidates = 0;
for (long element; (element = elements.nextLong()) != -1;) {
seenCandidates++;
if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) {
curPick = element;
}
}
return curPick;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalDFS(long srcNodeId, T dst) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
- LazyLongIterator it = graph.successors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalBFS(long srcNodeId, T dst) {
Queue queue = new LinkedList<>();
this.nbEdgesAccessed = 0;
queue.add(srcNodeId);
visited.add(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
- LazyLongIterator it = graph.successors(currentNodeId, edges);
+ LazyLongIterator it = filterSuccessors(currentNodeId, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!visited.contains(neighborNodeId)) {
queue.add(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList backtracking(long srcNodeId, long dstNodeId) {
ArrayList path = new ArrayList<>();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = parentNode.get(currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
/**
 * Find a common descendant between two given nodes using two parallel BFS traversals
*
* @param lhsNode the first node
* @param rhsNode the second node
 * @return the id of a common descendant node, or null if none was found
*/
public Long findCommonDescendant(long lhsNode, long rhsNode) {
Queue lhsStack = new ArrayDeque<>();
Queue rhsStack = new ArrayDeque<>();
HashSet lhsVisited = new HashSet<>();
HashSet rhsVisited = new HashSet<>();
lhsStack.add(lhsNode);
rhsStack.add(rhsNode);
lhsVisited.add(lhsNode);
rhsVisited.add(rhsNode);
this.nbEdgesAccessed = 0;
Long curNode;
while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) {
if (!lhsStack.isEmpty()) {
curNode = lhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
- LazyLongIterator it = graph.successors(curNode, edges);
+ LazyLongIterator it = filterSuccessors(curNode, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!lhsVisited.contains(neighborNodeId)) {
if (rhsVisited.contains(neighborNodeId))
return neighborNodeId;
lhsStack.add(neighborNodeId);
lhsVisited.add(neighborNodeId);
}
}
}
if (!rhsStack.isEmpty()) {
curNode = rhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
- LazyLongIterator it = graph.successors(curNode, edges);
+ LazyLongIterator it = filterSuccessors(curNode, edges);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!rhsVisited.contains(neighborNodeId)) {
if (lhsVisited.contains(neighborNodeId))
return neighborNodeId;
rhsStack.add(neighborNodeId);
rhsVisited.add(neighborNodeId);
}
}
}
}
return null;
}
public interface NodeIdConsumer extends LongConsumer {
/**
* Callback for incrementally receiving node identifiers during a graph visit.
*/
void accept(long nodeId);
}
public interface EdgeIdConsumer {
/**
* Callback for incrementally receiving edge identifiers during a graph visit.
*/
void accept(long srcId, long dstId);
}
public interface PathConsumer extends Consumer> {
/**
* Callback for incrementally receiving node paths (made of node identifiers) during a graph visit.
*/
void accept(ArrayList path);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java b/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java
index bdbb60d..af09f5d 100644
--- a/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java
+++ b/java/src/main/java/org/softwareheritage/graph/algo/TopologicalTraversal.java
@@ -1,70 +1,70 @@
package org.softwareheritage.graph.algo;
import com.google.common.primitives.Longs;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Traversal;
import org.softwareheritage.graph.experiments.forks.ForkCC;
import java.io.File;
import java.io.IOException;
public class TopologicalTraversal {
- public static void run(final Graph graph, Traversal.NodeIdConsumer cb) throws IOException {
+ public static void run(final SwhBidirectionalGraph graph, Traversal.NodeIdConsumer cb) throws IOException {
final long[][] indegree = LongBigArrays.newBigArray(graph.numNodes());
final ProgressLogger pl = new ProgressLogger();
pl.itemsName = "nodes";
pl.expectedUpdates = graph.numNodes();
pl.start("Fetching indegrees...");
long n = graph.numNodes();
for (long i = 0; i < graph.numNodes(); ++i) {
BigArrays.add(indegree, i, graph.indegree(i));
}
pl.done();
LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
pl.start("Traversal in topological order...");
for (long i = 0; i < graph.numNodes(); ++i) {
if (visited.getBoolean(i) || BigArrays.get(indegree, i) != 0L) {
continue;
}
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
cb.accept(currentNode);
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
BigArrays.add(indegree, succ, -1L);
if (visited.getBoolean(succ) || BigArrays.get(indegree, succ) != 0)
continue;
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
pl.update();
}
}
pl.done();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
index 9397de7..7f05184 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
@@ -1,45 +1,45 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.JSAPException;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.benchmark.utils.Statistics;
import org.softwareheritage.graph.benchmark.utils.Timing;
import java.io.IOException;
import java.util.ArrayList;
/**
* Benchmark to time edge access time.
*
* @author The Software Heritage developers
*/
public class AccessEdge {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
- Graph graph = Graph.loadMapped(bench.args.graphPath);
+ SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
ArrayList timings = new ArrayList<>();
for (long nodeId : nodeIds) {
long startTime = Timing.start();
LazyLongIterator neighbors = graph.successors(nodeId);
long firstNeighbor = neighbors.nextLong();
double duration = Timing.stop(startTime);
timings.add(duration);
}
System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):");
Statistics stats = new Statistics(timings);
stats.printAll();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
index 43aec2e..2ca9b58 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
@@ -1,107 +1,107 @@
package org.softwareheritage.graph.benchmark;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import java.io.File;
import java.io.IOException;
public class BFS {
private final static Logger LOGGER = LoggerFactory.getLogger(BFS.class);
private final ImmutableGraph graph;
public BFS(ImmutableGraph graph) {
this.graph = graph;
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
SimpleJSAP jsap = new SimpleJSAP(BFS.class.getName(), "",
new Parameter[]{
new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
"graph", "Basename of the compressed graph"),
new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'T',
"transposed", "Use transposed graph (default: false)"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) throws IOException {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
boolean useTransposed = config.getBoolean("useTransposed");
System.err.println("Loading graph " + graphPath + " ...");
- Graph graph = Graph.loadMapped(graphPath);
+ SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(graphPath);
System.err.println("Graph loaded.");
if (useTransposed)
graph = graph.transpose();
BFS bfs = new BFS(graph);
bfs.bfsperm();
}
// Partly inlined from it.unimi.dsi.law.big.graph.BFS
private void bfsperm() throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
// Use a disk based queue to store BFS frontier
final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
final LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
final ProgressLogger pl = new ProgressLogger(LOGGER);
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting breadth-first visit...");
for (long i = 0; i < n; i++) {
if (visited.getBoolean(i))
continue;
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (!visited.getBoolean(succ)) {
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
}
pl.update();
}
}
pl.done();
queue.close();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
index 98dd854..591c7d5 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
@@ -1,154 +1,154 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.*;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.SWHID;
import org.softwareheritage.graph.benchmark.utils.Random;
import org.softwareheritage.graph.benchmark.utils.Statistics;
import org.softwareheritage.graph.server.Endpoint;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.StringJoiner;
import java.util.function.Function;
/**
* Benchmark common utility functions.
*
* @author The Software Heritage developers
*/
public class Benchmark {
/** CSV separator for log file */
final String CSV_SEPARATOR = ";";
/** Command line arguments */
public Args args;
/**
* Constructor.
*/
public Benchmark() {
this.args = new Args();
}
/**
* Parses benchmark command line arguments.
*
* @param args command line arguments
*/
public void parseCommandLineArgs(String[] args) throws JSAPException {
SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(),
"Benchmark tool for Software Heritage use-cases scenarios.",
new Parameter[]{
new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the compressed graph."),
new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
"nb-nodes", "Number of random nodes used to do the benchmark."),
new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l',
"log-file", "File name to output CSV format benchmark log."),
new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "seed",
"Random generator seed."),});
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
this.args.graphPath = config.getString("graphPath");
this.args.nbNodes = config.getInt("nbNodes");
this.args.logFile = config.getString("logFile");
this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random();
}
/**
* Creates CSV file for log output.
*/
public void createCSVLogFile() throws IOException {
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) {
StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR);
csvHeader.add("use case name").add("SWHID").add("number of edges accessed").add("traversal timing")
.add("swhid2node timing").add("node2swhid timing");
csvLog.write(csvHeader.toString() + "\n");
}
}
/**
* Times a specific endpoint and outputs individual datapoints along with aggregated statistics.
*
* @param useCaseName benchmark use-case name
* @param graph compressed graph used in the benchmark
* @param nodeIds node ids to use as starting point for the endpoint traversal
* @param operation endpoint function to benchmark
 * @param dstFmt destination formatted string as described in the
 * Software Heritage graph API documentation
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
*/
- public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
+ public void timeEndpoint(String useCaseName, SwhBidirectionalGraph graph, long[] nodeIds,
Function operation, String dstFmt, String algorithm) throws IOException {
ArrayList timings = new ArrayList<>();
ArrayList timingsNormalized = new ArrayList<>();
ArrayList nbEdgesAccessed = new ArrayList<>();
final boolean append = true;
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) {
for (long nodeId : nodeIds) {
SWHID swhid = graph.getSWHID(nodeId);
Endpoint.Output output = (dstFmt == null)
? operation.apply(new Endpoint.Input(swhid))
: operation.apply(new Endpoint.Input(swhid, dstFmt, algorithm));
StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR);
csvLine.add(useCaseName).add(swhid.toString()).add(Long.toString(output.meta.nbEdgesAccessed))
.add(Double.toString(output.meta.timings.traversal))
.add(Double.toString(output.meta.timings.swhid2node))
.add(Double.toString(output.meta.timings.node2swhid));
csvLog.write(csvLine.toString() + "\n");
timings.add(output.meta.timings.traversal);
nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed);
if (output.meta.nbEdgesAccessed != 0) {
timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
}
}
}
System.out.println("\n" + useCaseName + " use-case:");
System.out.println("timings:");
Statistics stats = new Statistics(timings);
stats.printAll();
System.out.println("timings normalized:");
Statistics statsNormalized = new Statistics(timingsNormalized);
statsNormalized.printAll();
System.out.println("nb edges accessed:");
Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed);
statsNbEdgesAccessed.printAll();
}
/**
* Same as {@link #timeEndpoint} but without destination or algorithm specified to endpoint call.
*/
- public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
+ public void timeEndpoint(String useCaseName, SwhBidirectionalGraph graph, long[] nodeIds,
Function operation) throws IOException {
timeEndpoint(useCaseName, graph, nodeIds, operation, null, null);
}
/**
* Input arguments.
*/
public class Args {
/** Basename of the compressed graph */
public String graphPath;
/** Number of random nodes to use for the benchmark */
public int nbNodes;
/** File name for CSV format benchmark log */
public String logFile;
/** Random generator */
public Random random;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
index 6a0cf58..61b5cbf 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
@@ -1,42 +1,42 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.JSAPException;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.server.Endpoint;
import java.io.IOException;
/**
* Benchmark Software Heritage
* browsing
* use-cases scenarios.
*
* @author The Software Heritage developers
*/
public class Browsing {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
- Graph graph = Graph.loadMapped(bench.args.graphPath);
+ SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath);
long[] dirNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
long[] revNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir");
Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors);
bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths);
bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
index 9b3c4c9..960ef5f 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
@@ -1,45 +1,45 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.JSAPException;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.server.Endpoint;
import java.io.IOException;
/**
* Benchmark Software Heritage
* provenance
* use-cases scenarios.
*
* @author The Software Heritage developers
*/
public class Provenance {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
- Graph graph = Graph.loadMapped(bench.args.graphPath);
+ SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev");
Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
bench.timeEndpoint("commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
bench.timeEndpoint("complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
bench.timeEndpoint("origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
bench.timeEndpoint("origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
bench.timeEndpoint("complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
index c0e19f6..9403047 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
@@ -1,37 +1,37 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.JSAPException;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.server.Endpoint;
import java.io.IOException;
/**
* Benchmark Software Heritage
* vault use-case
* scenario.
*
* @author The Software Heritage developers
*/
public class Vault {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
- Graph graph = Graph.loadMapped(bench.args.graphPath);
+ SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint endpoint = new Endpoint(graph, "forward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java b/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
index ee4c530..8ee0b63 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
@@ -1,67 +1,67 @@
package org.softwareheritage.graph.benchmark.utils;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import java.util.PrimitiveIterator;
/**
* Random related utility class.
*
* @author The Software Heritage developers
*/
public class Random {
/** Internal pseudorandom generator */
java.util.Random random;
/**
* Constructor.
*/
public Random() {
this.random = new java.util.Random();
}
/**
* Constructor.
*
* @param seed random generator seed
*/
public Random(long seed) {
this.random = new java.util.Random(seed);
}
/**
* Generates random node ids.
*
* @param graph graph used to pick node ids
* @param nbNodes number of node ids to generate
* @return an array of random node ids
*/
- public long[] generateNodeIds(Graph graph, int nbNodes) {
+ public long[] generateNodeIds(SwhBidirectionalGraph graph, int nbNodes) {
return random.longs(nbNodes, 0, graph.numNodes()).toArray();
}
/**
* Generates random node ids with a specific type.
*
* @param graph graph used to pick node ids
* @param nbNodes number of node ids to generate
* @param expectedType specific node type to pick
* @return an array of random node ids
*/
- public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) {
+ public long[] generateNodeIdsOfType(SwhBidirectionalGraph graph, int nbNodes, Node.Type expectedType) {
PrimitiveIterator.OfLong nodes = random.longs(0, graph.numNodes()).iterator();
long[] nodeIds = new long[nbNodes];
long nextId;
for (int i = 0; i < nbNodes; i++) {
do {
nextId = nodes.nextLong();
} while (graph.getNodeType(nextId) != expectedType);
nodeIds[i] = nextId;
}
return nodeIds;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
index f36ce88..56c8369 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
@@ -1,62 +1,62 @@
package org.softwareheritage.graph.experiments.forks;
import com.martiansoftware.jsap.*;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Traversal;
import java.io.IOException;
import java.util.Scanner;
public class FindCommonAncestor {
- private Graph graph;
+ private SwhBidirectionalGraph graph;
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename);
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
System.err.println("Graph loaded.");
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
SimpleJSAP jsap = new SimpleJSAP(FindCommonAncestor.class.getName(), "",
new Parameter[]{
new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e',
"edges", "Edges constraints"),
new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
"graph", "Basename of the compressed graph"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String edgesFmt = config.getString("edgesFmt");
FindCommonAncestor fca = new FindCommonAncestor();
try {
fca.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
Scanner input = new Scanner(System.in);
while (input.hasNextLong()) {
long lhsNode = input.nextLong();
long rhsNode = input.nextLong();
Traversal t = new Traversal(fca.graph.symmetrize(), "forward", edgesFmt);
System.out.println(t.findCommonDescendant(lhsNode, rhsNode));
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
index 2e5afd9..b5f318c 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
@@ -1,123 +1,123 @@
package org.softwareheritage.graph.experiments.forks;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import java.io.IOException;
import java.util.*;
public class FindPath {
- private Graph graph;
+ private SwhBidirectionalGraph graph;
private Long emptySnapshot;
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename).symmetrize();
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename).symmetrize();
System.err.println("Graph loaded.");
this.emptySnapshot = null;
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
SimpleJSAP jsap = new SimpleJSAP(FindPath.class.getName(), "",
new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
'g', "graph", "Basename of the compressed graph"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
private boolean nodeIsEmptySnapshot(Long node) {
if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP
&& this.graph.outdegree(node) == 0) {
System.err.println("Found empty snapshot: " + node);
this.emptySnapshot = node;
}
return node.equals(this.emptySnapshot);
}
private Boolean shouldVisit(Long node) {
Node.Type nt = this.graph.getNodeType(node);
if (nt != Node.Type.REV && nt != Node.Type.REL && nt != Node.Type.SNP && nt != Node.Type.ORI) {
return false;
}
if (this.nodeIsEmptySnapshot(node))
return false;
return true;
}
private ArrayList findPath(Long src, Long dst) {
HashSet visited = new HashSet<>();
Queue queue = new ArrayDeque<>();
Map parentNode = new HashMap<>();
queue.add(src);
visited.add(src);
while (!queue.isEmpty()) {
long currentNode = queue.poll();
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (!shouldVisit(succ) || visited.contains(succ))
continue;
visited.add(succ);
queue.add(succ);
parentNode.put(succ, currentNode);
if (succ == dst) {
ArrayList path = new ArrayList<>();
long n = dst;
while (n != src) {
path.add(n);
n = parentNode.get(n);
}
path.add(src);
Collections.reverse(path);
return path;
}
}
}
return null;
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
FindPath fpath = new FindPath();
try {
fpath.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
Scanner input = new Scanner(System.in);
while (input.hasNextLong()) {
long lhsNode = input.nextLong();
long rhsNode = input.nextLong();
ArrayList path = fpath.findPath(lhsNode, rhsNode);
if (path != null) {
for (Long n : path) {
System.out.format("%d ", n);
}
System.out.println();
} else {
System.out.println("null");
}
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
index 714df2e..bd5459f 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
@@ -1,249 +1,249 @@
package org.softwareheritage.graph.experiments.forks;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
public class ForkCC {
public Boolean includeRootDir;
- private Graph graph;
+ private SwhBidirectionalGraph graph;
private Long emptySnapshot;
private LongArrayBitVector visited;
private LongArrayBitVector whitelist;
/**
 * Parses command-line options via JSAP; exits with status 1 when JSAP
 * prints a usage message.
 *
 * @param args raw command-line arguments
 * @return the parsed configuration, or null if JSAP parsing itself threw
 */
private static JSAPResult parse_args(String[] args) {
    JSAPResult parsed = null;
    try {
        Parameter[] options = {
                new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
                        "graph", "Basename of the compressed graph"),
                new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't',
                        "whitelist", "Whitelist of origins"),
                new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'R',
                        "includerootdir", "Include root directory (default: false)"),
                new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                        "outdir", "Directory where to put the results")};
        SimpleJSAP jsap = new SimpleJSAP(ForkCC.class.getName(), "", options);
        parsed = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
    } catch (JSAPException e) {
        e.printStackTrace();
    }
    return parsed;
}
private static void printDistribution(ArrayList> components) {
TreeMap distribution = new TreeMap<>();
for (ArrayList component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry entry : distribution.entrySet()) {
System.out.format("%d %d\n", entry.getKey(), entry.getValue());
}
}
private static void printLargestComponent(ArrayList> components) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList component = components.get(indexLargest);
for (Long node : component) {
System.out.println(node);
}
}
// Memory-maps the compressed graph and symmetrizes it (fork components are
// computed on the undirected structure), then resets all per-run state.
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename).symmetrize();
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename).symmetrize();
System.err.println("Graph loaded.");
this.emptySnapshot = null;
this.whitelist = null;
this.visited = null;
this.includeRootDir = null;
}
/**
 * Lazily identifies the empty snapshot node (type SNP with outdegree 0) and
 * reports whether the given node is it. The first matching node seen is
 * cached in this.emptySnapshot.
 */
private boolean nodeIsEmptySnapshot(Long node) {
    boolean notYetFound = this.emptySnapshot == null;
    if (notYetFound && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) {
        System.err.println("Found empty snapshot: " + node);
        this.emptySnapshot = node;
    }
    return node.equals(this.emptySnapshot);
}
/**
 * Decides whether the BFS should traverse the given node: content nodes,
 * directories (unless includeRootDir is set), the empty snapshot and
 * already-visited nodes are all skipped.
 */
private Boolean shouldVisit(Long node) {
    Node.Type nodeType = this.graph.getNodeType(node);
    if (nodeType == Node.Type.CNT || (nodeType == Node.Type.DIR && !includeRootDir)) {
        return false;
    }
    return !this.nodeIsEmptySnapshot(node) && !visited.getBoolean(node);
}
/**
 * Computes fork connected components with a BFS over the symmetrized graph.
 * Each component collects the (whitelisted) origin nodes it contains;
 * contents, and directories not reached from a revision, are never
 * traversed.
 *
 * @param pl progress logger; expectedUpdates/itemsName are set here
 * @return non-empty components, each a list of origin node ids
 * @throws IOException if the on-disk BFS queue cannot be created
 */
private ArrayList> compute(ProgressLogger pl) throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
// Use a disk based queue to store BFS frontier
final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
visited = LongArrayBitVector.ofLength(n);
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting connected components visit...");
ArrayList> components = new ArrayList<>();
for (long i = 0; i < n; i++) {
// Directories never seed a component on their own.
if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR)
continue;
ArrayList component = new ArrayList<>();
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
Node.Type cur_nt = this.graph.getNodeType(currentNode);
// Only origins (optionally filtered by the whitelist) are recorded.
if (cur_nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) {
// TODO: add a check that the origin has >=1 non-empty snapshot
component.add(currentNode);
}
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (!shouldVisit(succ))
continue;
// Directories are only entered when coming from a revision.
if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV)
continue;
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
pl.update();
}
if (component.size() > 0) {
components.add(component);
}
}
pl.done();
queue.close();
return components;
}
private static void printDistribution(ArrayList> components, Formatter out) {
TreeMap distribution = new TreeMap<>();
for (ArrayList component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry entry : distribution.entrySet()) {
out.format("%d %d\n", entry.getKey(), entry.getValue());
}
}
private static void printLargestComponent(ArrayList> components, Formatter out) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList component = components.get(indexLargest);
for (Long node : component) {
out.format("%d\n", node);
}
}
private static void printAllComponents(ArrayList> components, Formatter out) {
for (int i = 1; i < components.size(); ++i) {
ArrayList component = components.get(i);
for (Long node : component) {
out.format("%d ", node);
}
out.format("\n");
}
}
/**
 * Loads a whitelist of origin node ids (one id per line) into a bit vector
 * sized to the graph.
 *
 * Fix: the Scanner (and underlying file handle) was never closed; it is now
 * managed with try-with-resources.
 *
 * @param path path of the whitelist file
 */
private void parseWhitelist(String path) {
    System.err.println("Loading whitelist " + path + " ...");
    this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes());
    try (Scanner scanner = new Scanner(new File(path))) {
        while (scanner.hasNextLong()) {
            whitelist.set(scanner.nextLong());
        }
        System.err.println("Whitelist loaded.");
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}
/**
 * Entry point: loads the (symmetrized) graph, computes fork connected
 * components, and writes the size distribution, the largest component and
 * all components under the output directory.
 *
 * Fix: the three Formatters were never closed, so their buffered output
 * could be lost; they are now managed with try-with-resources.
 */
public static void main(String[] args) {
    JSAPResult config = parse_args(args);
    String graphPath = config.getString("graphPath");
    String whitelistPath = config.getString("whitelistPath");
    boolean includeRootDir = config.getBoolean("includeRootDir");
    String outdirPath = config.getString("outdir");
    ForkCC forkCc = new ForkCC();
    try {
        forkCc.load_graph(graphPath);
        forkCc.includeRootDir = includeRootDir;
    } catch (IOException e) {
        System.out.println("Could not load graph: " + e);
        System.exit(2);
    }
    if (whitelistPath != null) {
        forkCc.parseWhitelist(whitelistPath);
    }
    ProgressLogger logger = new ProgressLogger();
    // noinspection ResultOfMethodCallIgnored
    new File(outdirPath).mkdirs();
    try {
        ArrayList<ArrayList<Long>> components = forkCc.compute(logger);
        // Closing each Formatter flushes its buffered output to disk.
        try (Formatter distribution = new Formatter(outdirPath + "/distribution.txt");
                Formatter largest = new Formatter(outdirPath + "/largest_clique.txt");
                Formatter all = new Formatter(outdirPath + "/all_cliques.txt")) {
            printDistribution(components, distribution);
            printLargestComponent(components, largest);
            printAllComponents(components, all);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    logger.done();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java
index 4d749bd..aa57ae6 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java
@@ -1,223 +1,223 @@
package org.softwareheritage.graph.experiments.forks;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.LoggerFactory;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
public class ForkCliques {
- private Graph graph;
+ private SwhBidirectionalGraph graph;
private LongArrayBitVector whitelist;
// Memory-maps the compressed graph (kept directed, unlike ForkCC) and
// clears the origin whitelist.
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename);
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
System.err.println("Graph loaded.");
this.whitelist = null;
}
/**
 * Parses command-line options via JSAP; exits with status 1 when JSAP
 * prints a usage message, and returns null when parsing itself threw.
 */
private static JSAPResult parse_args(String[] args) {
    try {
        SimpleJSAP jsap = new SimpleJSAP(ForkCliques.class.getName(), "",
                new Parameter[]{
                        new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
                                "graph", "Basename of the compressed graph"),
                        new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't',
                                "whitelist", "Whitelist of origins"),
                        new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                                "outdir", "Directory where to put the results"),});
        JSAPResult parsed = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
        return parsed;
    } catch (JSAPException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Backward DFS from a base revision, collecting the (whitelisted) origin
 * nodes that can reach it; directory and content predecessors are never
 * traversed. The result is sorted so it can be fingerprinted
 * deterministically.
 *
 * Generic type parameters (stripped during extraction) are restored.
 *
 * NOTE(review): origins are added to `res` but not to `seen`, so an origin
 * reachable through several predecessors may appear more than once —
 * confirm whether duplicates are intended before deduplicating.
 */
private ArrayList<Long> dfsAt(Long baseNode) {
    ArrayList<Long> res = new ArrayList<>();
    final Deque<Long> stack = new ArrayDeque<>();
    HashSet<Long> seen = new HashSet<>();
    stack.push(baseNode);
    while (!stack.isEmpty()) {
        final Long currentNode = stack.pop();
        final LazyLongIterator iterator = this.graph.predecessors(currentNode);
        long succ;
        while ((succ = iterator.nextLong()) != -1) {
            if (!seen.contains(succ)) {
                Node.Type nt = this.graph.getNodeType(succ);
                if (nt == Node.Type.DIR || nt == Node.Type.CNT)
                    continue;
                if (nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(succ))) {
                    res.add(succ);
                } else {
                    stack.push(succ);
                    seen.add(succ);
                }
            }
        }
    }
    Collections.sort(res);
    return res;
}
/**
 * A "base" revision is a revision none of whose successors is itself a
 * revision, i.e. the root of a revision chain.
 */
private boolean isBaseRevision(Long node) {
    if (this.graph.getNodeType(node) != Node.Type.REV)
        return false;
    final LazyLongIterator successors = this.graph.successors(node);
    for (long succ = successors.nextLong(); succ != -1; succ = successors.nextLong()) {
        if (this.graph.getNodeType(succ) == Node.Type.REV)
            return false;
    }
    return true;
}
/**
 * Computes a deterministic SHA-256 fingerprint of a sorted cluster of node
 * ids, used only for in-memory deduplication of clusters.
 *
 * Fix: the digest bytes are now decoded with ISO-8859-1, a lossless
 * byte-to-char mapping. The previous platform-default decoding (typically
 * UTF-8) maps invalid byte sequences to replacement characters, so two
 * distinct digests could produce the same string and distinct clusters
 * could be deduplicated by mistake.
 */
static private String fingerprint(ArrayList<Long> cluster) {
    MessageDigest digest;
    try {
        digest = MessageDigest.getInstance("SHA-256");
    } catch (NoSuchAlgorithmException e) {
        e.printStackTrace();
        return null;
    }
    for (Long n : cluster)
        digest.update(Longs.toByteArray(n));
    return new String(digest.digest(), java.nio.charset.StandardCharsets.ISO_8859_1);
}
private ArrayList> compute(ProgressLogger pl) {
final long n = this.graph.numNodes();
HashSet fingerprints = new HashSet<>();
ArrayList> clusters = new ArrayList<>();
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting topological sort...");
for (long i = 0; i < n; i++) {
if (isBaseRevision(i)) {
ArrayList currentCluster = dfsAt(i);
String clusterFp = fingerprint(currentCluster);
if (!fingerprints.contains(clusterFp)) {
fingerprints.add(clusterFp);
clusters.add(currentCluster);
}
}
pl.update();
}
pl.done();
return clusters;
}
private static void printDistribution(ArrayList> components, Formatter out) {
TreeMap distribution = new TreeMap<>();
for (ArrayList component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry entry : distribution.entrySet()) {
out.format("%d %d\n", entry.getKey(), entry.getValue());
}
}
private static void printLargestComponent(ArrayList> components, Formatter out) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList component = components.get(indexLargest);
for (Long node : component) {
out.format("%d\n", node);
}
}
private static void printAllComponents(ArrayList> components, Formatter out) {
for (int i = 1; i < components.size(); ++i) {
ArrayList component = components.get(i);
for (Long node : component) {
out.format("%d ", node);
}
out.format("\n");
}
}
/**
 * Loads a whitelist of origin node ids (one id per line) into a bit vector
 * sized to the graph.
 *
 * Fix: the Scanner (and underlying file handle) was never closed; it is now
 * managed with try-with-resources.
 *
 * @param path path of the whitelist file
 */
private void parseWhitelist(String path) {
    System.err.println("Loading whitelist " + path + " ...");
    this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes());
    try (Scanner scanner = new Scanner(new File(path))) {
        while (scanner.hasNextLong()) {
            whitelist.set(scanner.nextLong());
        }
        System.err.println("Whitelist loaded.");
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
}
/**
 * Entry point: loads the graph, computes distinct fork cliques, and writes
 * the size distribution, largest clique and all cliques under outdir.
 *
 * Fix: the three Formatters were never closed, so their buffered output
 * could be lost; they are now managed with try-with-resources.
 */
public static void main(String[] args) {
    JSAPResult config = parse_args(args);
    String graphPath = config.getString("graphPath");
    String whitelistPath = config.getString("whitelistPath");
    String outdirPath = config.getString("outdir");
    ForkCliques forkCliques = new ForkCliques();
    try {
        forkCliques.load_graph(graphPath);
    } catch (IOException e) {
        System.out.println("Could not load graph: " + e);
        System.exit(2);
    }
    if (whitelistPath != null) {
        forkCliques.parseWhitelist(whitelistPath);
    }
    Logger rootLogger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
    rootLogger.setLevel(Level.DEBUG);
    ProgressLogger logger = new ProgressLogger(rootLogger);
    ArrayList<ArrayList<Long>> components = forkCliques.compute(logger);
    // noinspection ResultOfMethodCallIgnored
    new File(outdirPath).mkdirs();
    // Closing each Formatter flushes its buffered output to disk.
    try (Formatter distribution = new Formatter(outdirPath + "/distribution.txt");
            Formatter largest = new Formatter(outdirPath + "/largest_clique.txt");
            Formatter all = new Formatter(outdirPath + "/all_cliques.txt")) {
        printDistribution(components, distribution);
        printLargestComponent(components, largest);
        printAllComponents(components, all);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }
    logger.done();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java
index 332a908..8389962 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java
@@ -1,88 +1,88 @@
package org.softwareheritage.graph.experiments.forks;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import java.io.IOException;
import java.util.ArrayList;
public class ListEmptyOrigins {
- private Graph graph;
+ private SwhBidirectionalGraph graph;
private Long emptySnapshot;
/**
 * Parses the command line (only the required --graph/-g option); exits
 * with status 1 when JSAP prints a usage message.
 */
private static JSAPResult parse_args(String[] args) {
    JSAPResult config = null;
    try {
        Parameter[] options = {new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
                'g', "graph", "Basename of the compressed graph")};
        SimpleJSAP jsap = new SimpleJSAP(ListEmptyOrigins.class.getName(), "", options);
        config = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
    } catch (JSAPException e) {
        e.printStackTrace();
    }
    return config;
}
/**
 * Entry point: loads the graph and prints the id of every origin none of
 * whose snapshots has outgoing edges.
 */
public static void main(String[] args) {
    JSAPResult config = parse_args(args);
    String graphPath = config.getString("graphPath");
    ListEmptyOrigins leo = new ListEmptyOrigins();
    try {
        leo.load_graph(graphPath);
    } catch (IOException e) {
        System.out.println("Could not load graph: " + e);
        System.exit(2);
    }
    // Restored element type: compute returns a list of Long node ids.
    ArrayList<Long> badlist = leo.compute(leo.graph);
    for (Long bad : badlist) {
        System.out.println(bad);
    }
}
// Memory-maps the compressed graph and resets the cached empty-snapshot id.
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename);
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
System.err.println("Graph loaded.");
this.emptySnapshot = null;
}
/**
 * Lazily identifies the empty snapshot node (type SNP with outdegree 0) and
 * reports whether the given node is it; also traces every queried node to
 * stderr.
 */
private boolean nodeIsEmptySnapshot(Long node) {
    System.err.println(this.graph.getNodeType(node) + " " + this.graph.outdegree(node) + " " + node);
    boolean notYetFound = this.emptySnapshot == null;
    if (notYetFound && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) {
        System.err.println("Found empty snapshot: " + node);
        this.emptySnapshot = node;
    }
    return node.equals(this.emptySnapshot);
}
/**
 * Lists "bad" origins: origins for which no successor (snapshot) has any
 * outgoing edge.
 *
 * Generic type parameters (stripped during extraction) are restored, and
 * the successor scan now stops at the first non-empty snapshot instead of
 * exhausting the iterator.
 *
 * @param graph the graph whose origins are scanned
 * @return node ids of origins with no non-empty snapshot
 */
private ArrayList<Long> compute(ImmutableGraph graph) {
    final long n = graph.numNodes();
    ArrayList<Long> bad = new ArrayList<>();
    for (long i = 0; i < n; i++) {
        Node.Type nt = this.graph.getNodeType(i);
        if (nt != Node.Type.ORI)
            continue;
        final LazyLongIterator iterator = graph.successors(i);
        long succ;
        boolean found = false;
        while ((succ = iterator.nextLong()) != -1) {
            if (this.graph.outdegree(succ) > 0) {
                found = true;
                break; // one non-empty snapshot is enough
            }
        }
        if (!found)
            bad.add(i);
    }
    return bad;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
index 89fd675..e3fdb3b 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
@@ -1,130 +1,130 @@
package org.softwareheritage.graph.experiments.multiplicationfactor;
import com.martiansoftware.jsap.*;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.Traversal;
import org.softwareheritage.graph.benchmark.utils.Timing;
import java.io.IOException;
import java.util.Scanner;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class GenDistribution {
- private Graph graph;
+ private SwhBidirectionalGraph graph;
/**
 * Parses command-line options (graph basename, source/destination node
 * types, edge constraints, thread count); exits with status 1 when JSAP
 * prints a usage message.
 */
private static JSAPResult parse_args(String[] args) {
    JSAPResult parsed = null;
    try {
        Parameter[] options = {
                new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
                        "graph", "Basename of the compressed graph"),
                new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's',
                        "srctype", "Source node type"),
                new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd',
                        "dsttype", "Destination node type"),
                new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e',
                        "edges", "Edges constraints"),
                new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't',
                        "numthreads", "Number of threads")};
        SimpleJSAP jsap = new SimpleJSAP(GenDistribution.class.getName(), "", options);
        parsed = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
    } catch (JSAPException e) {
        e.printStackTrace();
    }
    return parsed;
}
/**
 * Entry point: reads node ids from stdin and, for every node of the
 * requested source type, runs a backward traversal counting reachable
 * nodes of the destination type. One producer thread feeds a bounded queue;
 * numThreads workers each print "node count nbNodes nbEdges time" lines.
 */
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
Node.Type srcType = Node.Type.fromStr(config.getString("srcType"));
Node.Type dstType = Node.Type.fromStr(config.getString("dstType"));
String edgesFmt = config.getString("edgesFmt");
int numThreads = config.getInt("numThreads");
GenDistribution tp = new GenDistribution();
try {
tp.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
// Sentinel value; one is enqueued per worker so each one terminates.
final long END_OF_QUEUE = -1L;
ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads);
ExecutorService service = Executors.newFixedThreadPool(numThreads + 1);
// Producer: filters stdin ids down to nodes of the requested source type.
service.submit(() -> {
try {
Scanner input = new Scanner(System.in);
while (input.hasNextLong()) {
long node = input.nextLong();
if (tp.graph.getNodeType(node) == srcType) {
queue.put(node);
}
}
} catch (InterruptedException e) {
e.printStackTrace();
} finally {
for (int i = 0; i < numThreads; ++i) {
try {
queue.put(END_OF_QUEUE);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
});
// Workers: each traverses on its own graph copy (traversal state is per-thread).
for (int i = 0; i < numThreads; ++i) {
service.submit(() -> {
- Graph thread_graph = tp.graph.copy();
+ SwhBidirectionalGraph thread_graph = tp.graph.copy();
long startTime;
double totalTime;
while (true) {
Long node = null;
try {
node = queue.take();
} catch (InterruptedException e) {
e.printStackTrace();
}
if (node == null || node == END_OF_QUEUE) {
return;
}
Traversal t = new Traversal(thread_graph, "backward", edgesFmt);
int[] count = {0};
startTime = Timing.start();
t.visitNodesVisitor(node, (curnode) -> {
if (tp.graph.getNodeType(curnode) == dstType) {
count[0]++;
}
});
totalTime = Timing.stop(startTime);
System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(),
t.getNbEdgesAccessed(), totalTime);
}
});
}
service.shutdown();
}
// Memory-maps the compressed graph.
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename);
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
System.err.println("Graph loaded.");
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java
index ad7eadf..53bcc49 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java
@@ -1,188 +1,188 @@
package org.softwareheritage.graph.experiments.topology;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.Util;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.XoRoShiRo128PlusRandom;
import org.softwareheritage.graph.*;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.*;
import java.util.concurrent.*;
public class AveragePaths {
- private final Graph graph;
+ private final SwhBidirectionalGraph graph;
private final Subgraph subgraph;
private final ConcurrentHashMap result;
private final String outdir;
// Loads the graph, restricts it to the allowed node types through a
// Subgraph view, and prepares the shared (thread-safe) distance->count map.
public AveragePaths(String graphBasename, String allowedNodes, String outdir) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = Graph.loadMapped(graphBasename);
+ this.graph = SwhBidirectionalGraph.loadMapped(graphBasename);
this.subgraph = new Subgraph(this.graph, new AllowedNodes(allowedNodes));
this.outdir = outdir;
System.err.println("Graph loaded.");
result = new ConcurrentHashMap<>();
}
/**
 * Parses command-line options; exits with status 1 when JSAP prints a
 * usage message, and returns null when parsing itself threw.
 */
private static JSAPResult parse_args(String[] args) {
    try {
        SimpleJSAP jsap = new SimpleJSAP(AveragePaths.class.getName(), "",
                new Parameter[]{
                        new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
                                "graph", "Basename of the compressed graph"),
                        new FlaggedOption("nodeTypes", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's',
                                "nodetypes", "Node type constraints"),
                        new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                                "outdir", "Directory where to put the results"),
                        new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "32", JSAP.NOT_REQUIRED, 't',
                                "numthreads", "Number of threads"),});
        JSAPResult parsed = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
        return parsed;
    } catch (JSAPException e) {
        e.printStackTrace();
        return null;
    }
}
/**
 * Runs the experiment: one producer thread walks a random permutation of
 * the nodes and enqueues every subgraph node with outdegree 0; numThreads
 * workers run a backward BFS from each dequeued node, accumulating the
 * distance distribution in `result`.
 *
 * @param numThreads number of consumer threads
 */
private void run(int numThreads) throws InterruptedException {
final long END_OF_QUEUE = -1L;
ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads);
ExecutorService service = Executors.newFixedThreadPool(numThreads + 1);
// Producer: enqueues the "leaf" nodes of the subgraph in random order.
service.submit(() -> {
try {
- Graph thread_graph = graph.copy();
+ SwhBidirectionalGraph thread_graph = graph.copy();
Subgraph thread_subgraph = subgraph.copy();
long[][] randomPerm = Util.identity(thread_graph.numNodes());
LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom());
long n = thread_graph.numNodes();
ProgressLogger pl = new ProgressLogger();
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Filling processor queue...");
for (long j = 0; j < n; ++j) {
long node = BigArrays.get(randomPerm, j);
if (thread_subgraph.nodeExists(node) && thread_subgraph.outdegree(node) == 0) {
queue.put(node);
}
// Periodically snapshot partial results to disk.
if (j % 10000 == 0) {
printResult();
}
pl.update();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
// One END_OF_QUEUE sentinel per worker so every consumer exits.
for (int i = 0; i < numThreads; ++i) {
try {
queue.put(END_OF_QUEUE);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
});
// Consumers: BFS from every dequeued node until the sentinel is seen.
for (int i = 0; i < numThreads; ++i) {
service.submit(() -> {
try {
Subgraph thread_subgraph = subgraph.copy();
while (true) {
Long node = null;
try {
node = queue.take();
} catch (InterruptedException e) {
e.printStackTrace();
}
if (node == null || node == END_OF_QUEUE) {
return;
}
bfsAt(thread_subgraph, node);
}
} catch (Exception e) {
e.printStackTrace();
}
});
}
service.shutdown();
service.awaitTermination(365, TimeUnit.DAYS);
}
/**
 * Backward BFS from srcNodeId; for every reachable node with indegree 0,
 * records its BFS distance from the source in the shared `result` map.
 *
 * BFS levels are delimited by a FRONTIER_MARKER sentinel in the queue:
 * `distance` is incremented each time a whole frontier has been consumed.
 */
private void bfsAt(Subgraph graph, long srcNodeId) {
ArrayDeque queue = new ArrayDeque<>();
HashSet visited = new HashSet<>();
long FRONTIER_MARKER = -1;
queue.addLast(srcNodeId);
visited.add(srcNodeId);
long distance = 0;
queue.addLast(FRONTIER_MARKER);
while (!queue.isEmpty()) {
long currentNodeId = queue.removeFirst();
// System.err.println("curr: " + currentNodeId);
if (currentNodeId == FRONTIER_MARKER) {
if (queue.isEmpty()) // avoid infinite loops
break;
++distance;
queue.addLast(FRONTIER_MARKER);
continue;
}
// Nodes without predecessors are endpoints: record their distance.
if (graph.indegree(currentNodeId) == 0) {
result.merge(distance, 1L, Long::sum);
}
LazyLongIterator it = graph.predecessors(currentNodeId);
for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!visited.contains(neighborNodeId)) {
queue.addLast(neighborNodeId);
visited.add(neighborNodeId);
}
}
}
}
/**
 * Writes the current distance distribution, sorted by distance, to
 * outdir/distribution.txt as "distance count" lines.
 *
 * Fix: the PrintWriter is now closed via try-with-resources so it is
 * flushed even when println throws; generic map types (stripped during
 * extraction) are restored.
 */
public void printResult() throws IOException {
    // noinspection ResultOfMethodCallIgnored
    new File(outdir).mkdirs();
    try (PrintWriter f = new PrintWriter(new FileWriter(outdir + "/distribution.txt"))) {
        TreeMap<Long, Long> sortedDistribution = new TreeMap<>(result);
        for (Map.Entry<Long, Long> entry : sortedDistribution.entrySet()) {
            f.println(entry.getKey() + " " + entry.getValue());
        }
    }
}
/**
 * Entry point: builds the experiment from the command line, runs it with
 * the requested number of threads, and writes the final distribution.
 */
public static void main(String[] args) throws IOException, InterruptedException {
    JSAPResult config = parse_args(args);
    final String graphPath = config.getString("graphPath");
    final String outdir = config.getString("outdir");
    final String allowedNodes = config.getString("nodeTypes");
    final int numThreads = config.getInt("numThreads");
    AveragePaths tp = new AveragePaths(graphPath, allowedNodes, outdir);
    tp.run(numThreads);
    tp.printResult();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java
index 2e6fa0c..0564463 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java
@@ -1,325 +1,325 @@
package org.softwareheritage.graph.experiments.topology;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.Util;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.XoRoShiRo128PlusRandom;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import java.io.*;
import java.util.*;
import java.util.concurrent.*;
public class ClusteringCoefficient {
- private final Graph graph;
+ private final SwhBidirectionalGraph graph;
private final String outdirPath;
private final ConcurrentHashMap result_full;
private final ConcurrentHashMap result_dircnt;
private final ConcurrentHashMap result_rev;
private final ConcurrentHashMap result_revrel;
private final ConcurrentHashMap result_orisnp;
// Loads the graph, symmetrizes it (triangles are counted on the undirected
// structure), and prepares the five shared (thread-safe) result maps.
public ClusteringCoefficient(String graphBasename, String outdirPath) throws IOException {
this.outdirPath = outdirPath;
System.err.println("Loading graph " + graphBasename + " ...");
- Graph directedGraph = Graph.loadMapped(graphBasename);
+ SwhBidirectionalGraph directedGraph = SwhBidirectionalGraph.loadMapped(graphBasename);
this.graph = directedGraph.symmetrize();
System.err.println("Graph loaded.");
result_full = new ConcurrentHashMap<>();
result_dircnt = new ConcurrentHashMap<>();
result_rev = new ConcurrentHashMap<>();
result_revrel = new ConcurrentHashMap<>();
result_orisnp = new ConcurrentHashMap<>();
}
/**
 * Parses command-line options; exits with status 1 when JSAP prints a
 * usage message.
 *
 * Bug fix: the SimpleJSAP usage header previously advertised
 * AveragePaths.class (a copy-paste leftover); it now names this class.
 */
private static JSAPResult parse_args(String[] args) {
    JSAPResult config = null;
    try {
        SimpleJSAP jsap = new SimpleJSAP(ClusteringCoefficient.class.getName(), "",
                new Parameter[]{
                        new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
                                "graph", "Basename of the compressed graph"),
                        new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                                "outdir", "Directory where to put the results"),
                        new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "32", JSAP.NOT_REQUIRED, 't',
                                "numthreads", "Number of threads"),});
        config = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
    } catch (JSAPException e) {
        e.printStackTrace();
    }
    return config;
}
/**
 * Runs the computation: one producer enqueues every node in random order;
 * numThreads workers count the triangles around each dequeued node and
 * merge the counts into the shared result maps.
 *
 * @param numThreads number of consumer threads
 */
private void run(int numThreads) throws InterruptedException {
final long END_OF_QUEUE = -1L;
ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads);
ExecutorService service = Executors.newFixedThreadPool(numThreads + 1);
// Producer: enqueues all nodes following a random permutation.
service.submit(() -> {
try {
- Graph thread_graph = graph.copy();
+ SwhBidirectionalGraph thread_graph = graph.copy();
long[][] randomPerm = Util.identity(thread_graph.numNodes());
LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom());
long n = thread_graph.numNodes();
ProgressLogger pl = new ProgressLogger();
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Filling processor queue...");
for (long j = 0; j < n; ++j) {
long node = BigArrays.get(randomPerm, j);
queue.put(node);
// Periodically snapshot partial results to disk.
if (j % 10000 == 0) {
printResult();
}
pl.update();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
// One END_OF_QUEUE sentinel per worker so every consumer exits.
for (int i = 0; i < numThreads; ++i) {
try {
queue.put(END_OF_QUEUE);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
});
// Consumers: each counts triangles on its own copy of the graph.
for (int i = 0; i < numThreads; ++i) {
service.submit(() -> {
try {
- Graph thread_graph = graph.copy();
+ SwhBidirectionalGraph thread_graph = graph.copy();
while (true) {
Long node = null;
try {
node = queue.take();
} catch (InterruptedException e) {
e.printStackTrace();
}
if (node == null || node == END_OF_QUEUE) {
return;
}
computeAt(thread_graph, node);
}
} catch (Exception e) {
e.printStackTrace();
}
});
}
service.shutdown();
service.awaitTermination(365, TimeUnit.DAYS);
}
/**
 * Counts the triangles `node` participates in within its successor
 * neighborhood, bucketed by node-type family (all / dir+cnt / rev-only /
 * rev+rel / ori+snp), and merges each per-node count into the shared
 * distribution maps. Nodes of degree < 2 cannot close a triangle and are
 * skipped.
 */
- private void computeAt(Graph graph, long node) {
+ private void computeAt(SwhBidirectionalGraph graph, long node) {
long d = graph.outdegree(node);
if (d < 2) {
return;
}
Node.Type nodeType = graph.getNodeType(node);
HashSet neighborhood = new HashSet<>();
long succ;
final LazyLongIterator iterator = graph.successors(node);
while ((succ = iterator.nextLong()) != -1) {
neighborhood.add(succ);
}
// One counter per node-type family.
long triangles_full = 0;
long triangles_dircnt = 0;
long triangles_rev = 0;
long triangles_revrel = 0;
long triangles_orisnp = 0;
for (Long neighbor : neighborhood) {
Node.Type neighborNodeType = graph.getNodeType(neighbor);
final LazyLongIterator it = graph.successors(neighbor);
while ((succ = it.nextLong()) != -1) {
// An edge between two neighbors closes a triangle with `node`.
if (neighborhood.contains(succ)) {
Node.Type succNodeType = graph.getNodeType(succ);
triangles_full++;
if ((nodeType == Node.Type.DIR || nodeType == Node.Type.CNT)
&& (neighborNodeType == Node.Type.DIR || neighborNodeType == Node.Type.CNT)
&& (succNodeType == Node.Type.DIR || succNodeType == Node.Type.CNT)) {
triangles_dircnt++;
} else if ((nodeType == Node.Type.REV || nodeType == Node.Type.REL)
&& (neighborNodeType == Node.Type.REV || neighborNodeType == Node.Type.REL)
&& (succNodeType == Node.Type.REV || succNodeType == Node.Type.REL)) {
triangles_revrel++;
if (nodeType == Node.Type.REV && neighborNodeType == Node.Type.REV
&& succNodeType == Node.Type.REV)
triangles_rev++;
} else if ((nodeType == Node.Type.ORI || nodeType == Node.Type.SNP)
&& (neighborNodeType == Node.Type.ORI || neighborNodeType == Node.Type.SNP)
&& (succNodeType == Node.Type.ORI || succNodeType == Node.Type.SNP)) {
triangles_orisnp++;
}
}
}
}
result_full.merge(triangles_full, 1L, Long::sum);
result_dircnt.merge(triangles_dircnt, 1L, Long::sum);
result_rev.merge(triangles_rev, 1L, Long::sum);
result_revrel.merge(triangles_revrel, 1L, Long::sum);
result_orisnp.merge(triangles_orisnp, 1L, Long::sum);
}
/**
 * Writes the given distribution to {@code distribPath}, one "key value" pair
 * per line, sorted by key in ascending order.
 *
 * @param distribPath path of the output text file (overwritten if present)
 * @param distrib distribution to dump (key -> occurrence count); generic
 *        arguments restored — extraction had stripped the {@code <Long, Long>}
 * @throws IOException if the output file cannot be created or written
 */
public void printSortedDistribution(String distribPath, Map<Long, Long> distrib) throws IOException {
    // TreeMap iterates its entries in ascending key order.
    TreeMap<Long, Long> sortedDistribution = new TreeMap<>(distrib);
    // try-with-resources guarantees the writer is closed even if println throws.
    try (PrintWriter f = new PrintWriter(new FileWriter(distribPath))) {
        for (Map.Entry<Long, Long> entry : sortedDistribution.entrySet()) {
            f.println(entry.getKey() + " " + entry.getValue());
        }
    }
}
// Dumps all five triangle-count distributions (one text file per node-type
// layer) under `outdirPath`, creating the directory first.
public void printResult() throws IOException {
// Best-effort directory creation; mkdirs() is a no-op if it already exists.
new File(outdirPath).mkdirs();
printSortedDistribution(outdirPath + "/distribution-full.txt", result_full);
printSortedDistribution(outdirPath + "/distribution-dircnt.txt", result_dircnt);
printSortedDistribution(outdirPath + "/distribution-rev.txt", result_rev);
// NOTE(review): file is named "relrev" while the variable is "revrel" —
// presumably intentional (layer naming), but confirm downstream consumers.
printSortedDistribution(outdirPath + "/distribution-relrev.txt", result_revrel);
printSortedDistribution(outdirPath + "/distribution-orisnp.txt", result_orisnp);
}
/**
 * Command-line entry point: parses the arguments, runs the clustering
 * coefficient computation with the requested number of threads, and dumps
 * the resulting distributions to the output directory.
 */
public static void main(String[] args) throws IOException, InterruptedException {
    final JSAPResult parsedArgs = parse_args(args);
    final String graphBasename = parsedArgs.getString("graphPath");
    final String resultsDirectory = parsedArgs.getString("outdir");
    final int threadCount = parsedArgs.getInt("numThreads");

    final ClusteringCoefficient computation = new ClusteringCoefficient(graphBasename, resultsDirectory);
    computation.run(threadCount);
    computation.printResult();
}
// Old unused functions
/**
 * Counts the directed edges between successors of {@code node}: every edge
 * u -> v with both u and v successors of the node closes one triplet. This is
 * the numerator of the node's local clustering coefficient.
 *
 * <p>Generic arguments restored — extraction had stripped {@code <Long>}.
 *
 * @param graph graph to scan (only outgoing arcs are followed)
 * @param node node whose successor neighborhood is examined
 * @return number of directed edges between successors of {@code node}
 */
private long oppositeEdges(ImmutableGraph graph, long node) {
    // Materialize the successor set for O(1) membership tests below.
    HashSet<Long> neighborhood = new HashSet<>();
    long succ;
    final LazyLongIterator iterator = graph.successors(node);
    while ((succ = iterator.nextLong()) != -1) {
        neighborhood.add(succ);
    }

    long closed_triplets = 0;
    for (Long neighbor : neighborhood) {
        final LazyLongIterator it = graph.successors(neighbor);
        while ((succ = it.nextLong()) != -1) {
            if (neighborhood.contains(succ)) {
                closed_triplets++;
            }
        }
    }
    return closed_triplets;
}
/**
 * Computes the local clustering coefficient (LCC) of every node and global
 * averages, writing the per-node LCC distribution to {@code out_local} and
 * aggregate coefficients to {@code out_global}.
 *
 * <p>Generic arguments restored — extraction had stripped
 * {@code <Double, Long>}.
 *
 * @param pl progress logger, updated once per node
 * @param out_local receives "lcc count" pairs, one per distinct LCC value
 * @param out_global receives the aggregate coefficients C, C0 and C1
 */
public void compute(ProgressLogger pl, Formatter out_local, Formatter out_global) {
    final long n = this.graph.numNodes();
    pl.expectedUpdates = n;
    pl.itemsName = "nodes";

    long nodes_d2 = 0; // number of nodes with outdegree >= 2
    double cum_lcc = 0; // sum of LCCs over those nodes
    // NOTE(review): cum_lcc_c0 is never incremented below, so gC0 is always
    // 0 — confirm whether the d >= 2 branch was meant to update it as well.
    double cum_lcc_c0 = 0;
    double cum_lcc_c1 = 0; // counts nodes with outdegree < 2
    HashMap<Double, Long> distribution = new HashMap<>();
    for (long node = 0; node < n; node++) {
        long d = graph.outdegree(node);
        if (d >= 2) {
            double t = (d * (d - 1)); // possible ordered successor pairs
            double m = oppositeEdges(graph, node);
            double lcc = m / t;
            distribution.merge(lcc, 1L, Long::sum);

            cum_lcc += lcc;
            nodes_d2++;
        } else {
            cum_lcc_c1++;
        }
        pl.update();
    }
    pl.done();

    for (Map.Entry<Double, Long> entry : distribution.entrySet()) {
        out_local.format("%f %d\n", entry.getKey(), entry.getValue());
    }

    double gC = cum_lcc / nodes_d2;
    double gC0 = cum_lcc_c0 / n;
    double gC1 = cum_lcc_c1 / n;

    out_global.format("C: %f\n", gC);
    out_global.format("C0: %f\n", gC0);
    out_global.format("C1: %f\n", gC1);
}
// Monte-Carlo approximation of the global clustering coefficient: repeatedly
// samples a node and two distinct random successors (u, v), and checks
// whether the edge u -> v exists. Runs forever by design (experimental
// code); progress is printed as it goes.
public void compute_approx(Formatter out_global) {
final long n = this.graph.numNodes();
long trials = 0;
long triangles = 0;
// Intentional infinite sampling loop — terminate the process to stop.
while (true) {
long node = ThreadLocalRandom.current().nextLong(0, n);
long d = graph.outdegree(node);
if (d >= 2) {
Long u = null;
Long v = null;
// Sample two distinct successor positions: u_idx in [0, d),
// v_idx in [0, d-1) shifted past u_idx, so v_idx != u_idx and both < d.
long u_idx = ThreadLocalRandom.current().nextLong(0, d);
long v_idx = ThreadLocalRandom.current().nextLong(0, d - 1);
if (v_idx >= u_idx) {
v_idx++;
}
long succ;
// Walk the successor list once to pick out the u-th and v-th successors.
// Both indices are < d, so u and v are always assigned.
final LazyLongIterator node_iterator = graph.successors(node);
for (long succ_idx = 0; (succ = node_iterator.nextLong()) != -1; succ_idx++) {
if (succ_idx == u_idx) {
u = succ;
}
if (succ_idx == v_idx) {
v = succ;
}
}
// Count one triangle per u -> v edge (parallel arcs counted separately).
final LazyLongIterator u_iterator = graph.successors(u);
while ((succ = u_iterator.nextLong()) != -1) {
if (succ == v)
triangles++;
}
}
trials++;
// NOTE(review): "|| true" makes this condition vacuous, so the estimate
// is printed on EVERY trial — presumably a leftover debugging tweak.
if (trials % 100 == 0 || true) {
double gC = (double) triangles / (double) trials;
out_global.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials);
System.out.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials);
}
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
index 4dc4c7d..b351869 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
@@ -1,200 +1,200 @@
package org.softwareheritage.graph.experiments.topology;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.AllowedNodes;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.Subgraph;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.*;
public class ConnectedComponents {
private Subgraph graph;
// Loads the compressed graph, symmetrizes it (connected components are
// computed on the undirected structure), and restricts it to the allowed
// node types.
// NOTE(review): the two marked lines below carry unified-diff markers from
// the surrounding patch; the "+" line is the current code.
private void load_graph(String graphBasename, String nodeTypes) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
- var underlyingGraph = Graph.loadMapped(graphBasename);
+ var underlyingGraph = SwhBidirectionalGraph.loadMapped(graphBasename);
var underlyingGraphSym = underlyingGraph.symmetrize();
graph = new Subgraph(underlyingGraphSym, new AllowedNodes(nodeTypes));
System.err.println("Graph loaded.");
}
/**
 * Parses the command-line arguments with JSAP.
 *
 * <p>Fix: on a {@link JSAPException} this previously fell through and
 * returned {@code null}, making the caller fail later with an opaque
 * NullPointerException; it now exits with a non-zero status instead.
 *
 * @param args raw command-line arguments
 * @return the parsed configuration (never {@code null}; the JVM exits on
 *         parse failure or when JSAP already printed a usage message)
 */
private static JSAPResult parse_args(String[] args) {
    JSAPResult config = null;
    try {
        SimpleJSAP jsap = new SimpleJSAP(ConnectedComponents.class.getName(), "",
                new Parameter[]{
                        new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
                                "graph", "Basename of the compressed graph"),
                        new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                                "outdir", "Directory where to put the results"),
                        new Switch("byOrigins", JSAP.NO_SHORTFLAG, "by-origins",
                                "Compute size of components by number of origins"),
                        new FlaggedOption("nodeTypes", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
                                "nodetypes", "Allowed node types (comma-separated)"),});
        config = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
    } catch (JSAPException e) {
        e.printStackTrace();
        System.exit(1); // fail fast rather than returning null to the caller
    }
    return config;
}
/**
 * Visits the (symmetrized) graph with a disk-queued BFS from every unvisited
 * node and returns the distribution of connected-component sizes.
 *
 * <p>Generic arguments restored — extraction had stripped
 * {@code <Long, Long>}. The temporary BFS queue file is now scheduled for
 * deletion on JVM exit instead of being leaked.
 *
 * @param pl progress logger, updated once per dequeued node
 * @param byOrigin when true, a component's "size" counts only origin nodes
 * @return map from component size to number of components of that size
 * @throws IOException if the on-disk BFS queue cannot be created
 */
private HashMap<Long, Long> /* ArrayList<ArrayList<Long>> */ compute(ProgressLogger pl, boolean byOrigin)
        throws IOException {
    final long n = graph.numNodes();
    final long maxN = graph.maxNodeNumber();

    // Allow enough memory to behave like in-memory queue
    int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * maxN);

    // Use a disk based queue to store BFS frontier
    final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue");
    queueFile.deleteOnExit(); // don't leave the temporary queue file behind
    final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
    final byte[] byteBuf = new byte[Long.BYTES];
    // WARNING: no 64-bit version of this data-structure, but it can support
    // indices up to 2^37
    LongArrayBitVector visited = LongArrayBitVector.ofLength(maxN);

    pl.expectedUpdates = n;
    pl.itemsName = "nodes";
    pl.start("Starting connected components visit...");

    // ArrayList<ArrayList<Long>> components = new ArrayList<>();
    HashMap<Long, Long> componentDistribution = new HashMap<>();

    var it = graph.nodeIterator();
    while (it.hasNext()) {
        long i = it.nextLong();
        if (visited.getBoolean(i))
            continue;

        // ArrayList<Long> component = new ArrayList<>();
        long componentNodes = 0;

        queue.enqueue(Longs.toByteArray(i));
        visited.set(i);

        while (!queue.isEmpty()) {
            queue.dequeue(byteBuf);
            final long currentNode = Longs.fromByteArray(byteBuf);
            // component.add(currentNode);
            if (!byOrigin || graph.getNodeType(currentNode) == Node.Type.ORI)
                componentNodes += 1;

            final LazyLongIterator iterator = graph.successors(currentNode);
            long succ;
            while ((succ = iterator.nextLong()) != -1) {
                if (visited.getBoolean(succ))
                    continue;
                visited.set(succ);
                queue.enqueue(Longs.toByteArray(succ));
            }
            pl.update();
        }

        /*
         * if (component.size() > 0) { components.add(component); }
         */
        if (componentNodes > 0)
            componentDistribution.merge(componentNodes, 1L, Long::sum);
    }
    pl.done();
    // return components;
    return componentDistribution;
}
private static void printDistribution(ArrayList> components, Formatter out) {
TreeMap distribution = new TreeMap<>();
for (ArrayList component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry entry : distribution.entrySet()) {
out.format("%d %d\n", entry.getKey(), entry.getValue());
}
out.close();
}
private static void printLargestComponent(ArrayList> components, Formatter out) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList component = components.get(indexLargest);
for (Long node : component) {
out.format("%d\n", node);
}
out.close();
}
private static void printAllComponents(ArrayList> components, Formatter out) {
for (int i = 1; i < components.size(); ++i) {
ArrayList component = components.get(i);
for (Long node : component) {
out.format("%d ", node);
}
out.format("\n");
}
out.close();
}
// Command-line entry point: loads the type-restricted symmetrized graph,
// computes the connected-component size distribution, and writes it as
// "size count" lines sorted by size.
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String outdirPath = config.getString("outdir");
String nodeTypes = config.getString("nodeTypes");
boolean byOrigin = config.getBoolean("byOrigins");
ConnectedComponents connectedComponents = new ConnectedComponents();
try {
connectedComponents.load_graph(graphPath, nodeTypes);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
ProgressLogger logger = new ProgressLogger();
// noinspection ResultOfMethodCallIgnored
new File(outdirPath).mkdirs();
try {
// Earlier variant kept the full component lists in memory; only the
// size distribution is computed now (see commented-out calls below).
// ArrayList> components = connectedComponents.compute(logger);
// components.sort(Comparator.comparing(ArrayList::size).reversed());
// printDistribution(components, new Formatter(outdirPath + "/distribution.txt"));
// printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt"));
// printAllComponents(components, new Formatter(outdirPath + "/all_components.txt"));
HashMap componentDistribution = connectedComponents.compute(logger, byOrigin);
// TreeMap sorts entries by component size for a stable, readable output.
PrintWriter f = new PrintWriter(new FileWriter(outdirPath + "/distribution.txt"));
TreeMap sortedDistribution = new TreeMap<>(componentDistribution);
for (Map.Entry entry : sortedDistribution.entrySet()) {
f.println(entry.getKey() + " " + entry.getValue());
}
f.close();
} catch (IOException e) {
e.printStackTrace();
}
logger.done();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
index 2d6ebdb..a74a31b 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
@@ -1,239 +1,239 @@
package org.softwareheritage.graph.experiments.topology;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.logging.ProgressLogger;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
public class InOutDegree {
// Utility class: not instantiable, only static entry points are exposed.
private InOutDegree() {
}
// One counter slot per node type, plus one extra slot (TYPE_ALL) holding the
// total across all types.
private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1;
// Index of the "all types" aggregate slot in a per-type degree array.
private static final int TYPE_ALL = Node.Type.values().length;
// Per-type indices into the degree arrays returned by in/outdegreeTypes().
private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT);
private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR);
private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV);
private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL);
private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP);
private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI);
// Returns the outdegree of `node` broken down by successor node type:
// out[TYPE_X] counts successors of type X, out[TYPE_ALL] the grand total.
// NOTE(review): the two marked lines below carry unified-diff markers from
// the surrounding patch; the "+" line is the current signature.
- public static long[] outdegreeTypes(final Graph graph, long node) {
+ public static long[] outdegreeTypes(final SwhBidirectionalGraph graph, long node) {
long[] out = new long[NODE_ARRAY_SIZE];
var successors = graph.successors(node);
long neighbor;
while ((neighbor = successors.nextLong()) != -1) {
out[Node.Type.toInt(graph.getNodeType(neighbor))]++;
out[TYPE_ALL]++;
}
return out;
}
// Returns the indegree of `node` broken down by predecessor node type, by
// counting outgoing arcs in the transposed graph.
// NOTE(review): diff markers retained from the surrounding patch.
- public static long[] indegreeTypes(final Graph graph, long node) {
+ public static long[] indegreeTypes(final SwhBidirectionalGraph graph, long node) {
return outdegreeTypes(graph.transpose(), node);
}
/**
 * Writes the given distribution to {@code filename}, one "key value" pair per
 * line, sorted by key in ascending order.
 *
 * <p>Generic arguments restored — extraction had stripped
 * {@code <Long, Long>} — and the writer is now closed via try-with-resources
 * so it cannot leak when a write fails.
 *
 * @param distribution distribution to dump (key -> occurrence count)
 * @param filename path of the output text file (overwritten if present)
 * @throws IOException if the output file cannot be created or written
 */
public static void writeDistribution(HashMap<Long, Long> distribution, String filename) throws IOException {
    // TreeMap iterates its entries in ascending key order.
    TreeMap<Long, Long> sortedDistribution = new TreeMap<>(distribution);
    try (PrintWriter f = new PrintWriter(new FileWriter(filename))) {
        for (Map.Entry<Long, Long> entry : sortedDistribution.entrySet()) {
            f.println(entry.getKey() + " " + entry.getValue());
        }
    }
}
// Scans every node once and accumulates in/out-degree histograms, both
// per node-type pair (e.g. indegree of DIR nodes counting only REV
// predecessors) and aggregated per layer, then writes each histogram to its
// own file under `resultsDir`.
// NOTE(review): the two marked lines below carry unified-diff markers from
// the surrounding patch; the "+" line is the current signature. Generic
// arguments on the HashMaps appear stripped by extraction (presumably
// HashMap<Long, Long>) — confirm against the original file.
- public static void run(final Graph graph, String resultsDir) throws IOException {
+ public static void run(final SwhBidirectionalGraph graph, String resultsDir) throws IOException {
// Per-type
var cnt_in_dir = new HashMap();
var dir_in_dir = new HashMap();
var dir_in_rev = new HashMap();
var dir_in_all = new HashMap();
var dir_out_all = new HashMap();
var dir_out_dir = new HashMap();
var dir_out_cnt = new HashMap();
var dir_out_rev = new HashMap();
var rev_in_dir = new HashMap();
var rev_in_rel = new HashMap();
var rev_in_rev = new HashMap();
var rev_in_snp = new HashMap();
var rev_in_all = new HashMap();
var rev_out_rev = new HashMap();
var rel_in_snp = new HashMap();
var snp_in_ori = new HashMap();
var snp_out_all = new HashMap();
var snp_out_rel = new HashMap();
var snp_out_rev = new HashMap();
var ori_out_snp = new HashMap();
// Aggregated per layer
var full_in = new HashMap();
var full_out = new HashMap();
var dircnt_in = new HashMap();
var dircnt_out = new HashMap();
var orisnp_in = new HashMap();
var orisnp_out = new HashMap();
var relrev_in = new HashMap();
var relrev_out = new HashMap();
var rev_in = rev_in_rev; // alias for single-type layer
var rev_out = rev_out_rev;
final ProgressLogger pl = new ProgressLogger();
pl.itemsName = "nodes";
pl.expectedUpdates = graph.numNodes();
pl.start("Scanning...");
long[] in;
long[] out;
// Per-type degree breakdowns (indegreeTypes/outdegreeTypes) are only
// computed for the node types that need them; the cheaper scalar
// indegree/outdegree is used everywhere else.
for (long i = graph.numNodes(); i-- != 0;) {
long d_in = graph.indegree(i);
long d_out = graph.outdegree(i);
full_in.merge(d_in, 1L, Long::sum);
full_out.merge(d_out, 1L, Long::sum);
switch (graph.getNodeType(i)) {
case CNT:
// Contents have no successors, hence the 0L outdegree entry.
cnt_in_dir.merge(d_in, 1L, Long::sum);
dircnt_in.merge(d_in, 1L, Long::sum);
dircnt_out.merge(0L, 1L, Long::sum);
break;
case DIR:
in = indegreeTypes(graph, i);
out = outdegreeTypes(graph, i);
dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum);
dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum);
dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
dircnt_in.merge(in[TYPE_DIR] + in[TYPE_CNT], 1L, Long::sum);
dircnt_out.merge(out[TYPE_DIR] + out[TYPE_CNT], 1L, Long::sum);
break;
case REV:
in = indegreeTypes(graph, i);
out = outdegreeTypes(graph, i);
rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum);
rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum);
rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
relrev_in.merge(in[TYPE_REL] + in[TYPE_REV], 1L, Long::sum);
relrev_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum);
break;
case REL:
// Releases have no incoming arcs in the rel+rev layer, hence 0L.
rel_in_snp.merge(d_in, 1L, Long::sum);
relrev_in.merge(0L, 1L, Long::sum);
relrev_out.merge(d_out, 1L, Long::sum);
break;
case SNP:
out = outdegreeTypes(graph, i);
snp_in_ori.merge(d_in, 1L, Long::sum);
snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum);
snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
orisnp_in.merge(d_in, 1L, Long::sum);
orisnp_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum);
break;
case ORI:
// Origins have no incoming arcs in the ori+snp layer, hence 0L.
ori_out_snp.merge(d_out, 1L, Long::sum);
orisnp_in.merge(0L, 1L, Long::sum);
orisnp_out.merge(d_out, 1L, Long::sum);
break;
default :
pl.logger().warn("Invalid node type at pos {}", i);
break;
}
pl.update();
}
pl.done();
// Dump the aggregated per-layer histograms...
(new File(resultsDir)).mkdir();
writeDistribution(full_in, resultsDir + "/full_in.txt");
writeDistribution(full_out, resultsDir + "/full_out.txt");
writeDistribution(dircnt_in, resultsDir + "/dir+cnt_in.txt");
writeDistribution(dircnt_out, resultsDir + "/dir+cnt_out.txt");
writeDistribution(relrev_in, resultsDir + "/rel+rev_in.txt");
writeDistribution(relrev_out, resultsDir + "/rel+rev_out.txt");
writeDistribution(orisnp_in, resultsDir + "/ori+snp_in.txt");
writeDistribution(orisnp_out, resultsDir + "/ori+snp_out.txt");
writeDistribution(rev_in, resultsDir + "/rev_in.txt");
writeDistribution(rev_out, resultsDir + "/rev_out.txt");
// ...and the per-type-pair histograms in a subdirectory.
String resultsTypeDir = resultsDir + "/per_type";
(new File(resultsTypeDir)).mkdir();
writeDistribution(cnt_in_dir, resultsTypeDir + "/cnt_in_dir.txt");
writeDistribution(dir_in_dir, resultsTypeDir + "/dir_in_dir.txt");
writeDistribution(dir_in_rev, resultsTypeDir + "/dir_in_rev.txt");
writeDistribution(dir_in_all, resultsTypeDir + "/dir_in_all.txt");
writeDistribution(dir_out_all, resultsTypeDir + "/dir_out_all.txt");
writeDistribution(dir_out_dir, resultsTypeDir + "/dir_out_dir.txt");
writeDistribution(dir_out_cnt, resultsTypeDir + "/dir_out_cnt.txt");
writeDistribution(dir_out_rev, resultsTypeDir + "/dir_out_rev.txt");
writeDistribution(rev_in_dir, resultsTypeDir + "/rev_in_dir.txt");
writeDistribution(rev_in_rel, resultsTypeDir + "/rev_in_rel.txt");
writeDistribution(rev_in_rev, resultsTypeDir + "/rev_in_rev.txt");
writeDistribution(rev_in_snp, resultsTypeDir + "/rev_in_snp.txt");
writeDistribution(rev_in_all, resultsTypeDir + "/rev_in_all.txt");
writeDistribution(rev_out_rev, resultsTypeDir + "/rev_out_rev.txt");
writeDistribution(rel_in_snp, resultsTypeDir + "/rel_in_snp.txt");
writeDistribution(snp_in_ori, resultsTypeDir + "/snp_in_ori.txt");
writeDistribution(snp_out_all, resultsTypeDir + "/snp_out_all.txt");
writeDistribution(snp_out_rel, resultsTypeDir + "/snp_out_rel.txt");
writeDistribution(snp_out_rev, resultsTypeDir + "/snp_out_rev.txt");
writeDistribution(ori_out_snp, resultsTypeDir + "/ori_out_snp.txt");
}
// Command-line entry point: parses "basename [resultsDir]" and runs the
// degree-distribution scan; resultsDir defaults to the graph basename.
static public void main(final String[] arg)
throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException,
NoSuchMethodException, JSAPException, IOException, ClassNotFoundException {
final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(),
"Computes in and out degrees of the given SWHGraph",
new Parameter[]{
new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the graph."),
new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
JSAP.NOT_GREEDY, "The directory of the resulting files."),});
final JSAPResult jsapResult = jsap.parse(arg);
if (jsap.messagePrinted())
System.exit(1);
final String basename = jsapResult.getString("basename");
final String resultsDir = jsapResult.userSpecified("resultsDir")
? jsapResult.getString("resultsDir")
: basename;
// NOTE(review): `pl` is created but never used — run() builds its own
// ProgressLogger; consider removing.
final ProgressLogger pl = new ProgressLogger();
// NOTE(review): diff markers retained from the surrounding patch.
- Graph graph = Graph.loadMapped(basename);
+ SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(basename);
run(graph, resultsDir);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java
index f897e00..3632d32 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java
@@ -1,98 +1,98 @@
package org.softwareheritage.graph.experiments.topology;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.Util;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.XoRoShiRo128PlusRandom;
-import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhBidirectionalGraph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.experiments.forks.ForkCC;
import java.io.*;
public class SubdatasetSizeFunction {
// Utility class: not instantiable, only static entry points are exposed.
private SubdatasetSizeFunction() {
}
// Performs BFS traversals from origins taken in a random uniform order and
// periodically prints the cumulative number of visited nodes, edges and
// contents — i.e. the growth of the "subdataset" as origins are added.
// NOTE(review): diff markers retained from the surrounding patch; the "+"
// line is the current signature.
- public static void run(final Graph graph) throws IOException {
+ public static void run(final SwhBidirectionalGraph graph) throws IOException {
final ProgressLogger pl = new ProgressLogger();
pl.itemsName = "nodes";
pl.expectedUpdates = graph.numNodes();
long n = graph.numNodes();
LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
// Buffer large enough that the disk-backed queue behaves like an
// in-memory one for most graphs.
int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// Random permutation of all node ids; only origin nodes are used as seeds.
long[][] randomPerm = Util.identity(graph.numNodes());
LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom());
long visitedNodes = 0;
long visitedEdges = 0;
long visitedOrigins = 0;
long visitedContents = 0;
pl.start("Running traversal starting from origins...");
for (long j = 0; j < n; ++j) {
long i = BigArrays.get(randomPerm, j);
if (visited.getBoolean(i) || graph.getNodeType(i) != Node.Type.ORI) {
continue;
}
visitedOrigins++;
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
visitedNodes++;
if (graph.getNodeType(currentNode) == Node.Type.CNT)
visitedContents++;
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
visitedEdges++;
if (visited.getBoolean(succ))
continue;
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
pl.update();
}
// Print one datapoint every 10000 origins (and for the very first one).
if (visitedOrigins % 10000 == 0)
System.out.println(visitedNodes + " " + visitedEdges + " " + visitedContents);
}
pl.done();
}
// Command-line entry point: loads the graph given by "basename" and runs the
// subdataset size traversal.
static public void main(final String[] arg)
throws IllegalArgumentException, SecurityException, JSAPException, IOException {
final SimpleJSAP jsap = new SimpleJSAP(SubdatasetSizeFunction.class.getName(),
"Computes subdataset size functions using a random uniform order",
new Parameter[]{new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the graph."),});
final JSAPResult jsapResult = jsap.parse(arg);
if (jsap.messagePrinted())
System.exit(1);
final String basename = jsapResult.getString("basename");
// NOTE(review): diff markers retained from the surrounding patch.
- Graph graph = Graph.loadMapped(basename);
+ SwhBidirectionalGraph graph = SwhBidirectionalGraph.loadMapped(basename);
run(graph);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
index 04bde71..0ace4bd 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
@@ -1,527 +1,527 @@
package org.softwareheritage.graph.maps;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.NodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.labels.DirEntry;
import org.softwareheritage.graph.labels.SwhLabel;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.TimeUnit;
public class LabelMapBuilder {
final static String SORT_BUFFER_SIZE = "40%";
final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class);
String graphPath;
String outputGraphPath;
String debugPath;
String tmpDir;
ImmutableGraph graph;
long numNodes;
long numArcs;
NodeIdMap nodeIdMap;
Object2LongFunction filenameMph;
long numFilenames;
int totalLabelWidth;
// Builds the label-map builder state: memory-maps the compressed graph,
// loads the node-id map and the filename MPH, and derives the per-label
// bit width from the number of distinct filenames.
// NOTE(review): diff markers retained from the surrounding patch; the "+"
// line is the current NodeIdMap constructor call.
public LabelMapBuilder(String graphPath, String debugPath, String outputGraphPath, String tmpDir)
throws IOException {
this.graphPath = graphPath;
// Default the output basename to the input basename when not specified.
if (outputGraphPath == null) {
this.outputGraphPath = graphPath;
} else {
this.outputGraphPath = outputGraphPath;
}
this.debugPath = debugPath;
this.tmpDir = tmpDir;
// Load the graph in offline mode to retrieve the number of nodes/edges,
// then immediately destroy it. XXX: not even needed?
// ImmutableGraph graphOffline = BVGraph.loadMapped(graphPath);
graph = BVGraph.loadMapped(graphPath);
numArcs = graph.numArcs();
numNodes = graph.numNodes();
- nodeIdMap = new NodeIdMap(graphPath, numNodes);
+ nodeIdMap = new NodeIdMap(graphPath);
filenameMph = NodeIdMap.loadMph(graphPath + "-labels.mph");
numFilenames = getMPHSize(filenameMph);
totalLabelWidth = DirEntry.labelWidth(numFilenames);
}
/**
 * Parses the command-line arguments with JSAP.
 *
 * <p>Fix: on a {@link JSAPException} this previously fell through and
 * returned {@code null}, making the caller fail later with an opaque
 * NullPointerException; it now exits with a non-zero status instead (same
 * behavior as the other experiment drivers).
 *
 * @param args raw command-line arguments
 * @return the parsed configuration (never {@code null}; the JVM exits on
 *         parse failure or when JSAP already printed a usage message)
 */
private static JSAPResult parse_args(String[] args) {
    JSAPResult config = null;
    try {
        SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "", new Parameter[]{
                new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph",
                        "Basename of the compressed graph"),
                new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd',
                        "debug-path", "Store the intermediate representation here for debug"),
                new FlaggedOption("outputGraphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o',
                        "output-graph", "Basename of the output graph, same as --graph if not specified"),
                new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp",
                        "Temporary directory path"),});
        config = jsap.parse(args);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }
    } catch (JSAPException e) {
        e.printStackTrace();
        System.exit(1); // fail fast rather than returning null to the caller
    }
    return config;
}
// Command-line entry point: builds the label map for the given graph.
// Reads the intermediate edge-label representation from standard input
// (see hashLabelStream / computeLabelMap).
public static void main(String[] args) throws IOException, InterruptedException {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String outputGraphPath = config.getString("outputGraphPath");
String tmpDir = config.getString("tmpDir");
String debugPath = config.getString("debugPath");
// The constructor loads the graph, node-id map and filename MPH.
LabelMapBuilder builder = new LabelMapBuilder(graphPath, debugPath, outputGraphPath, tmpDir);
logger.info("Loading graph and MPH functions...");
builder.computeLabelMap();
}
/**
 * Returns the number of keys in the given minimal perfect hash function,
 * using the 64-bit size accessor when the implementation provides one.
 */
static long getMPHSize(Object2LongFunction mph) {
    if (mph instanceof Size64) {
        // Size64 implementations may hold more than Integer.MAX_VALUE keys.
        return ((Size64) mph).size64();
    }
    return mph.size();
}
// Top-level label-map pipeline: loads the graph, then sorts and writes the
// labels. The bsort-based path is currently active; the textual sort(1)
// path is kept (commented out) as an alternative implementation.
void computeLabelMap() throws IOException, InterruptedException {
this.loadGraph();
// this.computeLabelMapSort();
this.computeLabelMapBsort();
}
// Alternative (currently unused) sorting path: streams the hashed labels as
// tab-separated text through an external sort(1) process, then re-parses
// the sorted output to write the label file.
void computeLabelMapSort() throws IOException {
// Pass the intermediate representation to sort(1) so that we see the labels in the order they will
// appear in the label file.
ProcessBuilder processBuilder = new ProcessBuilder();
// NOTE(review): "--numeric-sort" is redundant with "-k1,1n -k2,2n" —
// harmless, but confirm intended.
processBuilder.command("sort", "-k1,1n", "-k2,2n", // Numerical sort
"--numeric-sort", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir);
Process sort = processBuilder.start();
BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream());
// BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream());
FastBufferedInputStream sort_stdout = new FastBufferedInputStream(sort.getInputStream());
// Labels arrive on standard input in their raw textual form.
final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in);
hashLabelStream(fbis, new EdgeLabelLineWriter() {
@Override
public void writeLine(long src, long dst, long filenameId, int permission) throws IOException {
sort_stdin.write((src + "\t" + dst + "\t" + filenameId + "\t" + permission + "\n")
.getBytes(StandardCharsets.US_ASCII));
}
});
// Closing sort's stdin signals EOF so sort(1) can start emitting output.
// NOTE(review): sort's exit status is never checked — confirm acceptable.
sort_stdin.close();
EdgeLabelLineIterator mapLines = new TextualEdgeLabelLineIterator(sort_stdout);
writeLabels(mapLines);
logger.info("Done");
}
/**
 * Sorts the hashed edge labels with an external bsort(1) binary: the labels
 * are packed into a fixed-width binary file, sorted in place by the
 * (src, dst) key prefix, then read back to write the label file.
 *
 * <p>Fix: {@code out} was never flushed or closed before bsort read the
 * file, so buffered records could be missing from the sort input; the stream
 * is now closed once the label stream has been fully written.
 *
 * @throws IOException if the temporary file cannot be written or read
 * @throws InterruptedException if waiting for the bsort process is interrupted
 */
void computeLabelMapBsort() throws IOException, InterruptedException {
    // Pass the intermediate representation to bsort(1) so that we see the labels in the order they will
    // appear in the label file.
    String tmpFile = tmpDir + "/labelsToSort.bin";
    final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in);
    final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tmpFile)));

    // Number of bytes to represent a node.
    final int nodeBytes = (Long.SIZE - Long.numberOfLeadingZeros(graph.numNodes())) / 8 + 1;
    ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);

    logger.info("Writing labels to a packed binary files (node bytes: {})", nodeBytes);
    hashLabelStream(fbis, new EdgeLabelLineWriter() {
        @Override
        public void writeLine(long src, long dst, long filenameId, int permission) throws IOException {
            // Fixed-width record: src, dst (nodeBytes each), filenameId, permission.
            buffer.putLong(0, src);
            out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes);
            buffer.putLong(0, dst);
            out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes);
            out.writeLong(filenameId);
            out.writeInt(permission);
        }
    });
    // Flush and close before bsort reads the file, otherwise buffered
    // records would be missing from the sort input.
    out.close();

    ProcessBuilder processBuilder = new ProcessBuilder();
    // FIXME(review): hardcoded developer-specific path to the bsort binary;
    // should be configurable (PATH lookup or a command-line option).
    processBuilder.command("/home/seirl/bsort/src/bsort", "-v", "-r",
            String.valueOf(nodeBytes * 2 + Long.BYTES + Integer.BYTES), "-k", String.valueOf(nodeBytes * 2),
            tmpFile);
    Process sort = processBuilder.start();
    sort.waitFor();

    final DataInputStream sortedLabels = new DataInputStream(new BufferedInputStream(new FileInputStream(tmpFile)));
    BinaryEdgeLabelLineIterator mapLines = new BinaryEdgeLabelLineIterator(sortedLabels, nodeBytes);
    writeLabels(mapLines);
    logger.info("Done");
}
// NOTE(review): intentionally empty — the graph is loaded in the
// constructor; presumably kept as a pipeline hook. Confirm it can be
// removed along with its call site in computeLabelMap().
void loadGraph() throws IOException {
}
void hashLabelStream(FastBufferedInputStream input, EdgeLabelLineWriter writer) throws IOException {
// Compute intermediate representation and write it on :
// "