* Software Heritage
* graph contains multiple node types (contents, directories, revisions, ...) and restricting
- * the traversal to specific node types is necessary for many querying operations: use cases.
+ * the traversal to specific node types is necessary for many querying operations:
+ * use cases.
*
* @author The Software Heritage developers
*/
public class AllowedEdges {
/**
* 2D boolean matrix storing access rights for all combination of src/dst node types (first
- * dimension is source, second dimension is destination), when edge restriction is not enforced
- * this array is set to null for early bypass.
+ * dimension is source, second dimension is destination). When edge restriction is not enforced, this
+ * array is set to null for early bypass.
*/
public boolean[][] restrictedTo;
/**
* Constructor.
*
- * @param edgesFmt a formatted string describing allowed edges
+ * @param edgesFmt a formatted string describing allowed
+ * edges
*/
public AllowedEdges(String edgesFmt) {
int nbNodeTypes = Node.Type.values().length;
this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes];
// Special values (null, empty, "*")
if (edgesFmt == null || edgesFmt.isEmpty()) {
return;
}
if (edgesFmt.equals("*")) {
// Allows for quick bypass (with simple null check) when no edge restriction
restrictedTo = null;
return;
}
// Format: "src1:dst1,src2:dst2,[...]"
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
if (nodeTypes.length != 2) {
throw new IllegalArgumentException("Cannot parse edge type: " + edgeType);
}
ArrayList srcTypes = Node.Type.parse(nodeTypes[0]);
ArrayList dstTypes = Node.Type.parse(nodeTypes[1]);
for (Node.Type srcType : srcTypes) {
for (Node.Type dstType : dstTypes) {
restrictedTo[srcType.ordinal()][dstType.ordinal()] = true;
}
}
}
}
/**
* Checks if a given edge can be followed during graph traversal.
*
* @param srcType edge source type
* @param dstType edge destination type
* @return true if allowed and false otherwise
*/
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
if (restrictedTo == null)
return true;
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java
index efd4915..75bb4f2 100644
--- a/java/src/main/java/org/softwareheritage/graph/Entry.java
+++ b/java/src/main/java/org/softwareheritage/graph/Entry.java
@@ -1,193 +1,188 @@
package org.softwareheritage.graph;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
public class Entry {
private final long PATH_SEPARATOR_ID = -1;
private Graph graph;
public void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename);
System.err.println("Graph loaded.");
}
public Graph get_graph() {
return graph.copy();
}
public String stats() {
try {
Stats stats = new Stats(graph.getPath());
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
return objectMapper.writeValueAsString(stats);
} catch (IOException e) {
throw new RuntimeException("Cannot read stats: " + e);
}
}
private int count_visitor(NodeCountVisitor f, long srcNodeId) {
int[] count = {0};
f.accept(srcNodeId, (node) -> {
count[0]++;
});
return count[0];
}
public int count_leaves(String direction, String edgesFmt, long srcNodeId) {
Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt);
return count_visitor(t::leavesVisitor, srcNodeId);
}
public int count_neighbors(String direction, String edgesFmt, long srcNodeId) {
Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt);
return count_visitor(t::neighborsVisitor, srcNodeId);
}
public int count_visit_nodes(String direction, String edgesFmt, long srcNodeId) {
Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt);
return count_visitor(t::visitNodesVisitor, srcNodeId);
}
public QueryHandler get_handler(String clientFIFO) {
return new QueryHandler(this.graph.copy(), clientFIFO);
}
private interface NodeCountVisitor {
void accept(long nodeId, Traversal.NodeIdConsumer consumer);
}
public class QueryHandler {
Graph graph;
DataOutputStream out;
String clientFIFO;
public QueryHandler(Graph graph, String clientFIFO) {
this.graph = graph;
this.clientFIFO = clientFIFO;
this.out = null;
}
public void writeNode(long nodeId) {
try {
out.writeLong(nodeId);
} catch (IOException e) {
throw new RuntimeException("Cannot write response to client: " + e);
}
}
public void writeEdge(long srcId, long dstId) {
writeNode(srcId);
writeNode(dstId);
}
public void writePath(ArrayList path) {
for (Long nodeId : path) {
writeNode(nodeId);
}
writeNode(PATH_SEPARATOR_ID);
}
public void open() {
try {
FileOutputStream file = new FileOutputStream(this.clientFIFO);
this.out = new DataOutputStream(file);
} catch (IOException e) {
throw new RuntimeException("Cannot open client FIFO: " + e);
}
}
public void close() {
try {
out.close();
} catch (IOException e) {
throw new RuntimeException("Cannot write response to client: " + e);
}
}
public void leaves(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.leavesVisitor(srcNodeId, this::writeNode);
close();
}
public void neighbors(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.neighborsVisitor(srcNodeId, this::writeNode);
close();
}
public void visit_nodes(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.visitNodesVisitor(srcNodeId, this::writeNode);
close();
}
public void visit_edges(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.visitNodesVisitor(srcNodeId, null, this::writeEdge);
close();
}
- public void visit_paths(String direction, String edgesFmt,
- long srcNodeId) {
+ public void visit_paths(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.visitPathsVisitor(srcNodeId, this::writePath);
close();
}
- public void walk(String direction, String edgesFmt, String algorithm,
- long srcNodeId, long dstNodeId) {
+ public void walk(String direction, String edgesFmt, String algorithm, long srcNodeId, long dstNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.walk(srcNodeId, dstNodeId, algorithm)) {
writeNode(nodeId);
}
close();
}
- public void walk_type(String direction, String edgesFmt, String algorithm,
- long srcNodeId, String dst) {
+ public void walk_type(String direction, String edgesFmt, String algorithm, long srcNodeId, String dst) {
open();
Node.Type dstType = Node.Type.fromStr(dst);
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.walk(srcNodeId, dstType, algorithm)) {
writeNode(nodeId);
}
close();
}
- public void random_walk(String direction, String edgesFmt, int retries,
- long srcNodeId, long dstNodeId) {
+ public void random_walk(String direction, String edgesFmt, int retries, long srcNodeId, long dstNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.randomWalk(srcNodeId, dstNodeId, retries)) {
writeNode(nodeId);
}
close();
}
- public void random_walk_type(String direction, String edgesFmt, int retries,
- long srcNodeId, String dst) {
+ public void random_walk_type(String direction, String edgesFmt, int retries, long srcNodeId, String dst) {
open();
Node.Type dstType = Node.Type.fromStr(dst);
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.randomWalk(srcNodeId, dstType, retries)) {
writeNode(nodeId);
}
close();
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java
index 3378c0b..ad44a54 100644
--- a/java/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,256 +1,257 @@
package org.softwareheritage.graph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.Transform;
import org.softwareheritage.graph.maps.NodeIdMap;
import org.softwareheritage.graph.maps.NodeTypesMap;
import java.io.IOException;
/**
* Main class storing the compressed graph and node id mappings.
*
* The compressed graph is stored using the WebGraph
- * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent
- * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id ↔
- * SWHID) are used for the input (users refer to the graph using SWHID) and the output (convert back to
- * SWHID for users results). However, since graph traversal can be restricted depending on the node
- * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full
- * SWHID lookup.
+ * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent
+ * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id
+ * ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output
+ * (convert back to SWHID for users results). However, since graph traversal can be restricted
+ * depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored
+ * as well to avoid a full SWHID lookup.
*
* @author The Software Heritage developers
* @see org.softwareheritage.graph.AllowedEdges
* @see org.softwareheritage.graph.maps.NodeIdMap
* @see org.softwareheritage.graph.maps.NodeTypesMap
*/
public class Graph extends ImmutableGraph {
/** File extension for the SWHID to long node id map */
public static final String SWHID_TO_NODE = ".swhid2node.bin";
/** File extension for the long node id to SWHID map */
public static final String NODE_TO_SWHID = ".node2swhid.bin";
/** File extension for the long node id to node type map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
ImmutableGraph graph;
/** Transposed compressed graph (used for backward traversals) */
ImmutableGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id ↔ SWHIDs */
NodeIdMap nodeIdMap;
/** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
public Graph(String path) throws IOException {
this.path = path;
this.graph = ImmutableGraph.loadMapped(path);
this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed");
this.nodeTypesMap = new NodeTypesMap(path);
this.nodeIdMap = new NodeIdMap(path, numNodes());
}
- protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed,
- String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) {
+ protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap,
+ NodeTypesMap nodeTypesMap) {
this.graph = graph;
this.graphTransposed = graphTransposed;
this.path = path;
this.nodeIdMap = nodeIdMap;
this.nodeTypesMap = nodeTypesMap;
}
/**
* Return a flyweight copy of the graph.
*/
@Override
public Graph copy() {
return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap);
}
@Override
public boolean randomAccess() {
return graph.randomAccess() && graphTransposed.randomAccess();
}
/**
* Return a transposed version of the graph.
*/
public Graph transpose() {
return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Return a symmetric version of the graph.
*/
public Graph symmetrize() {
ImmutableGraph symmetric = Transform.union(graph, graphTransposed);
return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
@Override
public long numNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
@Override
public long numArcs() {
return graph.numArcs();
}
/**
* Returns lazy iterator of successors of a node.
*
* @param nodeId node specified as a long id
- * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator
+ * @return lazy iterator of successors of the node, specified as a
+ * WebGraph LazyLongIterator
*/
@Override
public LazyLongIterator successors(long nodeId) {
return graph.successors(nodeId);
}
/**
* Returns lazy iterator of successors of a node while following a specific set of edge types.
*
* @param nodeId node specified as a long id
* @param allowedEdges the specification of which edges can be traversed
- * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator
+ * @return lazy iterator of successors of the node, specified as a
+ * WebGraph LazyLongIterator
*/
public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) {
if (allowedEdges.restrictedTo == null) {
// All edges are allowed, bypass edge check
return this.successors(nodeId);
} else {
LazyLongIterator allSuccessors = this.successors(nodeId);
Graph thisGraph = this;
return new LazyLongIterator() {
@Override
public long nextLong() {
long neighbor;
while ((neighbor = allSuccessors.nextLong()) != -1) {
if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) {
return neighbor;
}
}
return -1;
}
@Override
public long skip(final long n) {
long i;
- for (i = 0; i < n && nextLong() != -1; i++) ;
+ for (i = 0; i < n && nextLong() != -1; i++)
+ ;
return i;
}
};
}
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
@Override
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns lazy iterator of predecessors of a node.
*
* @param nodeId node specified as a long id
- * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator
+ * @return lazy iterator of predecessors of the node, specified as a
+ * WebGraph LazyLongIterator
*/
public LazyLongIterator predecessors(long nodeId) {
return this.transpose().successors(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return this.transpose().outdegree(nodeId);
}
/**
* Returns the underlying BVGraph.
*
* @return WebGraph BVGraph
*/
public ImmutableGraph getGraph() {
return this.graph;
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SWHID} node to long.
*
* @param swhid node specified as a {@link SWHID}
* @return internal long node id
* @see SWHID
*/
public long getNodeId(SWHID swhid) {
return nodeIdMap.getNodeId(swhid);
}
/**
* Converts long id node to {@link SWHID}.
*
* @param nodeId node specified as a long id
* @return external SWHID
* @see SWHID
*/
public SWHID getSWHID(long nodeId) {
return nodeIdMap.getSWHID(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Node.java b/java/src/main/java/org/softwareheritage/graph/Node.java
index 6f10d4f..e4a61d3 100644
--- a/java/src/main/java/org/softwareheritage/graph/Node.java
+++ b/java/src/main/java/org/softwareheritage/graph/Node.java
@@ -1,117 +1,116 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* A node in the Software Heritage graph.
*
* @author The Software Heritage developers
*/
public class Node {
/**
* Software Heritage graph node types, as described in the
* data model.
*/
public enum Type {
/** Content node */
CNT,
/** Directory node */
DIR,
/** Origin node */
ORI,
/** Release node */
REL,
/** Revision node */
REV,
/** Snapshot node */
SNP;
/**
* Converts integer to corresponding SWH node type.
*
* @param intType node type represented as an integer
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromInt(int intType) {
switch (intType) {
case 0:
return CNT;
case 1:
return DIR;
case 2:
return ORI;
case 3:
return REL;
case 4:
return REV;
case 5:
return SNP;
}
return null;
}
/**
* Converts node types to the corresponding int value
*
* @param type node type as an enum
* @return the corresponding int value
*/
public static int toInt(Node.Type type) {
switch (type) {
case CNT:
return 0;
case DIR:
return 1;
case ORI:
return 2;
case REL:
return 3;
case REV:
return 4;
case SNP:
return 5;
}
throw new IllegalArgumentException("Unknown node type: " + type);
}
/**
* Converts string to corresponding SWH node type.
*
* @param strType node type represented as a string
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromStr(String strType) {
if (!strType.matches("cnt|dir|ori|rel|rev|snp")) {
throw new IllegalArgumentException("Unknown node type: " + strType);
}
return Node.Type.valueOf(strType.toUpperCase());
}
/**
- * Parses SWH node type possible values from formatted string (see the API
- * syntax).
+ * Parses SWH node type possible values from formatted string (see the
+ * API syntax).
*
* @param strFmtType node types represented as a formatted string
* @return a list containing the {@link Node.Type} values
* @see org.softwareheritage.graph.Node.Type
*/
public static ArrayList parse(String strFmtType) {
ArrayList types = new ArrayList<>();
if (strFmtType.equals("*")) {
List nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
types.add(Node.Type.fromStr(strFmtType));
}
return types;
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/SWHID.java b/java/src/main/java/org/softwareheritage/graph/SWHID.java
index 6b810cc..16aff83 100644
--- a/java/src/main/java/org/softwareheritage/graph/SWHID.java
+++ b/java/src/main/java/org/softwareheritage/graph/SWHID.java
@@ -1,124 +1,118 @@
package org.softwareheritage.graph;
import com.fasterxml.jackson.annotation.JsonValue;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
/**
- * A Software Heritage persistent identifier (SWHID), see persistent
+ * A Software Heritage persistent identifier (SWHID), see persistent
* identifier documentation.
*
* @author The Software Heritage developers
*/
public class SWHID {
/** Fixed hash length of the SWHID */
public static final int HASH_LENGTH = 40;
/** Full SWHID as a string */
String swhid;
/** SWHID node type */
Node.Type type;
/**
* Constructor.
*
* @param swhid full SWHID as a string
*/
public SWHID(String swhid) {
this.swhid = swhid;
// SWHID format: 'swh:1:type:hash'
String[] parts = swhid.split(":");
if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) {
throw new IllegalArgumentException("malformed SWHID: " + swhid);
}
this.type = Node.Type.fromStr(parts[2]);
if (!parts[3].matches("[0-9a-f]{" + HASH_LENGTH + "}")) {
throw new IllegalArgumentException("malformed SWHID: " + swhid);
}
}
/**
* Creates a SWHID from a compact binary representation.
*
- * The binary format is specified in the Python module
- * swh.graph.swhid:str_to_bytes .
+ * The binary format is specified in the Python module swh.graph.swhid:str_to_bytes .
*/
public static SWHID fromBytes(byte[] input) {
byte[] digest = new byte[20];
System.arraycopy(input, 2, digest, 0, digest.length);
- String swhidStr = String.format(
- "swh:%d:%s:%s",
- input[0],
- Node.Type.fromInt(input[1]).toString().toLowerCase(),
- Hex.encodeHexString(digest)
- );
+ String swhidStr = String.format("swh:%d:%s:%s", input[0], Node.Type.fromInt(input[1]).toString().toLowerCase(),
+ Hex.encodeHexString(digest));
return new SWHID(swhidStr);
}
@Override
public boolean equals(Object otherObj) {
if (otherObj == this)
return true;
if (!(otherObj instanceof SWHID))
return false;
SWHID other = (SWHID) otherObj;
return swhid.equals(other.getSWHID());
}
@Override
public int hashCode() {
return swhid.hashCode();
}
@Override
public String toString() {
return swhid;
}
/**
* Converts SWHID to a compact binary representation.
*
- * The binary format is specified in the Python module
- * swh.graph.swhid:str_to_bytes .
+ * The binary format is specified in the Python module swh.graph.swhid:str_to_bytes .
*/
public byte[] toBytes() {
byte[] bytes = new byte[22];
byte[] digest;
- bytes[0] = (byte) 1; // namespace version
- bytes[1] = (byte) Node.Type.toInt(this.type); // SWHID type
+ bytes[0] = (byte) 1; // namespace version
+ bytes[1] = (byte) Node.Type.toInt(this.type); // SWHID type
try {
- digest = Hex.decodeHex(this.swhid.substring(10)); // SHA1 hash
+ digest = Hex.decodeHex(this.swhid.substring(10)); // SHA1 hash
System.arraycopy(digest, 0, bytes, 2, digest.length);
} catch (DecoderException e) {
throw new IllegalArgumentException("invalid hex sequence in SWHID: " + this.swhid);
}
return bytes;
}
/**
* Returns full SWHID as a string.
*
* @return full SWHID string
*/
@JsonValue
public String getSWHID() {
return swhid;
}
/**
* Returns SWHID node type.
*
* @return SWHID corresponding {@link Node.Type}
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getType() {
return type;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Stats.java b/java/src/main/java/org/softwareheritage/graph/Stats.java
index 92d4fd8..1c1cb0f 100644
--- a/java/src/main/java/org/softwareheritage/graph/Stats.java
+++ b/java/src/main/java/org/softwareheritage/graph/Stats.java
@@ -1,67 +1,67 @@
package org.softwareheritage.graph;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
/**
* Statistics on the compressed graph.
*
- * These statistics are not computed but directly read from WebGraph generated .stats and .properties files.
+ * These statistics are not computed but directly read from
+ * WebGraph generated .stats and .properties files.
*
* @author The Software Heritage developers
*/
public class Stats {
public Counts counts;
public Ratios ratios;
public Degree indegree;
public Degree outdegree;
/**
* Constructor.
*
* @param graphPath path and basename of compressed graph
*/
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
properties.load(new FileInputStream(graphPath + ".stats"));
this.counts = new Counts();
this.ratios = new Ratios();
this.indegree = new Degree();
this.outdegree = new Degree();
this.counts.nodes = Long.parseLong(properties.getProperty("nodes"));
this.counts.edges = Long.parseLong(properties.getProperty("arcs"));
this.ratios.compression = Double.parseDouble(properties.getProperty("compratio"));
this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode"));
this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink"));
this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality"));
this.indegree.min = Long.parseLong(properties.getProperty("minindegree"));
this.indegree.max = Long.parseLong(properties.getProperty("maxindegree"));
this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree"));
this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree"));
this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree"));
this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree"));
}
public static class Counts {
public long nodes;
public long edges;
}
public static class Ratios {
public double compression;
public double bitsPerNode;
public double bitsPerEdge;
public double avgLocality;
}
public static class Degree {
public long min;
public long max;
public double avg;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java
index f83d1fb..ac50aef 100644
--- a/java/src/main/java/org/softwareheritage/graph/Traversal.java
+++ b/java/src/main/java/org/softwareheritage/graph/Traversal.java
@@ -1,525 +1,516 @@
package org.softwareheritage.graph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.server.Endpoint;
import java.util.*;
import java.util.function.Consumer;
import java.util.function.LongConsumer;
/**
* Traversal algorithms on the compressed graph.
*
* Internal implementation of the traversal API endpoints. These methods only input/output internal
* long ids, which are converted in the {@link Endpoint} higher-level class to {@link SWHID}.
*
* @author The Software Heritage developers
* @see Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
Graph graph;
/** Graph edge restriction */
AllowedEdges edges;
/** Hash set storing if we have visited a node */
HashSet visited;
/** Hash map storing parent node id for each nodes during a traversal */
Map parentNode;
/** Number of edges accessed during traversal */
long nbEdgesAccessed;
/** random number generator, for random walks */
Random rng;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
- * @param edgesFmt a formatted string describing allowed edges
+ * @param edgesFmt a formatted string describing allowed
+ * edges
*/
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
if (direction.equals("backward")) {
this.graph = graph.transpose();
} else {
this.graph = graph;
}
this.edges = new AllowedEdges(edgesFmt);
this.visited = new HashSet<>();
this.parentNode = new HashMap<>();
this.nbEdgesAccessed = 0;
this.rng = new Random();
}
/**
* Returns number of accessed edges during traversal.
*
* @return number of edges accessed in last traversal
*/
public long getNbEdgesAccessed() {
return nbEdgesAccessed;
}
/**
* Returns number of accessed nodes during traversal.
*
* @return number of nodes accessed in last traversal
*/
public long getNbNodesAccessed() {
return this.visited.size();
}
/**
* Push version of {@link #leaves} will fire passed callback for each leaf.
*/
public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
neighborsCnt++;
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
if (neighborsCnt == 0) {
cb.accept(currentNodeId);
}
}
}
/**
* Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
* @return list of node ids corresponding to the leaves
*/
public ArrayList leaves(long srcNodeId) {
ArrayList nodeIds = new ArrayList<>();
leavesVisitor(srcNodeId, nodeIds::add);
return nodeIds;
}
/**
- * Push version of {@link #neighbors}: will fire passed callback on each
- * neighbor.
+ * Push version of {@link #neighbors}: will fire passed callback on each neighbor.
*/
public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) {
this.nbEdgesAccessed = graph.outdegree(srcNodeId);
LazyLongIterator it = graph.successors(srcNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
cb.accept(neighborNodeId);
}
}
/**
* Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
* @return list of node ids corresponding to the neighbors
*/
public ArrayList neighbors(long srcNodeId) {
ArrayList nodeIds = new ArrayList<>();
neighborsVisitor(srcNodeId, nodeIds::add);
return nodeIds;
}
/**
- * Push version of {@link #visitNodes}: will fire passed callback on each
- * visited node.
+ * Push version of {@link #visitNodes}: will fire passed callback on each visited node.
*/
public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (nodeCb != null) {
nodeCb.accept(currentNodeId);
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (edgeCb != null) {
edgeCb.accept(currentNodeId, neighborNodeId);
}
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
}
}
/** One-argument version to handle callbacks properly */
public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) {
visitNodesVisitor(srcNodeId, cb, null);
}
/**
* Performs a graph traversal and returns explored nodes.
*
* @param srcNodeId source node
* @return list of explored node ids
*/
public ArrayList visitNodes(long srcNodeId) {
ArrayList nodeIds = new ArrayList<>();
visitNodesVisitor(srcNodeId, nodeIds::add);
return nodeIds;
}
/**
- * Push version of {@link #visitPaths}: will fire passed callback on each
- * discovered (complete) path.
+ * Push version of {@link #visitPaths}: will fire passed callback on each discovered (complete)
+ * path.
*/
public void visitPathsVisitor(long srcNodeId, PathConsumer cb) {
Stack currentPath = new Stack<>();
this.nbEdgesAccessed = 0;
visitPathsInternalVisitor(srcNodeId, currentPath, cb);
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList> visitPaths(long srcNodeId) {
ArrayList> paths = new ArrayList<>();
visitPathsVisitor(srcNodeId, paths::add);
return paths;
}
- private void visitPathsInternalVisitor(long currentNodeId,
- Stack currentPath,
- PathConsumer cb) {
+ private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
visitPathsInternalVisitor(neighborNodeId, currentPath, cb);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList path = new ArrayList<>(currentPath);
cb.accept(path);
}
currentPath.pop();
}
/**
- * Performs a graph traversal with backtracking, and returns the first
- * found path from source to destination.
+ * Performs a graph traversal with backtracking, and returns the first found path from source to
+ * destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public ArrayList walk(long srcNodeId, T dst, String visitOrder) {
long dstNodeId;
if (visitOrder.equals("dfs")) {
dstNodeId = walkInternalDFS(srcNodeId, dst);
} else if (visitOrder.equals("bfs")) {
dstNodeId = walkInternalBFS(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown visit order: " + visitOrder);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Cannot find destination: " + dst);
}
return backtracking(srcNodeId, dstNodeId);
}
/**
- * Performs a random walk (picking a random successor at each step) from
- * source to destination.
+ * Performs a random walk (picking a random successor at each step) from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
- * @return found path as a list of node ids or an empty path to indicate
- * that no suitable path have been found
+     * @return found path as a list of node ids or an empty path to indicate that no suitable path has
+     *         been found
*/
public ArrayList randomWalk(long srcNodeId, T dst) {
return randomWalk(srcNodeId, dst, 0);
}
/**
- * Performs a stubborn random walk (picking a random successor at each
- * step) from source to destination. The walk is "stubborn" in the sense
- * that it will not give up the first time if a satisfying target node is
- * found, but it will retry up to a limited amount of times.
+ * Performs a stubborn random walk (picking a random successor at each step) from source to
+ * destination. The walk is "stubborn" in the sense that it will not give up the first time if a
+ * satisfying target node is found, but it will retry up to a limited amount of times.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @param retries number of times to retry; 0 means no retries (single walk)
- * @return found path as a list of node ids or an empty path to indicate
- * that no suitable path have been found
+     * @return found path as a list of node ids or an empty path to indicate that no suitable path has
+     *         been found
*/
public ArrayList randomWalk(long srcNodeId, T dst, int retries) {
long curNodeId = srcNodeId;
ArrayList path = new ArrayList<>();
this.nbEdgesAccessed = 0;
boolean found;
if (retries < 0) {
throw new IllegalArgumentException("Negative number of retries given: " + retries);
}
while (true) {
path.add(curNodeId);
LazyLongIterator successors = graph.successors(curNodeId, edges);
curNodeId = randomPick(successors);
if (curNodeId < 0) {
found = false;
break;
}
if (isDstNode(curNodeId, dst)) {
path.add(curNodeId);
found = true;
break;
}
}
if (found) {
return path;
- } else if (retries > 0) { // try again
+ } else if (retries > 0) { // try again
return randomWalk(srcNodeId, dst, retries - 1);
- } else { // not found and no retries left
+ } else { // not found and no retries left
path.clear();
return path;
}
}
/**
- * Randomly choose an element from an iterator over Longs using reservoir
- * sampling
+ * Randomly choose an element from an iterator over Longs using reservoir sampling
*
* @param elements iterator over selection domain
* @return randomly chosen element or -1 if no suitable element was found
*/
private long randomPick(LazyLongIterator elements) {
long curPick = -1;
long seenCandidates = 0;
- for (long element; (element = elements.nextLong()) != -1; ) {
+ for (long element; (element = elements.nextLong()) != -1;) {
seenCandidates++;
if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) {
curPick = element;
}
}
return curPick;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalDFS(long srcNodeId, T dst) {
Stack stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalBFS(long srcNodeId, T dst) {
Queue queue = new LinkedList<>();
this.nbEdgesAccessed = 0;
queue.add(srcNodeId);
visited.add(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!visited.contains(neighborNodeId)) {
queue.add(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList backtracking(long srcNodeId, long dstNodeId) {
ArrayList path = new ArrayList<>();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = parentNode.get(currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
/**
* Find a common descendant between two given nodes using two parallel BFS
*
* @param lhsNode the first node
* @param rhsNode the second node
* @return the found path, as a list of node ids
*/
public Long findCommonDescendant(long lhsNode, long rhsNode) {
Queue lhsStack = new ArrayDeque<>();
Queue rhsStack = new ArrayDeque<>();
HashSet lhsVisited = new HashSet<>();
HashSet rhsVisited = new HashSet<>();
lhsStack.add(lhsNode);
rhsStack.add(rhsNode);
lhsVisited.add(lhsNode);
rhsVisited.add(rhsNode);
this.nbEdgesAccessed = 0;
Long curNode;
while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) {
if (!lhsStack.isEmpty()) {
curNode = lhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
LazyLongIterator it = graph.successors(curNode, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!lhsVisited.contains(neighborNodeId)) {
if (rhsVisited.contains(neighborNodeId))
return neighborNodeId;
lhsStack.add(neighborNodeId);
lhsVisited.add(neighborNodeId);
}
}
}
if (!rhsStack.isEmpty()) {
curNode = rhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
LazyLongIterator it = graph.successors(curNode, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
if (!rhsVisited.contains(neighborNodeId)) {
if (lhsVisited.contains(neighborNodeId))
return neighborNodeId;
rhsStack.add(neighborNodeId);
rhsVisited.add(neighborNodeId);
}
}
}
}
return null;
}
public interface NodeIdConsumer extends LongConsumer {
/**
- * Callback for incrementally receiving node identifiers during a graph
- * visit.
+ * Callback for incrementally receiving node identifiers during a graph visit.
*/
void accept(long nodeId);
}
public interface EdgeIdConsumer {
/**
- * Callback for incrementally receiving edge identifiers during a graph
- * visit.
+ * Callback for incrementally receiving edge identifiers during a graph visit.
*/
void accept(long srcId, long dstId);
}
public interface PathConsumer extends Consumer> {
/**
- * Callback for incrementally receiving node paths (made of node
- * identifiers) during a graph visit.
+ * Callback for incrementally receiving node paths (made of node identifiers) during a graph visit.
*/
void accept(ArrayList path);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
index 44f9b92..883264d 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
@@ -1,111 +1,107 @@
package org.softwareheritage.graph.benchmark;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.Graph;
import java.io.File;
import java.io.IOException;
-
public class BFS {
private final static Logger LOGGER = LoggerFactory.getLogger(BFS.class);
private final ImmutableGraph graph;
public BFS(ImmutableGraph graph) {
this.graph = graph;
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- BFS.class.getName(),
- "",
+ SimpleJSAP jsap = new SimpleJSAP(BFS.class.getName(), "",
new Parameter[]{
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),
- new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED,
- 'T', "transposed", "Use transposed graph (default: false)"),
- }
- );
+ new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'T',
+ "transposed", "Use transposed graph (default: false)"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) throws IOException {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
boolean useTransposed = config.getBoolean("useTransposed");
System.err.println("Loading graph " + graphPath + " ...");
Graph graph = new Graph(graphPath);
System.err.println("Graph loaded.");
if (useTransposed)
graph = graph.transpose();
BFS bfs = new BFS(graph);
bfs.bfsperm();
}
// Partly inlined from it.unimi.dsi.law.big.graph.BFS
private void bfsperm() throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
// Use a disk based queue to store BFS frontier
final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
final LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
final ProgressLogger pl = new ProgressLogger(LOGGER);
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting breadth-first visit...");
for (long i = 0; i < n; i++) {
- if (visited.getBoolean(i)) continue;
+ if (visited.getBoolean(i))
+ continue;
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (!visited.getBoolean(succ)) {
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
}
pl.update();
}
}
pl.done();
queue.close();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
index 7c3cef7..98dd854 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
@@ -1,162 +1,154 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.*;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SWHID;
import org.softwareheritage.graph.benchmark.utils.Random;
import org.softwareheritage.graph.benchmark.utils.Statistics;
import org.softwareheritage.graph.server.Endpoint;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.StringJoiner;
import java.util.function.Function;
/**
* Benchmark common utility functions.
*
* @author The Software Heritage developers
*/
public class Benchmark {
/** CSV separator for log file */
final String CSV_SEPARATOR = ";";
/** Command line arguments */
public Args args;
/**
* Constructor.
*/
public Benchmark() {
this.args = new Args();
}
/**
* Parses benchmark command line arguments.
*
* @param args command line arguments
*/
public void parseCommandLineArgs(String[] args) throws JSAPException {
SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(),
"Benchmark tool for Software Heritage use-cases scenarios.",
new Parameter[]{
new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the compressed graph."),
new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
"nb-nodes", "Number of random nodes used to do the benchmark."),
new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l',
"log-file", "File name to output CSV format benchmark log."),
- new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's',
- "seed", "Random generator seed."),
- });
+ new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "seed",
+ "Random generator seed."),});
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
this.args.graphPath = config.getString("graphPath");
this.args.nbNodes = config.getInt("nbNodes");
this.args.logFile = config.getString("logFile");
this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random();
}
/**
* Creates CSV file for log output.
*/
public void createCSVLogFile() throws IOException {
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) {
StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR);
- csvHeader.add("use case name")
- .add("SWHID")
- .add("number of edges accessed")
- .add("traversal timing")
- .add("swhid2node timing")
- .add("node2swhid timing");
+ csvHeader.add("use case name").add("SWHID").add("number of edges accessed").add("traversal timing")
+ .add("swhid2node timing").add("node2swhid timing");
csvLog.write(csvHeader.toString() + "\n");
}
}
/**
* Times a specific endpoint and outputs individual datapoints along with aggregated statistics.
*
* @param useCaseName benchmark use-case name
* @param graph compressed graph used in the benchmark
* @param nodeIds node ids to use as starting point for the endpoint traversal
* @param operation endpoint function to benchmark
- * @param dstFmt destination formatted string as described in the API
+     * @param dstFmt destination formatted string as described in the API
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
- Function operation, String dstFmt, String algorithm)
- throws IOException {
+ Function operation, String dstFmt, String algorithm) throws IOException {
ArrayList timings = new ArrayList<>();
ArrayList timingsNormalized = new ArrayList<>();
ArrayList nbEdgesAccessed = new ArrayList<>();
final boolean append = true;
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) {
for (long nodeId : nodeIds) {
SWHID swhid = graph.getSWHID(nodeId);
Endpoint.Output output = (dstFmt == null)
? operation.apply(new Endpoint.Input(swhid))
: operation.apply(new Endpoint.Input(swhid, dstFmt, algorithm));
StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR);
- csvLine.add(useCaseName)
- .add(swhid.toString())
- .add(Long.toString(output.meta.nbEdgesAccessed))
+ csvLine.add(useCaseName).add(swhid.toString()).add(Long.toString(output.meta.nbEdgesAccessed))
.add(Double.toString(output.meta.timings.traversal))
.add(Double.toString(output.meta.timings.swhid2node))
.add(Double.toString(output.meta.timings.node2swhid));
csvLog.write(csvLine.toString() + "\n");
timings.add(output.meta.timings.traversal);
nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed);
if (output.meta.nbEdgesAccessed != 0) {
timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
}
}
}
System.out.println("\n" + useCaseName + " use-case:");
System.out.println("timings:");
Statistics stats = new Statistics(timings);
stats.printAll();
System.out.println("timings normalized:");
Statistics statsNormalized = new Statistics(timingsNormalized);
statsNormalized.printAll();
System.out.println("nb edges accessed:");
Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed);
statsNbEdgesAccessed.printAll();
}
/**
* Same as {@link #timeEndpoint} but without destination or algorithm specified to endpoint call.
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
- Function operation) throws IOException {
+ Function operation) throws IOException {
timeEndpoint(useCaseName, graph, nodeIds, operation, null, null);
}
/**
* Input arguments.
*/
public class Args {
/** Basename of the compressed graph */
public String graphPath;
/** Number of random nodes to use for the benchmark */
public int nbNodes;
/** File name for CSV format benchmark log */
public String logFile;
/** Random generator */
public Random random;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
index 910ca76..1bfc2a7 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
@@ -1,44 +1,42 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.server.Endpoint;
import java.io.IOException;
/**
- * Benchmark Software Heritage browsing
+ * Benchmark Software Heritage browsing
* use-cases scenarios.
*
* @author The Software Heritage developers
*/
public class Browsing {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
- long[] dirNodeIds =
- bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
- long[] revNodeIds =
- bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
+ long[] dirNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
+ long[] revNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir");
Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors);
bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths);
bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
index 324f38e..9ab04c3 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
@@ -1,51 +1,45 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.server.Endpoint;
import java.io.IOException;
/**
- * Benchmark Software Heritage provenance
+ * Benchmark Software Heritage provenance
* use-cases scenarios.
*
* @author The Software Heritage developers
*/
public class Provenance {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev");
Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
- bench.timeEndpoint(
- "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
- bench.timeEndpoint(
- "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
- bench.timeEndpoint(
- "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
-
- bench.timeEndpoint(
- "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
- bench.timeEndpoint(
- "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
- bench.timeEndpoint(
- "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
+ bench.timeEndpoint("commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
+ bench.timeEndpoint("commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
+ bench.timeEndpoint("complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
+
+ bench.timeEndpoint("origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
+ bench.timeEndpoint("origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
+ bench.timeEndpoint("complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
index 3a6c6dc..c0cd7ec 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
@@ -1,37 +1,37 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.JSAPException;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.server.Endpoint;
import java.io.IOException;
/**
- * Benchmark Software Heritage vault
- * use-case scenario.
+ * Benchmark Software Heritage vault use-case scenario.
*
* @author The Software Heritage developers
*/
public class Vault {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint endpoint = new Endpoint(graph, "forward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
index e2e68ff..5dab92b 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java
@@ -1,66 +1,62 @@
package org.softwareheritage.graph.experiments.forks;
import com.martiansoftware.jsap.*;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Traversal;
import java.io.IOException;
import java.util.Scanner;
public class FindCommonAncestor {
private Graph graph;
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename);
System.err.println("Graph loaded.");
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- FindCommonAncestor.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'e', "edges", "Edges constraints"),
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- }
- );
+ SimpleJSAP jsap = new SimpleJSAP(FindCommonAncestor.class.getName(), "",
+ new Parameter[]{
+ new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e',
+ "edges", "Edges constraints"),
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String edgesFmt = config.getString("edgesFmt");
FindCommonAncestor fca = new FindCommonAncestor();
try {
fca.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
Scanner input = new Scanner(System.in);
while (input.hasNextLong()) {
long lhsNode = input.nextLong();
long rhsNode = input.nextLong();
Traversal t = new Traversal(fca.graph.symmetrize(), "forward", edgesFmt);
System.out.println(t.findCommonDescendant(lhsNode, rhsNode));
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
index 6839a8f..f916a8e 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java
@@ -1,130 +1,123 @@
package org.softwareheritage.graph.experiments.forks;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import java.io.IOException;
import java.util.*;
public class FindPath {
private Graph graph;
private Long emptySnapshot;
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename).symmetrize();
System.err.println("Graph loaded.");
this.emptySnapshot = null;
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- FindPath.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- }
- );
+ SimpleJSAP jsap = new SimpleJSAP(FindPath.class.getName(), "",
+ new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'g', "graph", "Basename of the compressed graph"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
private boolean nodeIsEmptySnapshot(Long node) {
- if (this.emptySnapshot == null
- && this.graph.getNodeType(node) == Node.Type.SNP
+ if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP
&& this.graph.outdegree(node) == 0) {
System.err.println("Found empty snapshot: " + node);
this.emptySnapshot = node;
}
return node.equals(this.emptySnapshot);
}
- private Boolean shouldVisit(Long node){
+ private Boolean shouldVisit(Long node) {
Node.Type nt = this.graph.getNodeType(node);
- if (nt != Node.Type.REV && nt != Node.Type.REL
- && nt != Node.Type.SNP && nt != Node.Type.ORI) {
+ if (nt != Node.Type.REV && nt != Node.Type.REL && nt != Node.Type.SNP && nt != Node.Type.ORI) {
return false;
}
if (this.nodeIsEmptySnapshot(node))
return false;
return true;
}
private ArrayList findPath(Long src, Long dst) {
HashSet visited = new HashSet<>();
Queue queue = new ArrayDeque<>();
Map parentNode = new HashMap<>();
queue.add(src);
visited.add(src);
while (!queue.isEmpty()) {
long currentNode = queue.poll();
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
- if (!shouldVisit(succ) || visited.contains(succ)) continue;
+ if (!shouldVisit(succ) || visited.contains(succ))
+ continue;
visited.add(succ);
queue.add(succ);
parentNode.put(succ, currentNode);
if (succ == dst) {
ArrayList path = new ArrayList<>();
long n = dst;
while (n != src) {
path.add(n);
n = parentNode.get(n);
}
path.add(src);
Collections.reverse(path);
return path;
}
}
}
return null;
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
FindPath fpath = new FindPath();
try {
fpath.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
Scanner input = new Scanner(System.in);
while (input.hasNextLong()) {
long lhsNode = input.nextLong();
long rhsNode = input.nextLong();
ArrayList path = fpath.findPath(lhsNode, rhsNode);
if (path != null) {
for (Long n : path) {
System.out.format("%d ", n);
}
System.out.println();
- }
- else {
+ } else {
System.out.println("null");
}
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
index 0b239d1..8dddf1d 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
@@ -1,253 +1,249 @@
package org.softwareheritage.graph.experiments.forks;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
-import org.softwareheritage.graph.benchmark.BFS;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
public class ForkCC {
public Boolean includeRootDir;
private Graph graph;
private Long emptySnapshot;
private LongArrayBitVector visited;
private LongArrayBitVector whitelist;
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- ForkCC.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED,
- 't', "whitelist", "Whitelist of origins"),
- new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED,
- 'R', "includerootdir", "Include root directory (default: false)"),
- new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'o', "outdir", "Directory where to put the results"),
- }
- );
+ SimpleJSAP jsap = new SimpleJSAP(ForkCC.class.getName(), "",
+ new Parameter[]{
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),
+ new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't',
+ "whitelist", "Whitelist of origins"),
+ new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'R',
+ "includerootdir", "Include root directory (default: false)"),
+ new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
+ "outdir", "Directory where to put the results"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
private static void printDistribution(ArrayList<ArrayList<Long>> components) {
TreeMap<Long, Long> distribution = new TreeMap<>();
for (ArrayList<Long> component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry<Long, Long> entry : distribution.entrySet()) {
System.out.format("%d %d\n", entry.getKey(), entry.getValue());
}
}
private static void printLargestComponent(ArrayList<ArrayList<Long>> components) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList<Long> component = components.get(indexLargest);
for (Long node : component) {
System.out.println(node);
}
}
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename).symmetrize();
System.err.println("Graph loaded.");
this.emptySnapshot = null;
this.whitelist = null;
this.visited = null;
this.includeRootDir = null;
}
private boolean nodeIsEmptySnapshot(Long node) {
- if (this.emptySnapshot == null
- && this.graph.getNodeType(node) == Node.Type.SNP
+ if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP
&& this.graph.outdegree(node) == 0) {
System.err.println("Found empty snapshot: " + node);
this.emptySnapshot = node;
}
return node.equals(this.emptySnapshot);
}
private Boolean shouldVisit(Long node) {
Node.Type nt = this.graph.getNodeType(node);
if (nt == Node.Type.CNT) {
return false;
}
if (nt == Node.Type.DIR && !includeRootDir)
return false;
if (this.nodeIsEmptySnapshot(node))
return false;
if (visited.getBoolean(node))
return false;
return true;
}
private ArrayList<ArrayList<Long>> compute(ProgressLogger pl) throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
// Use a disk based queue to store BFS frontier
final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
visited = LongArrayBitVector.ofLength(n);
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting connected components visit...");
ArrayList<ArrayList<Long>> components = new ArrayList<>();
for (long i = 0; i < n; i++) {
- if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR) continue;
+ if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR)
+ continue;
ArrayList<Long> component = new ArrayList<>();
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
Node.Type cur_nt = this.graph.getNodeType(currentNode);
- if (cur_nt == Node.Type.ORI
- && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) {
+ if (cur_nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) {
// TODO: add a check that the origin has >=1 non-empty snapshot
component.add(currentNode);
}
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
- if (!shouldVisit(succ)) continue;
- if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV) continue;
+ if (!shouldVisit(succ))
+ continue;
+ if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV)
+ continue;
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
pl.update();
}
if (component.size() > 0) {
components.add(component);
}
}
pl.done();
queue.close();
return components;
}
private static void printDistribution(ArrayList<ArrayList<Long>> components, Formatter out) {
TreeMap<Long, Long> distribution = new TreeMap<>();
for (ArrayList<Long> component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry<Long, Long> entry : distribution.entrySet()) {
out.format("%d %d\n", entry.getKey(), entry.getValue());
}
}
private static void printLargestComponent(ArrayList<ArrayList<Long>> components, Formatter out) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList<Long> component = components.get(indexLargest);
for (Long node : component) {
out.format("%d\n", node);
}
}
private static void printAllComponents(ArrayList<ArrayList<Long>> components, Formatter out) {
for (int i = 1; i < components.size(); ++i) {
ArrayList<Long> component = components.get(i);
for (Long node : component) {
out.format("%d ", node);
}
out.format("\n");
}
}
private void parseWhitelist(String path) {
System.err.println("Loading whitelist " + path + " ...");
this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes());
Scanner scanner;
try {
scanner = new Scanner(new File(path));
while (scanner.hasNextLong()) {
whitelist.set(scanner.nextLong());
}
System.err.println("Whitelist loaded.");
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String whitelistPath = config.getString("whitelistPath");
boolean includeRootDir = config.getBoolean("includeRootDir");
String outdirPath = config.getString("outdir");
ForkCC forkCc = new ForkCC();
try {
forkCc.load_graph(graphPath);
forkCc.includeRootDir = includeRootDir;
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
if (whitelistPath != null) {
forkCc.parseWhitelist(whitelistPath);
}
ProgressLogger logger = new ProgressLogger();
- //noinspection ResultOfMethodCallIgnored
+ // noinspection ResultOfMethodCallIgnored
new File(outdirPath).mkdirs();
try {
ArrayList<ArrayList<Long>> components = forkCc.compute(logger);
printDistribution(components, new Formatter(outdirPath + "/distribution.txt"));
printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt"));
printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt"));
} catch (IOException e) {
e.printStackTrace();
}
logger.done();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java
index 1ebc7fb..8e6da02 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java
@@ -1,228 +1,223 @@
package org.softwareheritage.graph.experiments.forks;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
public class ForkCliques {
private Graph graph;
private LongArrayBitVector whitelist;
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename);
System.err.println("Graph loaded.");
this.whitelist = null;
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- ForkCliques.class.getName(),
- "",
+ SimpleJSAP jsap = new SimpleJSAP(ForkCliques.class.getName(), "",
new Parameter[]{
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED,
- 't', "whitelist", "Whitelist of origins"),
- new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'o', "outdir", "Directory where to put the results"),
- }
- );
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),
+ new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't',
+ "whitelist", "Whitelist of origins"),
+ new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
+ "outdir", "Directory where to put the results"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
private ArrayList<Long> dfsAt(Long baseNode) {
ArrayList<Long> res = new ArrayList<>();
final Deque<Long> stack = new ArrayDeque<>();
HashSet<Long> seen = new HashSet<>();
stack.push(baseNode);
while (!stack.isEmpty()) {
final Long currentNode = stack.pop();
final LazyLongIterator iterator = this.graph.predecessors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (!seen.contains(succ)) {
Node.Type nt = this.graph.getNodeType(succ);
if (nt == Node.Type.DIR || nt == Node.Type.CNT)
continue;
- if (nt == Node.Type.ORI
- && (this.whitelist == null || this.whitelist.getBoolean(succ))) {
+ if (nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(succ))) {
res.add(succ);
} else {
stack.push(succ);
seen.add(succ);
}
}
}
}
Collections.sort(res);
return res;
}
private boolean isBaseRevision(Long node) {
if (this.graph.getNodeType(node) != Node.Type.REV)
return false;
final LazyLongIterator iterator = this.graph.successors(node);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (this.graph.getNodeType(succ) == Node.Type.REV)
return false;
}
- return true;
+ return true;
}
static private String fingerprint(ArrayList<Long> cluster) {
MessageDigest digest;
try {
digest = MessageDigest.getInstance("SHA-256");
} catch (NoSuchAlgorithmException e) {
e.printStackTrace();
return null;
}
for (Long n : cluster)
digest.update(Longs.toByteArray(n));
return new String(digest.digest());
}
private ArrayList<ArrayList<Long>> compute(ProgressLogger pl) {
final long n = this.graph.numNodes();
HashSet<String> fingerprints = new HashSet<>();
ArrayList<ArrayList<Long>> clusters = new ArrayList<>();
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting topological sort...");
for (long i = 0; i < n; i++) {
if (isBaseRevision(i)) {
ArrayList currentCluster = dfsAt(i);
String clusterFp = fingerprint(currentCluster);
if (!fingerprints.contains(clusterFp)) {
fingerprints.add(clusterFp);
clusters.add(currentCluster);
}
}
pl.update();
}
pl.done();
return clusters;
}
private static void printDistribution(ArrayList<ArrayList<Long>> components, Formatter out) {
TreeMap<Long, Long> distribution = new TreeMap<>();
for (ArrayList<Long> component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry<Long, Long> entry : distribution.entrySet()) {
out.format("%d %d\n", entry.getKey(), entry.getValue());
}
}
private static void printLargestComponent(ArrayList<ArrayList<Long>> components, Formatter out) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList<Long> component = components.get(indexLargest);
for (Long node : component) {
out.format("%d\n", node);
}
}
private static void printAllComponents(ArrayList<ArrayList<Long>> components, Formatter out) {
for (int i = 1; i < components.size(); ++i) {
ArrayList<Long> component = components.get(i);
for (Long node : component) {
out.format("%d ", node);
}
out.format("\n");
}
}
private void parseWhitelist(String path) {
System.err.println("Loading whitelist " + path + " ...");
this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes());
Scanner scanner;
try {
scanner = new Scanner(new File(path));
- while(scanner.hasNextLong()) {
+ while (scanner.hasNextLong()) {
whitelist.set(scanner.nextLong());
}
System.err.println("Whitelist loaded.");
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String whitelistPath = config.getString("whitelistPath");
String outdirPath = config.getString("outdir");
ForkCliques forkCliques = new ForkCliques();
try {
forkCliques.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
if (whitelistPath != null) {
forkCliques.parseWhitelist(whitelistPath);
}
Logger rootLogger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
rootLogger.setLevel(Level.DEBUG);
ProgressLogger logger = new ProgressLogger(rootLogger);
ArrayList<ArrayList<Long>> components = forkCliques.compute(logger);
- //noinspection ResultOfMethodCallIgnored
+ // noinspection ResultOfMethodCallIgnored
new File(outdirPath).mkdirs();
try {
printDistribution(components, new Formatter(outdirPath + "/distribution.txt"));
printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt"));
printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt"));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
logger.done();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java
index 81a0c50..d99dc2c 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java
@@ -1,93 +1,88 @@
package org.softwareheritage.graph.experiments.forks;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import java.io.IOException;
import java.util.ArrayList;
public class ListEmptyOrigins {
private Graph graph;
private Long emptySnapshot;
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- ListEmptyOrigins.class.getName(),
- "",
- new Parameter[]{
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- }
- );
+ SimpleJSAP jsap = new SimpleJSAP(ListEmptyOrigins.class.getName(), "",
+ new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'g', "graph", "Basename of the compressed graph"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
ListEmptyOrigins leo = new ListEmptyOrigins();
try {
leo.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
ArrayList<Long> badlist = leo.compute(leo.graph);
for (Long bad : badlist) {
System.out.println(bad);
}
}
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename);
System.err.println("Graph loaded.");
this.emptySnapshot = null;
}
private boolean nodeIsEmptySnapshot(Long node) {
System.err.println(this.graph.getNodeType(node) + " " + this.graph.outdegree(node) + " " + node);
- if (this.emptySnapshot == null
- && this.graph.getNodeType(node) == Node.Type.SNP
+ if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP
&& this.graph.outdegree(node) == 0) {
System.err.println("Found empty snapshot: " + node);
this.emptySnapshot = node;
}
return node.equals(this.emptySnapshot);
}
private ArrayList<Long> compute(ImmutableGraph graph) {
final long n = graph.numNodes();
ArrayList<Long> bad = new ArrayList<>();
for (long i = 0; i < n; i++) {
Node.Type nt = this.graph.getNodeType(i);
- if (nt != Node.Type.ORI) continue;
+ if (nt != Node.Type.ORI)
+ continue;
final LazyLongIterator iterator = graph.successors(i);
long succ;
boolean found = false;
while ((succ = iterator.nextLong()) != -1) {
if (this.graph.outdegree(succ) > 0) {
found = true;
}
}
if (!found)
bad.add(i);
}
return bad;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
index 9fd8e6b..1681bd5 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java
@@ -1,136 +1,130 @@
package org.softwareheritage.graph.experiments.multiplicationfactor;
import com.martiansoftware.jsap.*;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.Traversal;
import org.softwareheritage.graph.benchmark.utils.Timing;
import java.io.IOException;
import java.util.Scanner;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class GenDistribution {
private Graph graph;
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- GenDistribution.class.getName(),
- "",
+ SimpleJSAP jsap = new SimpleJSAP(GenDistribution.class.getName(), "",
new Parameter[]{
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 's', "srctype", "Source node type"),
- new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'd', "dsttype", "Destination node type"),
- new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'e', "edges", "Edges constraints"),
-
- new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED,
- 't', "numthreads", "Number of threads"),
- }
- );
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),
+ new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's',
+ "srctype", "Source node type"),
+ new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd',
+ "dsttype", "Destination node type"),
+ new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e',
+ "edges", "Edges constraints"),
+
+ new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't',
+ "numthreads", "Number of threads"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
Node.Type srcType = Node.Type.fromStr(config.getString("srcType"));
Node.Type dstType = Node.Type.fromStr(config.getString("dstType"));
String edgesFmt = config.getString("edgesFmt");
int numThreads = config.getInt("numThreads");
GenDistribution tp = new GenDistribution();
try {
tp.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
final long END_OF_QUEUE = -1L;
ArrayBlockingQueue<Long> queue = new ArrayBlockingQueue<>(numThreads);
ExecutorService service = Executors.newFixedThreadPool(numThreads + 1);
service.submit(() -> {
try {
Scanner input = new Scanner(System.in);
while (input.hasNextLong()) {
long node = input.nextLong();
if (tp.graph.getNodeType(node) == srcType) {
queue.put(node);
}
}
} catch (InterruptedException e) {
e.printStackTrace();
} finally {
for (int i = 0; i < numThreads; ++i) {
try {
queue.put(END_OF_QUEUE);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
});
for (int i = 0; i < numThreads; ++i) {
service.submit(() -> {
Graph thread_graph = tp.graph.copy();
long startTime;
double totalTime;
while (true) {
Long node = null;
try {
node = queue.take();
} catch (InterruptedException e) {
e.printStackTrace();
}
if (node == null || node == END_OF_QUEUE) {
return;
}
Traversal t = new Traversal(thread_graph, "backward", edgesFmt);
int[] count = {0};
startTime = Timing.start();
t.visitNodesVisitor(node, (curnode) -> {
if (tp.graph.getNodeType(curnode) == dstType) {
count[0]++;
}
});
totalTime = Timing.stop(startTime);
- System.out.format("%d %d %d %d %f\n",
- node, count[0], t.getNbNodesAccessed(),
- t.getNbEdgesAccessed(), totalTime
- );
+ System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(),
+ t.getNbEdgesAccessed(), totalTime);
}
});
}
service.shutdown();
}
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename);
System.err.println("Graph loaded.");
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java
index d1c8610..9064ef2 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java
@@ -1,200 +1,189 @@
package org.softwareheritage.graph.experiments.topology;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import it.unimi.dsi.big.webgraph.Transform;
import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.Graph;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ThreadLocalRandom;
public class ClusteringCoefficient {
private Graph graph;
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename).symmetrize();
System.err.println("Graph loaded.");
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- ClusteringCoefficient.class.getName(),
- "",
+ SimpleJSAP jsap = new SimpleJSAP(ClusteringCoefficient.class.getName(), "",
new Parameter[]{
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'o', "outdir", "Directory where to put the results"),
- }
- );
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),
+ new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
+ "outdir", "Directory where to put the results"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
private long oppositeEdges(ImmutableGraph graph, long node) {
System.out.format("%d\n", node);
HashSet<Long> neighborhood = new HashSet<>();
long succ;
final LazyLongIterator iterator = graph.successors(node);
while ((succ = iterator.nextLong()) != -1) {
System.out.format("%d neighbor add %d\n", node, succ);
neighborhood.add(succ);
}
long closed_triplets = 0;
for (Long neighbor : neighborhood) {
System.out.format("%d neighbor visit %d\n", node, neighbor);
final LazyLongIterator it = graph.successors(neighbor);
while ((succ = it.nextLong()) != -1) {
System.out.format("%d neighbor visit %d succ %d\n", node, neighbor, succ);
if (neighborhood.contains(succ)) {
closed_triplets++;
}
}
}
return closed_triplets;
}
public void compute(ProgressLogger pl, Formatter out_local, Formatter out_global) {
final long n = this.graph.numNodes();
pl.expectedUpdates = n;
pl.itemsName = "nodes";
long nodes_d2 = 0;
double cum_lcc = 0;
double cum_lcc_c0 = 0;
double cum_lcc_c1 = 0;
HashMap<Double, Long> distribution = new HashMap<>();
for (long node = 0; node < n; node++) {
long d = graph.outdegree(node);
if (d >= 2) {
double t = (d * (d - 1));
double m = oppositeEdges(graph, node);
double lcc = m / t;
distribution.merge(lcc, 1L, Long::sum);
cum_lcc += lcc;
nodes_d2++;
} else {
cum_lcc_c1++;
}
pl.update();
}
pl.done();
for (Map.Entry<Double, Long> entry : distribution.entrySet()) {
out_local.format("%f %d\n", entry.getKey(), entry.getValue());
}
double gC = cum_lcc / nodes_d2;
double gC0 = cum_lcc_c0 / n;
double gC1 = cum_lcc_c1 / n;
out_global.format("C: %f\n", gC);
out_global.format("C0: %f\n", gC0);
out_global.format("C1: %f\n", gC1);
}
public void compute_approx(Formatter out_global) {
final long n = this.graph.numNodes();
long trials = 0;
long triangles = 0;
while (true) {
long node = ThreadLocalRandom.current().nextLong(0, n);
long d = graph.outdegree(node);
if (d >= 2) {
Long u = null;
Long v = null;
long u_idx = ThreadLocalRandom.current().nextLong(0, d);
long v_idx = ThreadLocalRandom.current().nextLong(0, d - 1);
if (v_idx >= u_idx) {
v_idx++;
}
long succ;
final LazyLongIterator node_iterator = graph.successors(node);
for (long succ_idx = 0; (succ = node_iterator.nextLong()) != -1; succ_idx++) {
if (succ_idx == u_idx) {
u = succ;
}
if (succ_idx == v_idx) {
v = succ;
}
}
final LazyLongIterator u_iterator = graph.successors(u);
while ((succ = u_iterator.nextLong()) != -1) {
if (succ == v)
triangles++;
}
}
trials++;
if (trials % 100 == 0 || true) {
- double gC = (double)triangles / (double)trials;
+ double gC = (double) triangles / (double) trials;
out_global.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials);
System.out.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials);
}
}
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String outdirPath = config.getString("outdir");
ClusteringCoefficient ccoef = new ClusteringCoefficient();
try {
ccoef.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME);
rootLogger.setLevel(Level.DEBUG);
new File(outdirPath).mkdirs();
try {
- ccoef.compute_approx(
- new Formatter(outdirPath + "/local.txt")
- );
+ ccoef.compute_approx(new Formatter(outdirPath + "/local.txt"));
/*
- ccoef.compute(
- symmetric,
- new ProgressLogger(rootLogger),
- new Formatter(outdirPath + "/local.txt"),
- new Formatter(outdirPath + "/global.txt")
- );
+ * ccoef.compute( symmetric, new ProgressLogger(rootLogger), new Formatter(outdirPath +
+ * "/local.txt"), new Formatter(outdirPath + "/global.txt") );
*/
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
index f2e27d3..ae573fd 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
@@ -1,187 +1,179 @@
package org.softwareheritage.graph.experiments.topology;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
-import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import it.unimi.dsi.big.webgraph.Transform;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.*;
public class ConnectedComponents {
private Graph graph;
private void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename).symmetrize();
System.err.println("Graph loaded.");
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- ConnectedComponents.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'o', "outdir", "Directory where to put the results"),
- }
- );
+ SimpleJSAP jsap = new SimpleJSAP(ConnectedComponents.class.getName(), "",
+ new Parameter[]{
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),
+ new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
+ "outdir", "Directory where to put the results"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
- private HashMap<Long, Long> /*ArrayList<ArrayList<Long>>*/ compute(ProgressLogger pl) throws IOException {
+ private HashMap<Long, Long> /* ArrayList<ArrayList<Long>> */ compute(ProgressLogger pl) throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
- int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
+ int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
// Use a disk based queue to store BFS frontier
final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting connected components visit...");
// ArrayList> components = new ArrayList<>();
HashMap componentDistribution = new HashMap<>();
for (long i = 0; i < n; i++) {
// ArrayList component = new ArrayList<>();
long componentNodes = 0;
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
// component.add(currentNode);
componentNodes += 1;
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (visited.getBoolean(succ))
continue;
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
pl.update();
}
/*
- if (component.size() > 0) {
- components.add(component);
- }
- */
+ * if (component.size() > 0) { components.add(component); }
+ */
if (componentNodes > 0)
componentDistribution.merge(componentNodes, 1L, Long::sum);
}
pl.done();
// return components;
return componentDistribution;
}
private static void printDistribution(ArrayList> components, Formatter out) {
TreeMap distribution = new TreeMap<>();
for (ArrayList component : components) {
distribution.merge((long) component.size(), 1L, Long::sum);
}
for (Map.Entry entry : distribution.entrySet()) {
out.format("%d %d\n", entry.getKey(), entry.getValue());
}
out.close();
}
private static void printLargestComponent(ArrayList> components, Formatter out) {
int indexLargest = 0;
for (int i = 1; i < components.size(); ++i) {
if (components.get(i).size() > components.get(indexLargest).size())
indexLargest = i;
}
ArrayList component = components.get(indexLargest);
for (Long node : component) {
out.format("%d\n", node);
}
out.close();
}
private static void printAllComponents(ArrayList> components, Formatter out) {
for (int i = 1; i < components.size(); ++i) {
ArrayList component = components.get(i);
for (Long node : component) {
out.format("%d ", node);
}
out.format("\n");
}
out.close();
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String outdirPath = config.getString("outdir");
ConnectedComponents connectedComponents = new ConnectedComponents();
try {
connectedComponents.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
ProgressLogger logger = new ProgressLogger();
- //noinspection ResultOfMethodCallIgnored
+ // noinspection ResultOfMethodCallIgnored
new File(outdirPath).mkdirs();
try {
// ArrayList> components = connectedComponents.compute(logger);
// components.sort(Comparator.comparing(ArrayList::size).reversed());
// printDistribution(components, new Formatter(outdirPath + "/distribution.txt"));
// printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt"));
// printAllComponents(components, new Formatter(outdirPath + "/all_components.txt"));
HashMap componentDistribution = connectedComponents.compute(logger);
PrintWriter f = new PrintWriter(new FileWriter(outdirPath + "/distribution.txt"));
TreeMap sortedDistribution = new TreeMap<>(componentDistribution);
for (Map.Entry entry : sortedDistribution.entrySet()) {
f.println(entry.getKey() + " " + entry.getValue());
}
f.close();
} catch (IOException e) {
e.printStackTrace();
}
logger.done();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
index 57830c6..c82d4ea 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
@@ -1,167 +1,174 @@
package org.softwareheritage.graph.experiments.topology;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
public class InOutDegree {
- private InOutDegree() {}
-
- private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1;
- private static final int TYPE_ALL = Node.Type.values().length;
- private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT);
- private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR);
- private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV);
- private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL);
- private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP);
- private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI);
-
- public static long[] outdegreeTypes(final Graph graph, long node) {
- long[] out = new long[NODE_ARRAY_SIZE];
- var successors = graph.successors(node);
- long neighbor;
- while ((neighbor = successors.nextLong()) != -1) {
- out[Node.Type.toInt(graph.getNodeType(neighbor))]++;
- out[TYPE_ALL]++;
- }
- return out;
- }
-
- public static long[] indegreeTypes(final Graph graph, long node) {
- return outdegreeTypes(graph.transpose(), node);
- }
-
- public static void writeDistribution(HashMap distribution, String filename) throws IOException {
- PrintWriter f = new PrintWriter(new FileWriter(filename));
- TreeMap sortedDistribution = new TreeMap<>(distribution);
- for (Map.Entry entry : sortedDistribution.entrySet()) {
- f.println(entry.getKey() + " " + entry.getValue());
- }
- f.close();
- }
-
- public static void run(final Graph graph, String resultsDir) throws IOException {
- var cnt_in_dir = new HashMap();
- var dir_in_dir = new HashMap();
- var dir_in_rev = new HashMap();
- var dir_in_all = new HashMap();
- var dir_out_all = new HashMap();
- var dir_out_dir = new HashMap();
- var dir_out_cnt = new HashMap();
- var dir_out_rev = new HashMap();
- var rev_in_dir = new HashMap();
- var rev_in_rel = new HashMap();
- var rev_in_rev = new HashMap();
- var rev_in_snp = new HashMap();
- var rev_in_all = new HashMap();
- var rev_out_rev = new HashMap();
- var rel_in_snp = new HashMap();
- var snp_in_ori = new HashMap();
- var snp_out_all = new HashMap();
- var snp_out_rel = new HashMap();
- var snp_out_rev = new HashMap();
- var ori_out_snp = new HashMap();
-
- final ProgressLogger pl = new ProgressLogger();
- pl.itemsName = "nodes";
- pl.expectedUpdates = graph.numNodes();
- pl.start("Scanning...");
-
- long[] in;
- long[] out;
-
- for(long i = graph.numNodes(); i-- != 0;) {
- switch (graph.getNodeType(i)) {
- case CNT:
- cnt_in_dir.merge(graph.indegree(i), 1L, Long::sum);
- case DIR:
- in = indegreeTypes(graph, i);
- out = outdegreeTypes(graph, i);
- dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
- dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
- dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
- dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
- dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum);
- dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum);
- dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
- case REV:
- in = indegreeTypes(graph, i);
- out = outdegreeTypes(graph, i);
- rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
- rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
- rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
- rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum);
- rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum);
- rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
- case REL:
- rel_in_snp.merge(graph.indegree(i), 1L, Long::sum);
- case SNP:
- out = outdegreeTypes(graph, i);
- snp_in_ori.merge(graph.indegree(i), 1L, Long::sum);
- snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
- snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum);
- snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
- case ORI:
- ori_out_snp.merge(graph.outdegree(i), 1L, Long::sum);
- }
-
- pl.update();
- }
-
- pl.done();
-
- writeDistribution(cnt_in_dir, resultsDir + "/cnt_in_dir.txt");
- writeDistribution(dir_in_dir, resultsDir + "/dir_in_dir.txt");
- writeDistribution(dir_in_rev, resultsDir + "/dir_in_rev.txt");
- writeDistribution(dir_in_all, resultsDir + "/dir_in_all.txt");
- writeDistribution(dir_out_all, resultsDir + "/dir_out_all.txt");
- writeDistribution(dir_out_dir, resultsDir + "/dir_out_dir.txt");
- writeDistribution(dir_out_cnt, resultsDir + "/dir_out_cnt.txt");
- writeDistribution(dir_out_rev, resultsDir + "/dir_out_rev.txt");
- writeDistribution(rev_in_dir, resultsDir + "/rev_in_dir.txt");
- writeDistribution(rev_in_rel, resultsDir + "/rev_in_rel.txt");
- writeDistribution(rev_in_rev, resultsDir + "/rev_in_rev.txt");
- writeDistribution(rev_in_snp, resultsDir + "/rev_in_snp.txt");
- writeDistribution(rev_in_all, resultsDir + "/rev_in_all.txt");
- writeDistribution(rev_out_rev, resultsDir + "/rev_out_rev.txt");
- writeDistribution(rel_in_snp, resultsDir + "/rel_in_snp.txt");
- writeDistribution(snp_in_ori, resultsDir + "/snp_in_ori.txt");
- writeDistribution(snp_out_all, resultsDir + "/snp_out_all.txt");
- writeDistribution(snp_out_rel, resultsDir + "/snp_out_rel.txt");
- writeDistribution(snp_out_rev, resultsDir + "/snp_out_rev.txt");
- writeDistribution(ori_out_snp, resultsDir + "/ori_out_snp.txt");
- }
-
- static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, JSAPException, IOException, ClassNotFoundException {
- final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(), "Computes in and out degrees of the given SWHGraph",
- new Parameter[] {
- new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."),
- new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The directory of the resulting files."),
- }
- );
-
- final JSAPResult jsapResult = jsap.parse(arg);
- if (jsap.messagePrinted()) System.exit(1);
-
- final String basename = jsapResult.getString("basename");
- final String resultsDir = jsapResult.userSpecified("resultsDir") ? jsapResult.getString("resultsDir") : basename;
-
- final ProgressLogger pl = new ProgressLogger();
-
- Graph graph = new Graph(basename);
- run(graph, resultsDir);
- }
+ private InOutDegree() {
+ }
+
+ private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1;
+ private static final int TYPE_ALL = Node.Type.values().length;
+ private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT);
+ private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR);
+ private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV);
+ private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL);
+ private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP);
+ private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI);
+
+ public static long[] outdegreeTypes(final Graph graph, long node) {
+ long[] out = new long[NODE_ARRAY_SIZE];
+ var successors = graph.successors(node);
+ long neighbor;
+ while ((neighbor = successors.nextLong()) != -1) {
+ out[Node.Type.toInt(graph.getNodeType(neighbor))]++;
+ out[TYPE_ALL]++;
+ }
+ return out;
+ }
+
+ public static long[] indegreeTypes(final Graph graph, long node) {
+ return outdegreeTypes(graph.transpose(), node);
+ }
+
+ public static void writeDistribution(HashMap distribution, String filename) throws IOException {
+ PrintWriter f = new PrintWriter(new FileWriter(filename));
+ TreeMap sortedDistribution = new TreeMap<>(distribution);
+ for (Map.Entry entry : sortedDistribution.entrySet()) {
+ f.println(entry.getKey() + " " + entry.getValue());
+ }
+ f.close();
+ }
+
+ public static void run(final Graph graph, String resultsDir) throws IOException {
+ var cnt_in_dir = new HashMap();
+ var dir_in_dir = new HashMap();
+ var dir_in_rev = new HashMap();
+ var dir_in_all = new HashMap();
+ var dir_out_all = new HashMap();
+ var dir_out_dir = new HashMap();
+ var dir_out_cnt = new HashMap();
+ var dir_out_rev = new HashMap();
+ var rev_in_dir = new HashMap();
+ var rev_in_rel = new HashMap();
+ var rev_in_rev = new HashMap();
+ var rev_in_snp = new HashMap();
+ var rev_in_all = new HashMap();
+ var rev_out_rev = new HashMap();
+ var rel_in_snp = new HashMap();
+ var snp_in_ori = new HashMap();
+ var snp_out_all = new HashMap();
+ var snp_out_rel = new HashMap();
+ var snp_out_rev = new HashMap();
+ var ori_out_snp = new HashMap();
+
+ final ProgressLogger pl = new ProgressLogger();
+ pl.itemsName = "nodes";
+ pl.expectedUpdates = graph.numNodes();
+ pl.start("Scanning...");
+
+ long[] in;
+ long[] out;
+
+ for (long i = graph.numNodes(); i-- != 0;) {
+ switch (graph.getNodeType(i)) {
+ case CNT:
+ cnt_in_dir.merge(graph.indegree(i), 1L, Long::sum);
+ case DIR:
+ in = indegreeTypes(graph, i);
+ out = outdegreeTypes(graph, i);
+ dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
+ dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
+ dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
+ dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
+ dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum);
+ dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum);
+ dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
+ case REV:
+ in = indegreeTypes(graph, i);
+ out = outdegreeTypes(graph, i);
+ rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
+ rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
+ rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
+ rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum);
+ rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum);
+ rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
+ case REL:
+ rel_in_snp.merge(graph.indegree(i), 1L, Long::sum);
+ case SNP:
+ out = outdegreeTypes(graph, i);
+ snp_in_ori.merge(graph.indegree(i), 1L, Long::sum);
+ snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
+ snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum);
+ snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
+ case ORI:
+ ori_out_snp.merge(graph.outdegree(i), 1L, Long::sum);
+ }
+
+ pl.update();
+ }
+
+ pl.done();
+
+ writeDistribution(cnt_in_dir, resultsDir + "/cnt_in_dir.txt");
+ writeDistribution(dir_in_dir, resultsDir + "/dir_in_dir.txt");
+ writeDistribution(dir_in_rev, resultsDir + "/dir_in_rev.txt");
+ writeDistribution(dir_in_all, resultsDir + "/dir_in_all.txt");
+ writeDistribution(dir_out_all, resultsDir + "/dir_out_all.txt");
+ writeDistribution(dir_out_dir, resultsDir + "/dir_out_dir.txt");
+ writeDistribution(dir_out_cnt, resultsDir + "/dir_out_cnt.txt");
+ writeDistribution(dir_out_rev, resultsDir + "/dir_out_rev.txt");
+ writeDistribution(rev_in_dir, resultsDir + "/rev_in_dir.txt");
+ writeDistribution(rev_in_rel, resultsDir + "/rev_in_rel.txt");
+ writeDistribution(rev_in_rev, resultsDir + "/rev_in_rev.txt");
+ writeDistribution(rev_in_snp, resultsDir + "/rev_in_snp.txt");
+ writeDistribution(rev_in_all, resultsDir + "/rev_in_all.txt");
+ writeDistribution(rev_out_rev, resultsDir + "/rev_out_rev.txt");
+ writeDistribution(rel_in_snp, resultsDir + "/rel_in_snp.txt");
+ writeDistribution(snp_in_ori, resultsDir + "/snp_in_ori.txt");
+ writeDistribution(snp_out_all, resultsDir + "/snp_out_all.txt");
+ writeDistribution(snp_out_rel, resultsDir + "/snp_out_rel.txt");
+ writeDistribution(snp_out_rev, resultsDir + "/snp_out_rev.txt");
+ writeDistribution(ori_out_snp, resultsDir + "/ori_out_snp.txt");
+ }
+
+ static public void main(final String[] arg)
+ throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException,
+ NoSuchMethodException, JSAPException, IOException, ClassNotFoundException {
+ final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(),
+ "Computes in and out degrees of the given SWHGraph",
+ new Parameter[]{
+ new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ JSAP.NOT_GREEDY, "The basename of the graph."),
+ new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
+ JSAP.NOT_GREEDY, "The directory of the resulting files."),});
+
+ final JSAPResult jsapResult = jsap.parse(arg);
+ if (jsap.messagePrinted())
+ System.exit(1);
+
+ final String basename = jsapResult.getString("basename");
+ final String resultsDir = jsapResult.userSpecified("resultsDir")
+ ? jsapResult.getString("resultsDir")
+ : basename;
+
+ final ProgressLogger pl = new ProgressLogger();
+
+ Graph graph = new Graph(basename);
+ run(graph, resultsDir);
+ }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java
index b84cabe..d1be386 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java
@@ -1,88 +1,90 @@
package org.softwareheritage.graph.experiments.topology;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.Util;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.XoRoShiRo128PlusRandom;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.experiments.forks.ForkCC;
import java.io.*;
public class SubdatasetSizeFunction {
- private SubdatasetSizeFunction() {}
-
- public static void run(final Graph graph) throws IOException {
- final ProgressLogger pl = new ProgressLogger();
- pl.itemsName = "nodes";
- pl.expectedUpdates = graph.numNodes();
-
- long n = graph.numNodes();
- LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
-
- int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
- final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue");
- final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
- final byte[] byteBuf = new byte[Long.BYTES];
-
- long[][] randomPerm = Util.identity(graph.numNodes());
- LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom());
-
- long visitedSize = 0;
- pl.start("Running traversal starting from origins...");
- for (long j = 0; j < n; ++j) {
- long i = BigArrays.get(randomPerm, j);
- if (visited.getBoolean(i) || graph.getNodeType(i) != Node.Type.ORI) {
- continue;
- }
- queue.enqueue(Longs.toByteArray(i));
- visited.set(i);
-
- while (!queue.isEmpty()) {
- queue.dequeue(byteBuf);
- final long currentNode = Longs.fromByteArray(byteBuf);
-
- visitedSize++;
-
- final LazyLongIterator iterator = graph.successors(currentNode);
- long succ;
- while ((succ = iterator.nextLong()) != -1) {
- if (visited.getBoolean(succ))
- continue;
- visited.set(succ);
- queue.enqueue(Longs.toByteArray(succ));
- }
-
- pl.update();
- }
-
- System.out.println(visitedSize);
- }
- pl.done();
- }
-
- static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, JSAPException, IOException {
- final SimpleJSAP jsap = new SimpleJSAP(SubdatasetSizeFunction.class.getName(), "Computes in and out degrees of the given SWHGraph",
- new Parameter[] {
- new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."),
- }
- );
-
- final JSAPResult jsapResult = jsap.parse(arg);
- if (jsap.messagePrinted()) System.exit(1);
-
- final String basename = jsapResult.getString("basename");
-
- Graph graph = new Graph(basename);
- run(graph);
- }
+ private SubdatasetSizeFunction() {
+ }
+
+ public static void run(final Graph graph) throws IOException {
+ final ProgressLogger pl = new ProgressLogger();
+ pl.itemsName = "nodes";
+ pl.expectedUpdates = graph.numNodes();
+
+ long n = graph.numNodes();
+ LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
+
+ int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
+ final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue");
+ final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
+ final byte[] byteBuf = new byte[Long.BYTES];
+
+ long[][] randomPerm = Util.identity(graph.numNodes());
+ LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom());
+
+ long visitedSize = 0;
+ pl.start("Running traversal starting from origins...");
+ for (long j = 0; j < n; ++j) {
+ long i = BigArrays.get(randomPerm, j);
+ if (visited.getBoolean(i) || graph.getNodeType(i) != Node.Type.ORI) {
+ continue;
+ }
+ queue.enqueue(Longs.toByteArray(i));
+ visited.set(i);
+
+ while (!queue.isEmpty()) {
+ queue.dequeue(byteBuf);
+ final long currentNode = Longs.fromByteArray(byteBuf);
+
+ visitedSize++;
+
+ final LazyLongIterator iterator = graph.successors(currentNode);
+ long succ;
+ while ((succ = iterator.nextLong()) != -1) {
+ if (visited.getBoolean(succ))
+ continue;
+ visited.set(succ);
+ queue.enqueue(Longs.toByteArray(succ));
+ }
+
+ pl.update();
+ }
+
+ System.out.println(visitedSize);
+ }
+ pl.done();
+ }
+
+ static public void main(final String[] arg)
+ throws IllegalArgumentException, SecurityException, JSAPException, IOException {
+ final SimpleJSAP jsap = new SimpleJSAP(SubdatasetSizeFunction.class.getName(),
+ "Computes in and out degrees of the given SWHGraph",
+ new Parameter[]{new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ JSAP.NOT_GREEDY, "The basename of the graph."),});
+
+ final JSAPResult jsapResult = jsap.parse(arg);
+ if (jsap.messagePrinted())
+ System.exit(1);
+
+ final String basename = jsapResult.getString("basename");
+
+ Graph graph = new Graph(basename);
+ run(graph);
+ }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
index 3076517..0d87a61 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
@@ -1,235 +1,215 @@
package org.softwareheritage.graph.maps;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
-import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntLabel;
import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntListLabel;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.NodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.TimeUnit;
public class LabelMapBuilder {
final static String SORT_BUFFER_SIZE = "40%";
final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class);
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
- SimpleJSAP jsap = new SimpleJSAP(
- LabelMapBuilder.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
- 'd', "debug-path",
- "Store the intermediate representation here for debug"),
-
- new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED,
- 't', "tmp", "Temporary directory path"),
- }
- );
+ SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "",
+ new Parameter[]{
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
+ "graph", "Basename of the compressed graph"),
+ new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd',
+ "debug-path", "Store the intermediate representation here for debug"),
+
+ new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp",
+ "Temporary directory path"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) throws IOException {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String tmpDir = config.getString("tmpDir");
String debugPath = config.getString("debugPath");
logger.info("Starting label map generation...");
computeLabelMap(graphPath, debugPath, tmpDir);
logger.info("Label map generation ended.");
}
@SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast
static Object2LongFunction loadMPH(String mphBasename) throws IOException {
Object2LongFunction mphMap = null;
try {
logger.info("loading MPH function...");
mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph");
logger.info("MPH function loaded");
} catch (ClassNotFoundException e) {
logger.error("unknown class object in .mph file: " + e);
System.exit(2);
}
return mphMap;
}
- static long getMPHSize(Object2LongFunction mph)
- {
+ static long getMPHSize(Object2LongFunction mph) {
return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size();
}
- static long SwhIDToNode(String strSWHID, Object2LongFunction mphMap, long[][] orderMap)
- {
+ static long SwhIDToNode(String strSWHID, Object2LongFunction mphMap, long[][] orderMap) {
long mphId = mphMap.getLong(strSWHID);
return BigArrays.get(orderMap, mphId);
}
- static void computeLabelMap(String graphPath, String debugPath, String tmpDir)
- throws IOException
- {
- // Compute intermediate representation in the format "