diff --git a/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java
index 1d63774..dadf2b6 100644
--- a/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java
+++ b/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java
@@ -1,91 +1,88 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
-import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.Node;
-
/**
* Edge restriction based on node types, used when visiting the graph.
*
* A Software Heritage graph contains multiple node types (contents, directories, revisions, ...)
* and restricting the traversal to specific node types is necessary for many querying
* operations and use cases.
*
* @author The Software Heritage developers
*/
public class AllowedEdges {
- /** Graph on which edge restriction is performed */
- Graph graph;
/**
* 2D boolean matrix storing access rights for all combinations of src/dst node types (first
* dimension is source, second dimension is destination). When edge restriction is not enforced,
* this array is set to null for early bypass.
*/
public boolean[][] restrictedTo;
+ /** Graph on which edge restriction is performed */
+ Graph graph;
/**
* Constructor.
*
* @param graph the graph on which to perform edge restriction
* @param edgesFmt a formatted string describing allowed edges
*/
public AllowedEdges(Graph graph, String edgesFmt) {
this.graph = graph;
int nbNodeTypes = Node.Type.values().length;
this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes];
// Special values (null, empty, "*")
if (edgesFmt == null || edgesFmt.isEmpty()) {
return;
}
if (edgesFmt.equals("*")) {
// Allows for quick bypass (with simple null check) when no edge restriction
restrictedTo = null;
return;
}
// Format: "src1:dst1,src2:dst2,[...]"
String[] edgeTypes = edgesFmt.split(",");
for (String edgeType : edgeTypes) {
String[] nodeTypes = edgeType.split(":");
if (nodeTypes.length != 2) {
throw new IllegalArgumentException("Cannot parse edge type: " + edgeType);
}
ArrayList<Node.Type> srcTypes = Node.Type.parse(nodeTypes[0]);
ArrayList<Node.Type> dstTypes = Node.Type.parse(nodeTypes[1]);
for (Node.Type srcType : srcTypes) {
for (Node.Type dstType : dstTypes) {
restrictedTo[srcType.ordinal()][dstType.ordinal()] = true;
}
}
}
}
/**
* Checks if a given edge can be followed during graph traversal.
*
* @param srcNodeId edge source node
* @param dstNodeId edge destination node
* @return true if allowed and false otherwise
*/
public boolean isAllowed(long srcNodeId, long dstNodeId) {
Node.Type srcType = graph.getNodeType(srcNodeId);
Node.Type dstType = graph.getNodeType(dstNodeId);
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
/**
* Works like {@link AllowedEdges#isAllowed(long, long)} but with node types directly instead of
* node ids.
*
* @see AllowedEdges#isAllowed(long, long)
*/
public boolean isAllowed(Node.Type srcType, Node.Type dstType) {
return restrictedTo[srcType.ordinal()][dstType.ordinal()];
}
}
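
Usage sketch (illustrative, not part of this diff): the "src1:dst1,src2:dst2" format above may be easier to follow with a minimal example. The graph basename is a hypothetical placeholder; only the AllowedEdges, Graph and Node.Type APIs shown in this file are assumed.

    import org.softwareheritage.graph.AllowedEdges;
    import org.softwareheritage.graph.Graph;
    import org.softwareheritage.graph.Node;
    import java.io.IOException;

    class AllowedEdgesSketch {
        public static void main(String[] args) throws IOException {
            Graph graph = new Graph("/srv/softwareheritage/graph/basename"); // hypothetical basename
            // Follow revision history plus the root directory of each revision
            AllowedEdges edges = new AllowedEdges(graph, "rev:rev,rev:dir");
            System.out.println(edges.isAllowed(Node.Type.REV, Node.Type.DIR)); // true
            System.out.println(edges.isAllowed(Node.Type.DIR, Node.Type.CNT)); // false
            // "*" allows every edge (restrictedTo == null); a null or empty string allows none.
        }
    }
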
diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java
index b496687..efd4915 100644
--- a/java/src/main/java/org/softwareheritage/graph/Entry.java
+++ b/java/src/main/java/org/softwareheritage/graph/Entry.java
@@ -1,192 +1,193 @@
package org.softwareheritage.graph;
-import java.util.ArrayList;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.PropertyNamingStrategy;
+
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.PropertyNamingStrategy;
+import java.util.ArrayList;
public class Entry {
- private Graph graph;
-
private final long PATH_SEPARATOR_ID = -1;
+ private Graph graph;
public void load_graph(String graphBasename) throws IOException {
System.err.println("Loading graph " + graphBasename + " ...");
this.graph = new Graph(graphBasename);
System.err.println("Graph loaded.");
}
public Graph get_graph() {
return graph.copy();
}
public String stats() {
try {
Stats stats = new Stats(graph.getPath());
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
return objectMapper.writeValueAsString(stats);
} catch (IOException e) {
throw new RuntimeException("Cannot read stats: " + e);
}
}
- private interface NodeCountVisitor {
- void accept(long nodeId, Traversal.NodeIdConsumer consumer);
- }
-
private int count_visitor(NodeCountVisitor f, long srcNodeId) {
- int[] count = { 0 };
- f.accept(srcNodeId, (node) -> { count[0]++; });
+ int[] count = {0};
+ f.accept(srcNodeId, (node) -> {
+ count[0]++;
+ });
return count[0];
}
public int count_leaves(String direction, String edgesFmt, long srcNodeId) {
Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt);
return count_visitor(t::leavesVisitor, srcNodeId);
}
public int count_neighbors(String direction, String edgesFmt, long srcNodeId) {
Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt);
return count_visitor(t::neighborsVisitor, srcNodeId);
}
public int count_visit_nodes(String direction, String edgesFmt, long srcNodeId) {
Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt);
return count_visitor(t::visitNodesVisitor, srcNodeId);
}
public QueryHandler get_handler(String clientFIFO) {
return new QueryHandler(this.graph.copy(), clientFIFO);
}
+ private interface NodeCountVisitor {
+ void accept(long nodeId, Traversal.NodeIdConsumer consumer);
+ }
+
public class QueryHandler {
Graph graph;
DataOutputStream out;
String clientFIFO;
public QueryHandler(Graph graph, String clientFIFO) {
this.graph = graph;
this.clientFIFO = clientFIFO;
this.out = null;
}
public void writeNode(long nodeId) {
try {
out.writeLong(nodeId);
} catch (IOException e) {
throw new RuntimeException("Cannot write response to client: " + e);
}
}
public void writeEdge(long srcId, long dstId) {
writeNode(srcId);
writeNode(dstId);
}
public void writePath(ArrayList<Long> path) {
for (Long nodeId : path) {
writeNode(nodeId);
}
writeNode(PATH_SEPARATOR_ID);
}
public void open() {
try {
FileOutputStream file = new FileOutputStream(this.clientFIFO);
this.out = new DataOutputStream(file);
} catch (IOException e) {
throw new RuntimeException("Cannot open client FIFO: " + e);
}
}
public void close() {
try {
out.close();
} catch (IOException e) {
throw new RuntimeException("Cannot write response to client: " + e);
}
}
public void leaves(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.leavesVisitor(srcNodeId, this::writeNode);
close();
}
public void neighbors(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.neighborsVisitor(srcNodeId, this::writeNode);
close();
}
public void visit_nodes(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.visitNodesVisitor(srcNodeId, this::writeNode);
close();
}
public void visit_edges(String direction, String edgesFmt, long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.visitNodesVisitor(srcNodeId, null, this::writeEdge);
close();
}
public void visit_paths(String direction, String edgesFmt,
long srcNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
t.visitPathsVisitor(srcNodeId, this::writePath);
close();
}
public void walk(String direction, String edgesFmt, String algorithm,
long srcNodeId, long dstNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.walk(srcNodeId, dstNodeId, algorithm)) {
writeNode(nodeId);
}
close();
}
public void walk_type(String direction, String edgesFmt, String algorithm,
long srcNodeId, String dst) {
open();
Node.Type dstType = Node.Type.fromStr(dst);
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.walk(srcNodeId, dstType, algorithm)) {
writeNode(nodeId);
}
close();
}
public void random_walk(String direction, String edgesFmt, int retries,
long srcNodeId, long dstNodeId) {
open();
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.randomWalk(srcNodeId, dstNodeId, retries)) {
writeNode(nodeId);
}
close();
}
public void random_walk_type(String direction, String edgesFmt, int retries,
long srcNodeId, String dst) {
open();
Node.Type dstType = Node.Type.fromStr(dst);
Traversal t = new Traversal(this.graph, direction, edgesFmt);
for (Long nodeId : t.randomWalk(srcNodeId, dstType, retries)) {
writeNode(nodeId);
}
close();
}
}
}
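
Wire-format sketch (illustrative, not part of this diff): QueryHandler streams results to the client FIFO as raw 64-bit node ids written with DataOutputStream, and visit_paths terminates each path with PATH_SEPARATOR_ID (-1). The real consumer is the Python side of swh.graph; the Java reader below is only a hedged sketch of that format, and the FIFO path is hypothetical.

    import java.io.DataInputStream;
    import java.io.EOFException;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.util.ArrayList;

    class FifoReaderSketch {
        public static void main(String[] args) throws IOException {
            // Hypothetical FIFO path; the server side opens its end in QueryHandler.open()
            try (DataInputStream in = new DataInputStream(new FileInputStream("/tmp/swh-graph-client.fifo"))) {
                ArrayList<Long> path = new ArrayList<>();
                while (true) {
                    long id;
                    try {
                        id = in.readLong(); // node ids are written with DataOutputStream.writeLong
                    } catch (EOFException eof) {
                        break; // server closed the FIFO
                    }
                    if (id == -1L) { // PATH_SEPARATOR_ID marks the end of one path
                        System.out.println("path: " + path);
                        path.clear();
                    } else {
                        path.add(id);
                    }
                }
            }
        }
    }
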
diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java
index 2fce7cb..8103229 100644
--- a/java/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,253 +1,256 @@
package org.softwareheritage.graph;
-import java.io.IOException;
-
-import it.unimi.dsi.big.webgraph.*;
-
+import it.unimi.dsi.big.webgraph.BVGraph;
+import it.unimi.dsi.big.webgraph.ImmutableGraph;
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import it.unimi.dsi.big.webgraph.Transform;
import org.softwareheritage.graph.maps.NodeIdMap;
import org.softwareheritage.graph.maps.NodeTypesMap;
+import java.io.IOException;
+
/**
* Main class storing the compressed graph and node id mappings.
*
* The compressed graph is stored using the WebGraph
* ecosystem. Additional mappings are necessary because Software Heritage uses string-based persistent
* identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔
* PID) are used for the input (users refer to the graph using PIDs) and the output (convert back to
* PIDs for user results). However, since graph traversal can be restricted depending on the node
* type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full
* PID lookup.
*
* @author The Software Heritage developers
* @see org.softwareheritage.graph.AllowedEdges
* @see org.softwareheritage.graph.maps.NodeIdMap
* @see org.softwareheritage.graph.maps.NodeTypesMap
*/
public class Graph extends ImmutableGraph {
/** File extension for the SWH PID to long node id map */
public static final String PID_TO_NODE = ".pid2node.bin";
/** File extension for the long node id to SWH PID map */
public static final String NODE_TO_PID = ".node2pid.bin";
/** File extension for the long node id to node type map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
ImmutableGraph graph;
/** Transposed compressed graph (used for backward traversals) */
ImmutableGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id ↔ SWH PIDs */
NodeIdMap nodeIdMap;
/** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
public Graph(String path) throws IOException {
this.path = path;
this.graph = BVGraph.loadMapped(path);
this.graphTransposed = BVGraph.loadMapped(path + "-transposed");
this.nodeTypesMap = new NodeTypesMap(path);
this.nodeIdMap = new NodeIdMap(path, numNodes());
}
protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed,
String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) {
this.graph = graph;
this.graphTransposed = graphTransposed;
this.path = path;
this.nodeIdMap = nodeIdMap;
this.nodeTypesMap = nodeTypesMap;
}
/**
* Return a flyweight copy of the graph.
*/
@Override
public Graph copy() {
return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap);
}
@Override
public boolean randomAccess() {
return graph.randomAccess() && graphTransposed.randomAccess();
}
/**
* Return a transposed version of the graph.
*/
public Graph transpose() {
return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Return a symmetric version of the graph.
*/
public Graph symmetrize() {
ImmutableGraph symmetric = Transform.union(graph, graphTransposed);
return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
+
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
@Override
public long numNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
@Override
public long numArcs() {
return graph.numArcs();
}
/**
* Returns lazy iterator of successors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator
*/
@Override
public LazyLongIterator successors(long nodeId) {
return graph.successors(nodeId);
}
/**
* Returns lazy iterator of successors of a node while following a specific set of edge types.
*
* @param nodeId node specified as a long id
* @param allowedEdges the specification of which edges can be traversed
* @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator
*/
public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) {
if (allowedEdges.restrictedTo == null) {
// All edges are allowed, bypass edge check
return this.successors(nodeId);
} else {
LazyLongIterator allSuccessors = this.successors(nodeId);
return new LazyLongIterator() {
@Override
public long nextLong() {
long neighbor;
while ((neighbor = allSuccessors.nextLong()) != -1) {
if (allowedEdges.isAllowed(nodeId, neighbor)) {
return neighbor;
}
}
return -1;
}
@Override
public long skip(final long n) {
long i;
- for (i = 0; i < n && nextLong() != -1; i++);
+ for (i = 0; i < n && nextLong() != -1; i++) ;
return i;
}
};
}
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
@Override
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns lazy iterator of predecessors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator
*/
public LazyLongIterator predecessors(long nodeId) {
return this.transpose().successors(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return this.transpose().outdegree(nodeId);
}
/**
* Returns the underlying BVGraph.
*
* @return WebGraph BVGraph
*/
public ImmutableGraph getGraph() {
return this.graph;
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SwhPID} node to long.
*
* @param swhPID node specified as a {@link SwhPID}
* @return internal long node id
* @see org.softwareheritage.graph.SwhPID
*/
public long getNodeId(SwhPID swhPID) {
return nodeIdMap.getNodeId(swhPID);
}
/**
* Converts long id node to {@link SwhPID}.
*
* @param nodeId node specified as a long id
* @return external SWH PID
* @see org.softwareheritage.graph.SwhPID
*/
public SwhPID getSwhPID(long nodeId) {
return nodeIdMap.getSwhPID(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
}
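
Usage sketch (illustrative, not part of this diff): a minimal walk through the mappings described above. The graph basename and the all-zero revision hash are hypothetical placeholders; only APIs visible in this file (getNodeId, getSwhPID, successors with AllowedEdges) are used.

    import it.unimi.dsi.big.webgraph.LazyLongIterator;
    import org.softwareheritage.graph.AllowedEdges;
    import org.softwareheritage.graph.Graph;
    import org.softwareheritage.graph.SwhPID;
    import java.io.IOException;

    class GraphUsageSketch {
        public static void main(String[] args) throws IOException {
            Graph g = new Graph("/srv/softwareheritage/graph/basename"); // hypothetical basename
            // PID -> long id (the hash below is a placeholder, not a real revision)
            long nodeId = g.getNodeId(new SwhPID("swh:1:rev:0000000000000000000000000000000000000000"));
            // Forward successors, restricted to rev->rev edges only
            LazyLongIterator succ = g.successors(nodeId, new AllowedEdges(g, "rev:rev"));
            for (long n; (n = succ.nextLong()) != -1; ) {
                System.out.println(g.getSwhPID(n)); // long id -> PID
            }
        }
    }
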
diff --git a/java/src/main/java/org/softwareheritage/graph/Node.java b/java/src/main/java/org/softwareheritage/graph/Node.java
index 0f18ee0..6f10d4f 100644
--- a/java/src/main/java/org/softwareheritage/graph/Node.java
+++ b/java/src/main/java/org/softwareheritage/graph/Node.java
@@ -1,105 +1,117 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* A node in the Software Heritage graph.
*
* @author The Software Heritage developers
*/
public class Node {
/**
* Software Heritage graph node types, as described in the
* data model.
*/
public enum Type {
/** Content node */
CNT,
/** Directory node */
DIR,
/** Origin node */
ORI,
/** Release node */
REL,
/** Revision node */
REV,
/** Snapshot node */
SNP;
/**
* Converts integer to corresponding SWH node type.
*
* @param intType node type represented as an integer
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromInt(int intType) {
switch (intType) {
- case 0: return CNT;
- case 1: return DIR;
- case 2: return ORI;
- case 3: return REL;
- case 4: return REV;
- case 5: return SNP;
+ case 0:
+ return CNT;
+ case 1:
+ return DIR;
+ case 2:
+ return ORI;
+ case 3:
+ return REL;
+ case 4:
+ return REV;
+ case 5:
+ return SNP;
}
return null;
}
/**
* Converts a node type to the corresponding int value.
*
* @param type node type as an enum
* @return the corresponding int value
*/
public static int toInt(Node.Type type) {
- switch (type) {
- case CNT: return 0;
- case DIR: return 1;
- case ORI: return 2;
- case REL: return 3;
- case REV: return 4;
- case SNP: return 5;
+ switch (type) {
+ case CNT:
+ return 0;
+ case DIR:
+ return 1;
+ case ORI:
+ return 2;
+ case REL:
+ return 3;
+ case REV:
+ return 4;
+ case SNP:
+ return 5;
}
throw new IllegalArgumentException("Unknown node type: " + type);
}
/**
* Converts string to corresponding SWH node type.
*
* @param strType node type represented as a string
* @return the corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public static Node.Type fromStr(String strType) {
if (!strType.matches("cnt|dir|ori|rel|rev|snp")) {
throw new IllegalArgumentException("Unknown node type: " + strType);
}
return Node.Type.valueOf(strType.toUpperCase());
}
/**
* Parses the possible SWH node type values from a formatted string (see the API
* syntax).
*
* @param strFmtType node types represented as a formatted string
* @return a list containing the {@link Node.Type} values
* @see org.softwareheritage.graph.Node.Type
*/
public static ArrayList<Node.Type> parse(String strFmtType) {
ArrayList<Node.Type> types = new ArrayList<>();
if (strFmtType.equals("*")) {
List<Node.Type> nodeTypes = Arrays.asList(Node.Type.values());
types.addAll(nodeTypes);
} else {
types.add(Node.Type.fromStr(strFmtType));
}
return types;
}
}
}
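
Usage sketch (illustrative, not part of this diff): a small illustration of the parsing helpers above; the values shown in comments follow from the enum order defined in this file.

    import org.softwareheritage.graph.Node;
    import java.util.ArrayList;

    class NodeTypeSketch {
        public static void main(String[] args) {
            Node.Type rev = Node.Type.fromStr("rev");           // Type.REV
            int dirAsInt = Node.Type.toInt(Node.Type.DIR);      // 1
            ArrayList<Node.Type> one = Node.Type.parse("cnt");  // [CNT]
            ArrayList<Node.Type> all = Node.Type.parse("*");    // all six node types
            System.out.println(rev + " " + dirAsInt + " " + one + " " + all);
        }
    }
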
diff --git a/java/src/main/java/org/softwareheritage/graph/Stats.java b/java/src/main/java/org/softwareheritage/graph/Stats.java
index b7200d6..92d4fd8 100644
--- a/java/src/main/java/org/softwareheritage/graph/Stats.java
+++ b/java/src/main/java/org/softwareheritage/graph/Stats.java
@@ -1,68 +1,67 @@
package org.softwareheritage.graph;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;
/**
* Statistics on the compressed graph.
*
* These statistics are not computed but read directly from the WebGraph-generated .stats and .properties files.
*
* @author The Software Heritage developers
*/
public class Stats {
- public static class Counts {
- public long nodes;
- public long edges;
- }
-
- public static class Ratios {
- public double compression;
- public double bitsPerNode;
- public double bitsPerEdge;
- public double avgLocality;
- }
-
- public static class Degree {
- public long min;
- public long max;
- public double avg;
- }
-
public Counts counts;
public Ratios ratios;
public Degree indegree;
public Degree outdegree;
-
/**
* Constructor.
*
* @param graphPath path and basename of compressed graph
*/
public Stats(String graphPath) throws IOException {
Properties properties = new Properties();
properties.load(new FileInputStream(graphPath + ".properties"));
properties.load(new FileInputStream(graphPath + ".stats"));
this.counts = new Counts();
this.ratios = new Ratios();
this.indegree = new Degree();
this.outdegree = new Degree();
this.counts.nodes = Long.parseLong(properties.getProperty("nodes"));
this.counts.edges = Long.parseLong(properties.getProperty("arcs"));
this.ratios.compression = Double.parseDouble(properties.getProperty("compratio"));
this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode"));
this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink"));
this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality"));
this.indegree.min = Long.parseLong(properties.getProperty("minindegree"));
this.indegree.max = Long.parseLong(properties.getProperty("maxindegree"));
this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree"));
this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree"));
this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree"));
this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree"));
}
+
+ public static class Counts {
+ public long nodes;
+ public long edges;
+ }
+
+ public static class Ratios {
+ public double compression;
+ public double bitsPerNode;
+ public double bitsPerEdge;
+ public double avgLocality;
+ }
+
+ public static class Degree {
+ public long min;
+ public long max;
+ public double avg;
+ }
}
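
Usage sketch (illustrative, not part of this diff): this mirrors what Entry.stats() does before serializing the object with Jackson; the basename is a hypothetical placeholder.

    import org.softwareheritage.graph.Stats;
    import java.io.IOException;

    class StatsSketch {
        public static void main(String[] args) throws IOException {
            // Reads <basename>.properties and <basename>.stats produced by WebGraph
            Stats stats = new Stats("/srv/softwareheritage/graph/basename"); // hypothetical basename
            System.out.println(stats.counts.nodes + " nodes, " + stats.counts.edges + " edges");
            System.out.println("bits/edge: " + stats.ratios.bitsPerEdge
                    + ", avg outdegree: " + stats.outdegree.avg);
        }
    }
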
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhPID.java b/java/src/main/java/org/softwareheritage/graph/SwhPID.java
index c355834..c44abdc 100644
--- a/java/src/main/java/org/softwareheritage/graph/SwhPID.java
+++ b/java/src/main/java/org/softwareheritage/graph/SwhPID.java
@@ -1,126 +1,124 @@
package org.softwareheritage.graph;
-import java.lang.System;
-
import com.fasterxml.jackson.annotation.JsonValue;
-import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.DecoderException;
-
-import org.softwareheritage.graph.Node;
+import org.apache.commons.codec.binary.Hex;
/**
* A Software Heritage PID, see the persistent
* identifier documentation.
*
* @author The Software Heritage developers
*/
public class SwhPID {
/** Fixed hash length of the PID */
public static final int HASH_LENGTH = 40;
/** Full PID as a string */
String swhPID;
/** PID node type */
Node.Type type;
/**
* Constructor.
*
* @param swhPID full PID as a string
*/
public SwhPID(String swhPID) {
this.swhPID = swhPID;
// PID format: 'swh:1:type:hash'
String[] parts = swhPID.split(":");
if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) {
throw new IllegalArgumentException("malformed SWH PID: " + swhPID);
}
this.type = Node.Type.fromStr(parts[2]);
if (!parts[3].matches("[0-9a-f]{" + HASH_LENGTH + "}")) {
throw new IllegalArgumentException("malformed SWH PID: " + swhPID);
}
}
+ /**
+ * Creates a SwhPID from a compact binary representation.
+ *
+ * The binary format is specified in the Python module
+ * swh.graph.pid:str_to_bytes .
+ */
+ public static SwhPID fromBytes(byte[] input) {
+ byte[] digest = new byte[20];
+ System.arraycopy(input, 2, digest, 0, digest.length);
+
+ String pidStr = String.format(
+ "swh:%d:%s:%s",
+ input[0],
+ Node.Type.fromInt(input[1]).toString().toLowerCase(),
+ Hex.encodeHexString(digest)
+ );
+ return new SwhPID(pidStr);
+ }
+
@Override
public boolean equals(Object otherObj) {
if (otherObj == this)
return true;
if (!(otherObj instanceof SwhPID))
return false;
SwhPID other = (SwhPID) otherObj;
return swhPID.equals(other.getSwhPID());
}
@Override
public int hashCode() {
return swhPID.hashCode();
}
@Override
public String toString() {
return swhPID;
}
- /** Converts PID to a compact binary representation.
- *
+ /**
+ * Converts PID to a compact binary representation.
+ *
* The binary format is specified in the Python module
* swh.graph.pid:str_to_bytes .
*/
public byte[] toBytes() {
byte[] bytes = new byte[22];
byte[] digest;
bytes[0] = (byte) 1; // namespace version
bytes[1] = (byte) Node.Type.toInt(this.type); // PID type
try {
digest = Hex.decodeHex(this.swhPID.substring(10)); // SHA1 hash
System.arraycopy(digest, 0, bytes, 2, digest.length);
} catch (DecoderException e) {
throw new IllegalArgumentException("invalid hex sequence in PID: " + this.swhPID);
}
return bytes;
}
- /** Creates a SwhPID from a compact binary representation.
- *
- * The binary format is specified in the Python module
- * swh.graph.pid:str_to_bytes .
- */
- public static SwhPID fromBytes(byte[] input) {
- byte[] digest = new byte[20];
- System.arraycopy(input, 2, digest, 0, digest.length);
-
- String pidStr = String.format(
- "swh:%d:%s:%s",
- input[0],
- Node.Type.fromInt(input[1]).toString().toLowerCase(),
- Hex.encodeHexString(digest)
- );
- return new SwhPID(pidStr);
- }
-
/**
* Returns full PID as a string.
*
* @return full PID string
*/
@JsonValue
public String getSwhPID() {
return swhPID;
}
/**
* Returns PID node type.
*
* @return PID corresponding {@link Node.Type}
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getType() {
return type;
}
}
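
Usage sketch (illustrative, not part of this diff): a round-trip through the 22-byte compact form described above (version byte, node type byte, 20-byte SHA1 digest). The content hash below is an arbitrary placeholder.

    import org.softwareheritage.graph.SwhPID;

    class SwhPIDSketch {
        public static void main(String[] args) {
            SwhPID pid = new SwhPID("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2");
            byte[] compact = pid.toBytes();          // 22 bytes: version, node type, SHA1 digest
            SwhPID back = SwhPID.fromBytes(compact); // reconstructs the textual PID
            System.out.println(pid.equals(back));    // true
        }
    }
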
diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java
index 421c02e..5c4902f 100644
--- a/java/src/main/java/org/softwareheritage/graph/Traversal.java
+++ b/java/src/main/java/org/softwareheritage/graph/Traversal.java
@@ -1,523 +1,526 @@
package org.softwareheritage.graph;
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import org.softwareheritage.graph.server.Endpoint;
+
import java.util.*;
import java.util.function.Consumer;
import java.util.function.LongConsumer;
-import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import org.softwareheritage.graph.server.Endpoint;
-
/**
* Traversal algorithms on the compressed graph.
*
* Internal implementation of the traversal API endpoints. These methods only input/output internal
* long ids, which are converted by the {@link Endpoint} higher-level class to Software Heritage
* PIDs.
*
* @author The Software Heritage developers
* @see Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
Graph graph;
/** Graph edge restriction */
AllowedEdges edges;
/** Hash set storing the visited node ids */
HashSet<Long> visited;
/** Hash map storing the parent node id of each node visited during a traversal */
Map<Long, Long> parentNode;
/** Number of edges accessed during traversal */
long nbEdgesAccessed;
/** Random number generator used for random walks */
Random rng;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed edges
*/
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
if (direction.equals("backward")) {
this.graph = graph.transpose();
} else {
this.graph = graph;
}
this.edges = new AllowedEdges(graph, edgesFmt);
this.visited = new HashSet<>();
this.parentNode = new HashMap<>();
this.nbEdgesAccessed = 0;
this.rng = new Random();
}
/**
* Returns number of accessed edges during traversal.
*
* @return number of edges accessed in last traversal
*/
public long getNbEdgesAccessed() {
return nbEdgesAccessed;
}
/**
* Returns number of accessed nodes during traversal.
*
* @return number of nodes accessed in last traversal
*/
public long getNbNodesAccessed() {
return this.visited.size();
}
/**
* Push version of {@link #leaves}: will fire the passed callback on each leaf node.
*/
public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) {
Stack<Long> stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
neighborsCnt++;
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
if (neighborsCnt == 0) {
cb.accept(currentNodeId);
}
}
}
/**
* Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
* @return list of node ids corresponding to the leaves
*/
public ArrayList<Long> leaves(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<>();
leavesVisitor(srcNodeId, nodeIds::add);
return nodeIds;
}
/**
* Push version of {@link #neighbors}: will fire passed callback on each
* neighbor.
*/
public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) {
this.nbEdgesAccessed = graph.outdegree(srcNodeId);
LazyLongIterator it = graph.successors(srcNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
cb.accept(neighborNodeId);
}
}
/**
* Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
* @return list of node ids corresponding to the neighbors
*/
public ArrayList<Long> neighbors(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<>();
neighborsVisitor(srcNodeId, nodeIds::add);
return nodeIds;
}
/**
* Push version of {@link #visitNodes}: will fire passed callback on each
* visited node.
*/
public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) {
Stack<Long> stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (nodeCb != null) {
nodeCb.accept(currentNodeId);
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
if (edgeCb != null) {
edgeCb.accept(currentNodeId, neighborNodeId);
}
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
}
}
}
}
/** One-argument version to handle callbacks properly */
public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) {
visitNodesVisitor(srcNodeId, cb, null);
}
/**
* Performs a graph traversal and returns explored nodes.
*
* @param srcNodeId source node
* @return list of explored node ids
*/
public ArrayList<Long> visitNodes(long srcNodeId) {
ArrayList<Long> nodeIds = new ArrayList<>();
visitNodesVisitor(srcNodeId, nodeIds::add);
return nodeIds;
}
/**
* Push version of {@link #visitPaths}: will fire passed callback on each
* discovered (complete) path.
*/
public void visitPathsVisitor(long srcNodeId, PathConsumer cb) {
Stack<Long> currentPath = new Stack<>();
this.nbEdgesAccessed = 0;
visitPathsInternalVisitor(srcNodeId, currentPath, cb);
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList<ArrayList<Long>> visitPaths(long srcNodeId) {
ArrayList<ArrayList<Long>> paths = new ArrayList<>();
visitPathsVisitor(srcNodeId, paths::add);
return paths;
}
private void visitPathsInternalVisitor(long currentNodeId,
Stack<Long> currentPath,
PathConsumer cb) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
visitPathsInternalVisitor(neighborNodeId, currentPath, cb);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList<Long> path = new ArrayList<>(currentPath);
cb.accept(path);
}
currentPath.pop();
}
/**
* Performs a graph traversal with backtracking, and returns the first
* found path from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public <T> ArrayList<Long> walk(long srcNodeId, T dst, String visitOrder) {
long dstNodeId;
if (visitOrder.equals("dfs")) {
dstNodeId = walkInternalDFS(srcNodeId, dst);
} else if (visitOrder.equals("bfs")) {
dstNodeId = walkInternalBFS(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown visit order: " + visitOrder);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Cannot find destination: " + dst);
}
return backtracking(srcNodeId, dstNodeId);
}
/**
* Performs a random walk (picking a random successor at each step) from
* source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids, or an empty path to indicate
* that no suitable path has been found
*/
public <T> ArrayList<Long> randomWalk(long srcNodeId, T dst) {
return randomWalk(srcNodeId, dst, 0);
}
/**
* Performs a stubborn random walk (picking a random successor at each
* step) from source to destination. The walk is "stubborn" in the sense
* that it will not give up the first time no satisfying target node is
* found: it will retry up to a limited number of times.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @param retries number of times to retry; 0 means no retries (single walk)
* @return found path as a list of node ids, or an empty path to indicate
* that no suitable path has been found
*/
public <T> ArrayList<Long> randomWalk(long srcNodeId, T dst, int retries) {
long curNodeId = srcNodeId;
ArrayList<Long> path = new ArrayList<>();
this.nbEdgesAccessed = 0;
boolean found;
if (retries < 0) {
throw new IllegalArgumentException("Negative number of retries given: " + retries);
}
while (true) {
path.add(curNodeId);
LazyLongIterator successors = graph.successors(curNodeId, edges);
curNodeId = randomPick(successors);
if (curNodeId < 0) {
found = false;
break;
}
if (isDstNode(curNodeId, dst)) {
path.add(curNodeId);
found = true;
break;
}
}
if (found) {
return path;
} else if (retries > 0) { // try again
return randomWalk(srcNodeId, dst, retries - 1);
} else { // not found and no retries left
path.clear();
return path;
}
}
/**
* Randomly choose an element from an iterator over Longs using reservoir
* sampling
*
* @param elements iterator over selection domain
* @return randomly chosen element or -1 if no suitable element was found
*/
private long randomPick(LazyLongIterator elements) {
long curPick = -1;
long seenCandidates = 0;
- for (long element; (element = elements.nextLong()) != -1;) {
+ for (long element; (element = elements.nextLong()) != -1; ) {
seenCandidates++;
if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) {
curPick = element;
}
}
return curPick;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private <T> long walkInternalDFS(long srcNodeId, T dst) {
Stack<Long> stack = new Stack<>();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.add(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
if (!visited.contains(neighborNodeId)) {
stack.push(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private <T> long walkInternalBFS(long srcNodeId, T dst) {
Queue<Long> queue = new LinkedList<>();
this.nbEdgesAccessed = 0;
queue.add(srcNodeId);
visited.add(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.outdegree(currentNodeId);
LazyLongIterator it = graph.successors(currentNodeId, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
if (!visited.contains(neighborNodeId)) {
queue.add(neighborNodeId);
visited.add(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private <T> boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList<Long> backtracking(long srcNodeId, long dstNodeId) {
ArrayList<Long> path = new ArrayList<>();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = parentNode.get(currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
/**
* Find a common descendant between two given nodes using two parallel BFS
*
* @param lhsNode the first node
* @param rhsNode the second node
* @return the common descendant node id, or null if none was found
*/
public Long findCommonDescendant(long lhsNode, long rhsNode) {
Queue<Long> lhsStack = new ArrayDeque<>();
Queue<Long> rhsStack = new ArrayDeque<>();
HashSet<Long> lhsVisited = new HashSet<>();
HashSet<Long> rhsVisited = new HashSet<>();
lhsStack.add(lhsNode);
rhsStack.add(rhsNode);
lhsVisited.add(lhsNode);
rhsVisited.add(rhsNode);
this.nbEdgesAccessed = 0;
Long curNode;
while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) {
if (!lhsStack.isEmpty()) {
curNode = lhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
LazyLongIterator it = graph.successors(curNode, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
if (!lhsVisited.contains(neighborNodeId)) {
if (rhsVisited.contains(neighborNodeId))
return neighborNodeId;
lhsStack.add(neighborNodeId);
lhsVisited.add(neighborNodeId);
}
}
}
if (!rhsStack.isEmpty()) {
curNode = rhsStack.poll();
nbEdgesAccessed += graph.outdegree(curNode);
LazyLongIterator it = graph.successors(curNode, edges);
- for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) {
+ for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) {
if (!rhsVisited.contains(neighborNodeId)) {
if (lhsVisited.contains(neighborNodeId))
return neighborNodeId;
rhsStack.add(neighborNodeId);
rhsVisited.add(neighborNodeId);
}
}
}
}
return null;
}
public interface NodeIdConsumer extends LongConsumer {
- /** Callback for incrementally receiving node identifiers during a graph
+ /**
+ * Callback for incrementally receiving node identifiers during a graph
* visit.
*/
void accept(long nodeId);
}
public interface EdgeIdConsumer {
- /** Callback for incrementally receiving edge identifiers during a graph
+ /**
+ * Callback for incrementally receiving edge identifiers during a graph
* visit.
*/
void accept(long srcId, long dstId);
}
public interface PathConsumer extends Consumer<ArrayList<Long>> {
- /** Callback for incrementally receiving node paths (made of node
+ /**
+ * Callback for incrementally receiving node paths (made of node
* identifiers) during a graph visit.
*/
void accept(ArrayList<Long> path);
}
}
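
Usage sketch (illustrative, not part of this diff): driving this class directly rather than through Endpoint. The graph basename and directory node id are hypothetical placeholders; a fresh Traversal is used per query here because the visited set is a field and is never reset between calls.

    import org.softwareheritage.graph.Graph;
    import org.softwareheritage.graph.Node;
    import org.softwareheritage.graph.Traversal;
    import java.io.IOException;
    import java.util.ArrayList;

    class TraversalSketch {
        public static void main(String[] args) throws IOException {
            Graph g = new Graph("/srv/softwareheritage/graph/basename"); // hypothetical basename
            long rootDir = 42L; // hypothetical directory node id
            // "ls -R"-like visit: follow only dir->dir and dir->cnt edges, forward
            Traversal t = new Traversal(g, "forward", "dir:dir,dir:cnt");
            ArrayList<Long> leaves = t.leaves(rootDir); // contents reachable from the directory
            System.out.println(leaves.size() + " leaves");
            // New Traversal per query, since the visited set of the previous one is kept
            Traversal t2 = new Traversal(g, "forward", "dir:dir,dir:cnt");
            ArrayList<Long> path = t2.walk(rootDir, Node.Type.CNT, "bfs"); // first path to a content
            System.out.println(path);
        }
    }
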
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
index dae2af1..038a3e6 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
@@ -1,47 +1,45 @@
package org.softwareheritage.graph.benchmark;
-import java.io.IOException;
-import java.util.ArrayList;
-
import com.martiansoftware.jsap.JSAPException;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
-
import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.benchmark.Benchmark;
import org.softwareheritage.graph.benchmark.utils.Statistics;
import org.softwareheritage.graph.benchmark.utils.Timing;
+import java.io.IOException;
+import java.util.ArrayList;
+
/**
* Benchmark to time edge access time.
*
* @author The Software Heritage developers
*/
public class AccessEdge {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
ArrayList<Double> timings = new ArrayList<>();
for (long nodeId : nodeIds) {
long startTime = Timing.start();
LazyLongIterator neighbors = graph.successors(nodeId);
long firstNeighbor = neighbors.nextLong();
double duration = Timing.stop(startTime);
timings.add(duration);
}
System.out.println("Used " + bench.args.nbNodes + " random edges (results are in seconds):");
Statistics stats = new Statistics(timings);
stats.printAll();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
index 5d5069b..44f9b92 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java
@@ -1,115 +1,111 @@
package org.softwareheritage.graph.benchmark;
import com.google.common.primitives.Longs;
+import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
+import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
+import it.unimi.dsi.logging.ProgressLogger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.Graph;
-import com.martiansoftware.jsap.*;
-
-import it.unimi.dsi.logging.ProgressLogger;
-import it.unimi.dsi.fastutil.Arrays;
-
import java.io.File;
import java.io.IOException;
public class BFS {
- private final ImmutableGraph graph;
-
private final static Logger LOGGER = LoggerFactory.getLogger(BFS.class);
+ private final ImmutableGraph graph;
public BFS(ImmutableGraph graph) {
this.graph = graph;
}
+ private static JSAPResult parse_args(String[] args) {
+ JSAPResult config = null;
+ try {
+ SimpleJSAP jsap = new SimpleJSAP(
+ BFS.class.getName(),
+ "",
+ new Parameter[]{
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'g', "graph", "Basename of the compressed graph"),
+
+ new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED,
+ 'T', "transposed", "Use transposed graph (default: false)"),
+ }
+ );
+
+ config = jsap.parse(args);
+ if (jsap.messagePrinted()) {
+ System.exit(1);
+ }
+ } catch (JSAPException e) {
+ e.printStackTrace();
+ }
+ return config;
+ }
+
+ public static void main(String[] args) throws IOException {
+ JSAPResult config = parse_args(args);
+ String graphPath = config.getString("graphPath");
+ boolean useTransposed = config.getBoolean("useTransposed");
+
+ System.err.println("Loading graph " + graphPath + " ...");
+ Graph graph = new Graph(graphPath);
+ System.err.println("Graph loaded.");
+
+ if (useTransposed)
+ graph = graph.transpose();
+
+ BFS bfs = new BFS(graph);
+ bfs.bfsperm();
+ }
+
// Partly inlined from it.unimi.dsi.law.big.graph.BFS
private void bfsperm() throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
- int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
+ int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
// Use a disk based queue to store BFS frontier
final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
final LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
final ProgressLogger pl = new ProgressLogger(LOGGER);
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting breadth-first visit...");
for (long i = 0; i < n; i++) {
if (visited.getBoolean(i)) continue;
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
- while((succ = iterator.nextLong()) != -1) {
+ while ((succ = iterator.nextLong()) != -1) {
if (!visited.getBoolean(succ)) {
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
}
pl.update();
}
}
pl.done();
queue.close();
}
-
-
- private static JSAPResult parse_args(String[] args) {
- JSAPResult config = null;
- try {
- SimpleJSAP jsap = new SimpleJSAP(
- BFS.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
-
- new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED,
- 'T', "transposed", "Use transposed graph (default: false)"),
- }
- );
-
- config = jsap.parse(args);
- if (jsap.messagePrinted()) {
- System.exit(1);
- }
- } catch (JSAPException e) {
- e.printStackTrace();
- }
- return config;
- }
-
- public static void main(String[] args) throws IOException {
- JSAPResult config = parse_args(args);
- String graphPath = config.getString("graphPath");
- boolean useTransposed = config.getBoolean("useTransposed");
-
- System.err.println("Loading graph " + graphPath + " ...");
- Graph graph = new Graph(graphPath);
- System.err.println("Graph loaded.");
-
- if (useTransposed)
- graph = graph.transpose();
-
- BFS bfs = new BFS(graph);
- bfs.bfsperm();
- }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
index 005d3f1..884dea2 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java
@@ -1,170 +1,162 @@
package org.softwareheritage.graph.benchmark;
+import com.martiansoftware.jsap.*;
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhPID;
+import org.softwareheritage.graph.benchmark.utils.Random;
+import org.softwareheritage.graph.benchmark.utils.Statistics;
+import org.softwareheritage.graph.server.Endpoint;
+
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.StringJoiner;
import java.util.function.Function;
-import com.martiansoftware.jsap.FlaggedOption;
-import com.martiansoftware.jsap.JSAP;
-import com.martiansoftware.jsap.JSAPException;
-import com.martiansoftware.jsap.JSAPResult;
-import com.martiansoftware.jsap.Parameter;
-import com.martiansoftware.jsap.SimpleJSAP;
-import com.martiansoftware.jsap.UnflaggedOption;
-
-import org.softwareheritage.graph.server.Endpoint;
-import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.SwhPID;
-import org.softwareheritage.graph.benchmark.utils.Random;
-import org.softwareheritage.graph.benchmark.utils.Statistics;
-
/**
* Benchmark common utility functions.
*
* @author The Software Heritage developers
*/
public class Benchmark {
- /**
- * Input arguments.
- */
- public class Args {
- /** Basename of the compressed graph */
- public String graphPath;
- /** Number of random nodes to use for the benchmark */
- public int nbNodes;
- /** File name for CSV format benchmark log */
- public String logFile;
- /** Random generator */
- public Random random;
- }
-
- /** Command line arguments */
- public Args args;
/** CSV separator for log file */
final String CSV_SEPARATOR = ";";
-
+ /** Command line arguments */
+ public Args args;
/**
* Constructor.
*/
public Benchmark() {
this.args = new Args();
}
/**
* Parses benchmark command line arguments.
*
* @param args command line arguments
*/
public void parseCommandLineArgs(String[] args) throws JSAPException {
SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(),
- "Benchmark tool for Software Heritage use-cases scenarios.",
- new Parameter[] {
- new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- JSAP.NOT_GREEDY, "The basename of the compressed graph."),
- new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
- "nb-nodes", "Number of random nodes used to do the benchmark."),
- new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l',
- "log-file", "File name to output CSV format benchmark log."),
- new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's',
- "seed", "Random generator seed."),
- });
+ "Benchmark tool for Software Heritage use-cases scenarios.",
+ new Parameter[]{
+ new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ JSAP.NOT_GREEDY, "The basename of the compressed graph."),
+ new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n',
+ "nb-nodes", "Number of random nodes used to do the benchmark."),
+ new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l',
+ "log-file", "File name to output CSV format benchmark log."),
+ new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's',
+ "seed", "Random generator seed."),
+ });
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
this.args.graphPath = config.getString("graphPath");
this.args.nbNodes = config.getInt("nbNodes");
this.args.logFile = config.getString("logFile");
this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random();
}
/**
* Creates CSV file for log output.
*/
public void createCSVLogFile() throws IOException {
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) {
StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR);
csvHeader.add("use case name")
- .add("SWH PID")
- .add("number of edges accessed")
- .add("traversal timing")
- .add("pid2node timing")
- .add("node2pid timing");
+ .add("SWH PID")
+ .add("number of edges accessed")
+ .add("traversal timing")
+ .add("pid2node timing")
+ .add("node2pid timing");
csvLog.write(csvHeader.toString() + "\n");
}
}
/**
* Times a specific endpoint and outputs individual datapoints along with aggregated statistics.
*
* @param useCaseName benchmark use-case name
* @param graph compressed graph used in the benchmark
* @param nodeIds node ids to use as starting point for the endpoint traversal
* @param operation endpoint function to benchmark
* @param dstFmt destination formatted string as described in the API
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
Function<Endpoint.Input, Endpoint.Output> operation, String dstFmt, String algorithm)
- throws IOException {
+ throws IOException {
ArrayList<Double> timings = new ArrayList<>();
ArrayList<Double> timingsNormalized = new ArrayList<>();
ArrayList<Double> nbEdgesAccessed = new ArrayList<>();
final boolean append = true;
try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) {
for (long nodeId : nodeIds) {
SwhPID swhPID = graph.getSwhPID(nodeId);
Endpoint.Output output = (dstFmt == null)
- ? operation.apply(new Endpoint.Input(swhPID))
- : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm));
+ ? operation.apply(new Endpoint.Input(swhPID))
+ : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm));
StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR);
csvLine.add(useCaseName)
- .add(swhPID.toString())
- .add(Long.toString(output.meta.nbEdgesAccessed))
- .add(Double.toString(output.meta.timings.traversal))
- .add(Double.toString(output.meta.timings.pid2node))
- .add(Double.toString(output.meta.timings.node2pid));
+ .add(swhPID.toString())
+ .add(Long.toString(output.meta.nbEdgesAccessed))
+ .add(Double.toString(output.meta.timings.traversal))
+ .add(Double.toString(output.meta.timings.pid2node))
+ .add(Double.toString(output.meta.timings.node2pid));
csvLog.write(csvLine.toString() + "\n");
timings.add(output.meta.timings.traversal);
nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed);
if (output.meta.nbEdgesAccessed != 0) {
timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed);
}
}
}
System.out.println("\n" + useCaseName + " use-case:");
System.out.println("timings:");
Statistics stats = new Statistics(timings);
stats.printAll();
System.out.println("timings normalized:");
Statistics statsNormalized = new Statistics(timingsNormalized);
statsNormalized.printAll();
System.out.println("nb edges accessed:");
Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed);
statsNbEdgesAccessed.printAll();
}
/**
* Same as {@link #timeEndpoint} but without destination or algorithm specified to endpoint call.
*/
public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds,
Function<Endpoint.Input, Endpoint.Output> operation) throws IOException {
timeEndpoint(useCaseName, graph, nodeIds, operation, null, null);
}
+
+ /**
+ * Input arguments.
+ */
+ public class Args {
+ /** Basename of the compressed graph */
+ public String graphPath;
+ /** Number of random nodes to use for the benchmark */
+ public int nbNodes;
+ /** File name for CSV format benchmark log */
+ public String logFile;
+ /** Random generator */
+ public Random random;
+ }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
index b060d55..910ca76 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java
@@ -1,45 +1,44 @@
package org.softwareheritage.graph.benchmark;
-import java.io.IOException;
-
import com.martiansoftware.jsap.JSAPException;
-
-import org.softwareheritage.graph.server.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+import org.softwareheritage.graph.server.Endpoint;
+
+import java.io.IOException;
/**
* Benchmark Software Heritage browsing
* use-cases scenarios.
*
* @author The Software Heritage developers
*/
public class Browsing {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] dirNodeIds =
- bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
+ bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR);
long[] revNodeIds =
- bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
+ bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV);
Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir");
Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors);
bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths);
bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java
index d4ccdff..d9c3846 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java
@@ -1,134 +1,136 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.*;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.Traversal;
import org.softwareheritage.graph.benchmark.utils.Timing;
import java.io.IOException;
import java.util.Scanner;
-import java.util.concurrent.*;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
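/**
 * Benchmark that reads node ids on standard input and, for each node of the requested
 * source type, runs a backward traversal (restricted to the given edge types) counting
 * the reachable nodes of the destination type, printing one line of statistics per node.
 *
 * @author The Software Heritage developers
 */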
public class GenDistribution {
private Graph graph;
- private void load_graph(String graphBasename) throws IOException {
- System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = new Graph(graphBasename);
- System.err.println("Graph loaded.");
- }
-
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
SimpleJSAP jsap = new SimpleJSAP(
- GenDistribution.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 's', "srctype", "Source node type"),
- new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'd', "dsttype", "Destination node type"),
- new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'e', "edges", "Edges constraints"),
-
- new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED,
- 't', "numthreads", "Number of threads"),
- }
+ GenDistribution.class.getName(),
+ "",
+ new Parameter[]{
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'g', "graph", "Basename of the compressed graph"),
+ new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 's', "srctype", "Source node type"),
+ new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'd', "dsttype", "Destination node type"),
+ new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'e', "edges", "Edges constraints"),
+
+ new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED,
+ 't', "numthreads", "Number of threads"),
+ }
);
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
Node.Type srcType = Node.Type.fromStr(config.getString("srcType"));
Node.Type dstType = Node.Type.fromStr(config.getString("dstType"));
String edgesFmt = config.getString("edgesFmt");
int numThreads = config.getInt("numThreads");
GenDistribution tp = new GenDistribution();
try {
tp.load_graph(graphPath);
} catch (IOException e) {
System.out.println("Could not load graph: " + e);
System.exit(2);
}
final long END_OF_QUEUE = -1L;
ArrayBlockingQueue<Long> queue = new ArrayBlockingQueue<>(numThreads);
ExecutorService service = Executors.newFixedThreadPool(numThreads + 1);
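// Producer/consumer layout: a single producer thread reads node ids from stdin and
// enqueues those of the requested source type, while numThreads worker threads each
// run one backward traversal per dequeued node. One END_OF_QUEUE sentinel per worker
// marks the end of input.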
service.submit(() -> {
try {
Scanner input = new Scanner(System.in);
while (input.hasNextLong()) {
long node = input.nextLong();
if (tp.graph.getNodeType(node) == srcType) {
queue.put(node);
}
}
} catch (InterruptedException e) {
e.printStackTrace();
} finally {
for (int i = 0; i < numThreads; ++i) {
try {
queue.put(END_OF_QUEUE);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
});
for (int i = 0; i < numThreads; ++i) {
service.submit(() -> {
Graph thread_graph = tp.graph.copy();
long startTime;
double totalTime;
while (true) {
Long node = null;
try {
node = queue.take();
} catch (InterruptedException e) {
e.printStackTrace();
}
if (node == null || node == END_OF_QUEUE) {
return;
}
Traversal t = new Traversal(thread_graph, "backward", edgesFmt);
- int[] count = { 0 };
+ int[] count = {0};
startTime = Timing.start();
t.visitNodesVisitor(node, (curnode) -> {
if (tp.graph.getNodeType(curnode) == dstType) {
count[0]++;
}
});
totalTime = Timing.stop(startTime);
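// Output columns: node id, number of dstType nodes reached, nodes accessed,
// edges accessed, traversal time.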
System.out.format("%d %d %d %d %f\n",
node, count[0], t.getNbNodesAccessed(),
t.getNbEdgesAccessed(), totalTime
);
}
});
}
service.shutdown();
}
+
+ private void load_graph(String graphBasename) throws IOException {
+ System.err.println("Loading graph " + graphBasename + " ...");
+ this.graph = new Graph(graphBasename);
+ System.err.println("Graph loaded.");
+ }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/ListEmptyOrigins.java b/java/src/main/java/org/softwareheritage/graph/benchmark/ListEmptyOrigins.java
index 402632c..8ee8ede 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/ListEmptyOrigins.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/ListEmptyOrigins.java
@@ -1,93 +1,93 @@
package org.softwareheritage.graph.benchmark;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import java.io.IOException;
import java.util.ArrayList;
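/**
 * Lists origin nodes none of whose successors (snapshots) has any outgoing edge, i.e.
 * origins that point only to empty snapshots; matching node ids are printed on stdout.
 *
 * @author The Software Heritage developers
 */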
public class ListEmptyOrigins {
private Graph graph;
private Long emptySnapshot;
- private void load_graph(String graphBasename) throws IOException {
- System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = new Graph(graphBasename);
- System.err.println("Graph loaded.");
- this.emptySnapshot = null;
- }
-
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
SimpleJSAP jsap = new SimpleJSAP(
- ListEmptyOrigins.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- }
+ ListEmptyOrigins.class.getName(),
+ "",
+ new Parameter[]{
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'g', "graph", "Basename of the compressed graph"),
+ }
);
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
+ public static void main(String[] args) {
+ JSAPResult config = parse_args(args);
+ String graphPath = config.getString("graphPath");
+
+ ListEmptyOrigins leo = new ListEmptyOrigins();
+ try {
+ leo.load_graph(graphPath);
+ } catch (IOException e) {
+ System.out.println("Could not load graph: " + e);
+ System.exit(2);
+ }
+        ArrayList<Long> badlist = leo.compute(leo.graph);
+ for (Long bad : badlist) {
+ System.out.println(bad);
+ }
+ }
+
+ private void load_graph(String graphBasename) throws IOException {
+ System.err.println("Loading graph " + graphBasename + " ...");
+ this.graph = new Graph(graphBasename);
+ System.err.println("Graph loaded.");
+ this.emptySnapshot = null;
+ }
+
private boolean nodeIsEmptySnapshot(Long node) {
System.err.println(this.graph.getNodeType(node) + " " + this.graph.outdegree(node) + " " + node);
if (this.emptySnapshot == null
&& this.graph.getNodeType(node) == Node.Type.SNP
&& this.graph.outdegree(node) == 0) {
System.err.println("Found empty snapshot: " + node);
this.emptySnapshot = node;
}
return node.equals(this.emptySnapshot);
}
private ArrayList<Long> compute(ImmutableGraph graph) {
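// An origin is reported when none of its successors has an outgoing edge,
// i.e. when it points only to empty snapshots.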
final long n = graph.numNodes();
ArrayList<Long> bad = new ArrayList<>();
for (long i = 0; i < n; i++) {
Node.Type nt = this.graph.getNodeType(i);
if (nt != Node.Type.ORI) continue;
final LazyLongIterator iterator = graph.successors(i);
long succ;
boolean found = false;
while ((succ = iterator.nextLong()) != -1) {
if (this.graph.outdegree(succ) > 0) {
found = true;
}
}
if (!found)
bad.add(i);
}
return bad;
}
-
- public static void main(String[] args) {
- JSAPResult config = parse_args(args);
- String graphPath = config.getString("graphPath");
-
- ListEmptyOrigins leo = new ListEmptyOrigins();
- try {
- leo.load_graph(graphPath);
- } catch (IOException e) {
- System.out.println("Could not load graph: " + e);
- System.exit(2);
- }
-        ArrayList<Long> badlist = leo.compute(leo.graph);
- for (Long bad : badlist) {
- System.out.println(bad);
- }
- }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
index f8aec35..324f38e 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java
@@ -1,52 +1,51 @@
package org.softwareheritage.graph.benchmark;
-import java.io.IOException;
-
import com.martiansoftware.jsap.JSAPException;
-
-import org.softwareheritage.graph.server.Endpoint;
import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.server.Endpoint;
+
+import java.io.IOException;
/**
* Benchmark Software Heritage provenance
* use-case scenarios.
*
* @author The Software Heritage developers
*/
public class Provenance {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev");
Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint(
- "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
+ "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs");
bench.timeEndpoint(
- "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
+ "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs");
bench.timeEndpoint(
- "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
+ "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves);
bench.timeEndpoint(
- "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
+ "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs");
bench.timeEndpoint(
- "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
+ "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs");
bench.timeEndpoint(
- "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
+ "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
index c42b96a..3a6c6dc 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java
@@ -1,38 +1,37 @@
package org.softwareheritage.graph.benchmark;
-import java.io.IOException;
-
import com.martiansoftware.jsap.JSAPException;
-
-import org.softwareheritage.graph.server.Endpoint;
import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.server.Endpoint;
+
+import java.io.IOException;
/**
* Benchmark Software Heritage vault
* use-case scenario.
*
* @author The Software Heritage developers
*/
public class Vault {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
Benchmark bench = new Benchmark();
bench.parseCommandLineArgs(args);
Graph graph = new Graph(bench.args.graphPath);
long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes);
Endpoint endpoint = new Endpoint(graph, "forward", "*");
System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):");
bench.createCSVLogFile();
bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java b/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
index df93c96..ee4c530 100644
--- a/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
+++ b/java/src/main/java/org/softwareheritage/graph/benchmark/utils/Random.java
@@ -1,67 +1,67 @@
package org.softwareheritage.graph.benchmark.utils;
-import java.util.PrimitiveIterator;
-
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+import java.util.PrimitiveIterator;
+
/**
* Random related utility class.
*
* @author The Software Heritage developers
*/
public class Random {
/** Internal pseudorandom generator */
java.util.Random random;
/**
* Constructor.
*/
public Random() {
this.random = new java.util.Random();
}
/**
* Constructor.
*
* @param seed random generator seed
*/
public Random(long seed) {
this.random = new java.util.Random(seed);
}
/**
* Generates random node ids.
*
* @param graph graph used to pick node ids
* @param nbNodes number of node ids to generate
* @return an array of random node ids
*/
public long[] generateNodeIds(Graph graph, int nbNodes) {
return random.longs(nbNodes, 0, graph.numNodes()).toArray();
}
/**
* Generates random node ids with a specific type.
*
* @param graph graph used to pick node ids
* @param nbNodes number of node ids to generate
* @param expectedType specific node type to pick
* @return an array of random node ids
*/
public long[] generateNodeIdsOfType(Graph graph, int nbNodes, Node.Type expectedType) {
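// Rejection sampling: draw node ids uniformly at random and keep only those
// whose type matches the expected one.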
PrimitiveIterator.OfLong nodes = random.longs(0, graph.numNodes()).iterator();
long[] nodeIds = new long[nbNodes];
long nextId;
for (int i = 0; i < nbNodes; i++) {
do {
nextId = nodes.nextLong();
} while (graph.getNodeType(nextId) != expectedType);
nodeIds[i] = nextId;
}
return nodeIds;
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
index 2eebc4c..fc9667c 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java
@@ -1,217 +1,219 @@
package org.softwareheritage.graph.experiments.forks;
import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.benchmark.BFS;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.TreeMap;
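/**
 * Computes connected components on the symmetrized graph, skipping content nodes
 * (and directories unless includeRootDir is set), and prints the distribution of
 * component sizes counted in origin nodes.
 *
 * @author The Software Heritage developers
 */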
public class ForkCC {
public Boolean includeRootDir;
private Graph graph;
private Long emptySnapshot;
private LongArrayBitVector visited;
private LongArrayBitVector whitelist;
- private void load_graph(String graphBasename) throws IOException {
- System.err.println("Loading graph " + graphBasename + " ...");
- this.graph = new Graph(graphBasename).symmetrize();
- System.err.println("Graph loaded.");
- this.emptySnapshot = null;
- this.whitelist = null;
- this.visited = null;
- this.includeRootDir = null;
- }
-
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
SimpleJSAP jsap = new SimpleJSAP(
- ForkCC.class.getName(),
- "",
- new Parameter[] {
- new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- 'g', "graph", "Basename of the compressed graph"),
- new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED,
- 't', "whitelist", "Whitelist of origins"),
- new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED,
- 'w', "numthreads", "Number of threads"),
- new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED,
- 'R', "includerootdir", "Include root directory (default: false)"),
- }
+ ForkCC.class.getName(),
+ "",
+ new Parameter[]{
+ new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ 'g', "graph", "Basename of the compressed graph"),
+ new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED,
+ 't', "whitelist", "Whitelist of origins"),
+ new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED,
+ 'w', "numthreads", "Number of threads"),
+ new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED,
+ 'R', "includerootdir", "Include root directory (default: false)"),
+ }
);
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
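+    /** Prints the distribution of component sizes, one "size count" pair per line. */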
+    private static void printDistribution(ArrayList<ArrayList<Long>> components) {
+        TreeMap<Long, Long> distribution = new TreeMap<>();
+        for (ArrayList<Long> component : components) {
+ distribution.merge((long) component.size(), 1L, Long::sum);
+ }
+
+        for (Map.Entry<Long, Long> entry : distribution.entrySet()) {
+ System.out.format("%d %d\n", entry.getKey(), entry.getValue());
+ }
+ }
+
+    private static void printLargestComponent(ArrayList<ArrayList<Long>> components) {
+ int indexLargest = 0;
+ for (int i = 1; i < components.size(); ++i) {
+ if (components.get(i).size() > components.get(indexLargest).size())
+ indexLargest = i;
+ }
+
+        ArrayList<Long> component = components.get(indexLargest);
+ for (Long node : component) {
+ System.out.println(node);
+ }
+ }
+
+ public static void main(String[] args) {
+ JSAPResult config = parse_args(args);
+
+ String graphPath = config.getString("graphPath");
+ int numThreads = config.getInt("numThreads");
+ String whitelistPath = config.getString("whitelistPath");
+ boolean includeRootDir = config.getBoolean("includeRootDir");
+
+ ForkCC forkCc = new ForkCC();
+ try {
+ forkCc.load_graph(graphPath);
+ forkCc.includeRootDir = includeRootDir;
+ } catch (IOException e) {
+ System.out.println("Could not load graph: " + e);
+ System.exit(2);
+ }
+
+ if (whitelistPath != null) {
+ forkCc.parseWhitelist(whitelistPath);
+ }
+
+ ProgressLogger logger = new ProgressLogger();
+ try {
+            ArrayList<ArrayList<Long>> components = forkCc.compute(logger);
+ printDistribution(components);
+ // printLargestComponent(components);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ logger.done();
+ }
+
+ private void load_graph(String graphBasename) throws IOException {
+ System.err.println("Loading graph " + graphBasename + " ...");
+ this.graph = new Graph(graphBasename).symmetrize();
+ System.err.println("Graph loaded.");
+ this.emptySnapshot = null;
+ this.whitelist = null;
+ this.visited = null;
+ this.includeRootDir = null;
+ }
+
private boolean nodeIsEmptySnapshot(Long node) {
if (this.emptySnapshot == null
&& this.graph.getNodeType(node) == Node.Type.SNP
&& this.graph.outdegree(node) == 0) {
System.err.println("Found empty snapshot: " + node);
this.emptySnapshot = node;
}
return node.equals(this.emptySnapshot);
}
- private Boolean shouldVisit(Long node){
+ private Boolean shouldVisit(Long node) {
Node.Type nt = this.graph.getNodeType(node);
if (nt == Node.Type.CNT) {
return false;
}
if (nt == Node.Type.DIR && !includeRootDir)
return false;
if (this.nodeIsEmptySnapshot(node))
return false;
if (visited.getBoolean(node))
return false;
return true;
}
-
private ArrayList<ArrayList<Long>> compute(ProgressLogger pl) throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
- int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
+ int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
// Use a disk based queue to store BFS frontier
final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue");
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true);
final byte[] byteBuf = new byte[Long.BYTES];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
visited = LongArrayBitVector.ofLength(n);
pl.expectedUpdates = n;
pl.itemsName = "nodes";
pl.start("Starting connected components visit...");
ArrayList<ArrayList<Long>> components = new ArrayList<>();
for (long i = 0; i < n; i++) {
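// Directories never seed a new component; when includeRootDir is set they are
// only visited once reached from a revision.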
if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR) continue;
ArrayList<Long> component = new ArrayList<>();
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
Node.Type cur_nt = this.graph.getNodeType(currentNode);
if (cur_nt == Node.Type.ORI
&& (this.whitelist == null || this.whitelist.getBoolean(currentNode))) {
// TODO: add a check that the origin has >=1 non-empty snapshot
component.add(currentNode);
}
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while ((succ = iterator.nextLong()) != -1) {
if (!shouldVisit(succ)) continue;
if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV) continue;
visited.set(succ);
queue.enqueue(Longs.toByteArray(succ));
}
pl.update();
}
if (component.size() > 0) {
components.add(component);
}
}
pl.done();
queue.close();
return components;
}
-    private static void printDistribution(ArrayList<ArrayList<Long>> components) {
-        TreeMap<Long, Long> distribution = new TreeMap<>();
-        for (ArrayList<Long> component : components) {
- distribution.merge((long) component.size(), 1L, Long::sum);
- }
-
-        for (Map.Entry<Long, Long> entry : distribution.entrySet()) {
- System.out.format("%d %d\n", entry.getKey(), entry.getValue());
- }
- }
-
-    private static void printLargestComponent(ArrayList<ArrayList<Long>> components) {
- int indexLargest = 0;
- for (int i = 1; i < components.size(); ++i) {
- if (components.get(i).size() > components.get(indexLargest).size())
- indexLargest = i;
- }
-
-        ArrayList<Long> component = components.get(indexLargest);
- for (Long node : component) {
- System.out.println(node);
- }
- }
-
private void parseWhitelist(String path) {
System.err.println("Loading whitelist " + path + " ...");
this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes());
Scanner scanner = null;
try {
scanner = new Scanner(new File(path));
- while(scanner.hasNextLong()) {
+ while (scanner.hasNextLong()) {
whitelist.set(scanner.nextLong());
}
System.err.println("Whitelist loaded.");
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
-
- public static void main(String[] args) {
- JSAPResult config = parse_args(args);
-
- String graphPath = config.getString("graphPath");
- int numThreads = config.getInt("numThreads");
- String whitelistPath = config.getString("whitelistPath");
- boolean includeRootDir = config.getBoolean("includeRootDir");
-
- ForkCC forkCc = new ForkCC();
- try {
- forkCc.load_graph(graphPath);
- forkCc.includeRootDir = includeRootDir;
- } catch (IOException e) {
- System.out.println("Could not load graph: " + e);
- System.exit(2);
- }
-
- if (whitelistPath != null) {
- forkCc.parseWhitelist(whitelistPath);
- }
-
- ProgressLogger logger = new ProgressLogger();
- try {
-            ArrayList<ArrayList<Long>> components = forkCc.compute(logger);
- printDistribution(components);
- // printLargestComponent(components);
- } catch (IOException e) {
- e.printStackTrace();
- }
- logger.done();
- }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/MapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/MapBuilder.java
index 4bb6506..90fcd50 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/MapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/MapBuilder.java
@@ -1,215 +1,212 @@
package org.softwareheritage.graph.maps;
-import java.io.*;
-import java.nio.charset.StandardCharsets;
-import java.util.Scanner;
-import java.util.concurrent.*;
-
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.logging.ProgressLogger;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhPID;
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.Scanner;
+import java.util.concurrent.TimeUnit;
+
/**
* Create maps needed at runtime by the graph service, in particular:
- *
+ *
* - SWH PID → WebGraph long node id
* - WebGraph long node id → SWH PID (converse of the former)
* - WebGraph long node id → SWH node type (enum)
*
* @author The Software Heritage developers
*/
public class MapBuilder {
final static String SORT_BUFFER_SIZE = "40%";
final static Logger logger = LoggerFactory.getLogger(MapBuilder.class);
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException {
if (args.length != 2) {
logger.error("Usage: COMPRESSED_GRAPH_BASE_NAME TEMP_DIR < NODES_CSV");
System.exit(1);
}
String graphPath = args[0];
String tmpDir = args[1];
logger.info("starting maps generation...");
precomputeNodeIdMap(graphPath, tmpDir);
logger.info("maps generation completed");
}
/**
* Computes and dumps on disk mapping files.
*
* @param graphPath path of the compressed graph
*/
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String graphPath, String tmpDir)
- throws IOException
- {
+ throws IOException {
ProgressLogger plPid2Node = new ProgressLogger(logger, 10, TimeUnit.SECONDS);
ProgressLogger plNode2Pid = new ProgressLogger(logger, 10, TimeUnit.SECONDS);
plPid2Node.itemsName = "pid→node";
plNode2Pid.itemsName = "node→pid";
// avg speed for pid→node is sometimes skewed due to writes to the sort
// pipe hanging while sort is sorting; hence also display local speed
plPid2Node.displayLocalSpeed = true;
// first half of PID->node mapping: PID -> WebGraph MPH (long)
Object2LongFunction<String> mphMap = null;
try {
logger.info("loading MPH function...");
mphMap = (Object2LongFunction<String>) BinIO.loadObject(graphPath + ".mph");
logger.info("MPH function loaded");
} catch (ClassNotFoundException e) {
logger.error("unknown class object in .mph file: " + e);
System.exit(2);
}
long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
plPid2Node.expectedUpdates = nbIds;
plNode2Pid.expectedUpdates = nbIds;
// second half of PID->node mapping: WebGraph MPH (long) -> BFS order (long)
long[][] bfsMap = LongBigArrays.newBigArray(nbIds);
logger.info("loading BFS order file...");
long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap);
logger.info("BFS order file loaded");
if (loaded != nbIds) {
logger.error("graph contains " + nbIds + " nodes, but read " + loaded);
System.exit(2);
}
// Create mapping SWH PID -> WebGraph node id, by sequentially reading
// nodes, hashing them with MPH, and permuting according to BFS order
FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(System.in,
- StandardCharsets.US_ASCII));
+ StandardCharsets.US_ASCII));
LineIterator swhPIDIterator = new LineIterator(buffer);
// The WebGraph node id -> SWH PID mapping can be obtained from the
// PID->node one by numerically sorting on node id and sequentially
// writing obtained PIDs to a binary map. Delegates the sorting job to
// /usr/bin/sort via pipes
ProcessBuilder processBuilder = new ProcessBuilder();
processBuilder.command("sort", "--numeric-sort", "--key", "2",
- "--buffer-size", SORT_BUFFER_SIZE,
- "--temporary-directory", tmpDir);
+ "--buffer-size", SORT_BUFFER_SIZE,
+ "--temporary-directory", tmpDir);
Process sort = processBuilder.start();
BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream());
BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream());
// for the binary format of pidToNodeMap, see Python module swh.graph.pid:PidToIntMap
// for the binary format of nodeToPidMap, see Python module swh.graph.pid:IntToPidMap
try (DataOutputStream pidToNodeMap = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(graphPath + Graph.PID_TO_NODE)));
BufferedOutputStream nodeToPidMap = new BufferedOutputStream(new FileOutputStream(graphPath + Graph.NODE_TO_PID))) {
// background handler for sort output: it will be fed PID/node
// pairs while pidToNodeMap is being filled, and will itself fill
// nodeToPidMap as soon as data from sort is ready
SortOutputHandler outputHandler = new SortOutputHandler(sort_stdout, nodeToPidMap, plNode2Pid);
outputHandler.start();
// Type map from WebGraph node ID to SWH type. Used at runtime by
// pure Java graph traversals to efficiently check edge
// restrictions.
final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length)
- / Math.log(2));
+ / Math.log(2));
final int nbBitsPerNodeType = log2NbTypes;
LongArrayBitVector nodeTypesBitVector =
- LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
+ LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType);
plPid2Node.start("filling pid2node map");
for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) {
String strSwhPID = swhPIDIterator.next().toString();
SwhPID swhPID = new SwhPID(strSwhPID);
byte[] swhPIDBin = swhPID.toBytes();
long mphId = mphMap.getLong(strSwhPID);
long nodeId = BigArrays.get(bfsMap, mphId);
pidToNodeMap.write(swhPIDBin, 0, swhPIDBin.length);
pidToNodeMap.writeLong(nodeId);
sort_stdin.write((strSwhPID + "\t" + nodeId + "\n")
- .getBytes(StandardCharsets.US_ASCII));
+ .getBytes(StandardCharsets.US_ASCII));
nodeTypesMap.set(nodeId, swhPID.getType().ordinal());
plPid2Node.lightUpdate();
}
plPid2Node.done();
sort_stdin.close();
// write type map
logger.info("storing type map");
BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE);
logger.info("type map stored");
// wait for nodeToPidMap filling
try {
logger.info("waiting for node2pid map...");
int sortExitCode = sort.waitFor();
if (sortExitCode != 0) {
logger.error("sort returned non-zero exit code: " + sortExitCode);
System.exit(2);
}
outputHandler.join();
} catch (InterruptedException e) {
logger.error("processing of sort output failed with: " + e);
System.exit(2);
}
}
}
private static class SortOutputHandler extends Thread {
private Scanner input;
private OutputStream output;
private ProgressLogger pl;
SortOutputHandler(InputStream input, OutputStream output, ProgressLogger pl) {
this.input = new Scanner(input, StandardCharsets.US_ASCII);
this.output = output;
this.pl = pl;
}
public void run() {
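// Consume "SWH_PID <tab> nodeId" lines, already numerically sorted by node id,
// and append each PID as a fixed-size binary record to the node→PID map.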
boolean sortDone = false;
logger.info("node2pid: waiting for sort output...");
while (input.hasNextLine()) {
- if (! sortDone) {
+ if (!sortDone) {
sortDone = true;
this.pl.start("filling node2pid map");
}
String line = input.nextLine(); // format: SWH_PID NODE_ID
SwhPID swhPID = new SwhPID(line.split("\\t")[0]); // get PID
try {
output.write((byte[]) swhPID.toBytes());
} catch (IOException e) {
logger.error("writing to node->PID map failed with: " + e);
}
this.pl.lightUpdate();
}
this.pl.done();
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/MapFile.java b/java/src/main/java/org/softwareheritage/graph/maps/MapFile.java
index 4c16130..64f577c 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/MapFile.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/MapFile.java
@@ -1,62 +1,62 @@
package org.softwareheritage.graph.maps;
+import it.unimi.dsi.io.ByteBufferInputStream;
+
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
-import it.unimi.dsi.io.ByteBufferInputStream;
-
/**
* Wrapper class around very big mmap()-ed file.
*
* Java limits the size of mmap()-ed files because it lacks 64-bit buffer indexing; the dsiutils ByteBufferInputStream is
* used to work around this limit.
*
* @author The Software Heritage developers
*/
public class MapFile {
/** Memory-mapped file buffer */
ByteBufferInputStream bufferMap;
/** Fixed line length of the mmap()-ed file */
int lineLength;
/**
* Constructor.
*
* @param path file path to mmap()
* @param lineLength fixed length of a line in the file
*/
public MapFile(String path, int lineLength) throws IOException {
this.bufferMap = null;
this.lineLength = lineLength;
try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) {
FileChannel fileChannel = mapFile.getChannel();
bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY);
}
}
/**
* Returns a specific line in the file.
*
* @param lineIndex line number in the file
* @return the line at the specified position
*/
public byte[] readAtLine(long lineIndex) {
byte[] buffer = new byte[lineLength];
long position = lineIndex * (long) lineLength;
bufferMap.position(position);
bufferMap.read(buffer, 0, lineLength);
return buffer;
}
/**
* Closes the mmap()-ed file.
*/
public void close() throws IOException {
bufferMap.close();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java
index fd21f50..5ddabb3 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java
@@ -1,114 +1,114 @@
package org.softwareheritage.graph.maps;
-import java.io.IOException;
-
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhPID;
+import java.io.IOException;
+
/**
* Mapping between internal long node id and external SWH PID.
- *
+ *
* Mappings in both directions are pre-computed and dumped on disk in the
* {@link MapBuilder} class, then they are loaded here using mmap().
*
* @author The Software Heritage developers
* @see org.softwareheritage.graph.maps.MapBuilder
*/
public class NodeIdMap {
/** Fixed length of full SWH PID */
public static final int SWH_ID_LENGTH = 50;
/** Fixed length of long node id */
public static final int NODE_ID_LENGTH = 20;
/** Fixed length of binary SWH PID buffer */
public static final int SWH_ID_BIN_SIZE = 22;
/** Fixed length of binary node id buffer */
public static final int NODE_ID_BIN_SIZE = 8;
/** Graph path and basename */
String graphPath;
/** Number of ids to map */
long nbIds;
/** mmap()-ed PID_TO_NODE file */
MapFile swhToNodeMap;
/** mmap()-ed NODE_TO_PID file */
MapFile nodeToSwhMap;
/**
* Constructor.
*
* @param graphPath full graph path
* @param nbNodes number of nodes in the graph
*/
public NodeIdMap(String graphPath, long nbNodes) throws IOException {
this.graphPath = graphPath;
this.nbIds = nbNodes;
this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, SWH_ID_BIN_SIZE + NODE_ID_BIN_SIZE);
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, SWH_ID_BIN_SIZE);
}
/**
* Converts SWH PID to corresponding long node id.
*
* @param swhPID node represented as a {@link SwhPID}
* @return corresponding node as a long id
* @see org.softwareheritage.graph.SwhPID
*/
public long getNodeId(SwhPID swhPID) {
// The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding
// nodeId
long start = 0;
long end = nbIds - 1;
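// Each line is a fixed-size record: SWH_ID_BIN_SIZE bytes of binary PID followed by
// NODE_ID_BIN_SIZE bytes of node id (big-endian long).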
while (start <= end) {
long lineNumber = (start + end) / 2L;
byte[] buffer = swhToNodeMap.readAtLine(lineNumber);
byte[] pidBuffer = new byte[SWH_ID_BIN_SIZE];
byte[] nodeBuffer = new byte[NODE_ID_BIN_SIZE];
System.arraycopy(buffer, 0, pidBuffer, 0, SWH_ID_BIN_SIZE);
System.arraycopy(buffer, SWH_ID_BIN_SIZE, nodeBuffer, 0, NODE_ID_BIN_SIZE);
String currentSwhPID = SwhPID.fromBytes(pidBuffer).getSwhPID();
long currentNodeId = java.nio.ByteBuffer.wrap(nodeBuffer).getLong();
int cmp = currentSwhPID.compareTo(swhPID.toString());
if (cmp == 0) {
return currentNodeId;
} else if (cmp < 0) {
start = lineNumber + 1;
} else {
end = lineNumber - 1;
}
}
throw new IllegalArgumentException("Unknown SWH PID: " + swhPID);
}
/**
* Converts a node long id to corresponding SWH PID.
*
* @param nodeId node as a long id
* @return corresponding node as a {@link SwhPID}
* @see org.softwareheritage.graph.SwhPID
*/
public SwhPID getSwhPID(long nodeId) {
// Each line in NODE_TO_PID is formatted as: swhPID
// The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the
// nodeId-th line to get corresponding swhPID
if (nodeId < 0 || nodeId >= nbIds) {
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds);
}
return SwhPID.fromBytes(nodeToSwhMap.readAtLine(nodeId));
}
/**
* Closes the mapping files.
*/
public void close() throws IOException {
swhToNodeMap.close();
nodeToSwhMap.close();
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java
index fccfa18..c2747a0 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java
@@ -1,55 +1,54 @@
package org.softwareheritage.graph.maps;
-import java.io.IOException;
-
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigList;
-
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
+import java.io.IOException;
+
/**
* Mapping between long node id and SWH node type as described in the data
* model.
- *
+ *
* The type mapping is pre-computed and dumped on disk in the {@link MapBuilder}
* class, then it is loaded in-memory here using
* fastutil LongBigList. To be
* space-efficient, the mapping is stored as a bitmap using the minimum number of
* bits per {@link Node.Type}.
*
* @author The Software Heritage developers
*/
public class NodeTypesMap {
/**
* Array storing for each node its type
*/
public LongBigList nodeTypesMap;
/**
* Constructor.
*
* @param graphPath path and basename of the compressed graph
*/
public NodeTypesMap(String graphPath) throws IOException {
try {
nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("Unknown class object: " + e);
}
}
/**
* Returns node type from a node long id.
*
* @param nodeId node as a long id
* @return corresponding {@link Node.Type} value
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getType(long nodeId) {
long type = nodeTypesMap.getLong(nodeId);
return Node.Type.fromInt((int) type);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/server/App.java b/java/src/main/java/org/softwareheritage/graph/server/App.java
index c9edc14..78836b0 100644
--- a/java/src/main/java/org/softwareheritage/graph/server/App.java
+++ b/java/src/main/java/org/softwareheritage/graph/server/App.java
@@ -1,193 +1,198 @@
package org.softwareheritage.graph.server;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
-import com.martiansoftware.jsap.FlaggedOption;
-import com.martiansoftware.jsap.JSAP;
-import com.martiansoftware.jsap.JSAPException;
-import com.martiansoftware.jsap.JSAPResult;
-import com.martiansoftware.jsap.Parameter;
-import com.martiansoftware.jsap.SimpleJSAP;
-import com.martiansoftware.jsap.Switch;
-import com.martiansoftware.jsap.UnflaggedOption;
+import com.martiansoftware.jsap.*;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Stats;
import org.softwareheritage.graph.SwhPID;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
/**
* Web framework of the swh-graph server REST API.
*
* @author The Software Heritage developers
*/
public class App {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException, JSAPException {
SimpleJSAP jsap = new SimpleJSAP(App.class.getName(),
- "Server to load and query a compressed graph representation of Software Heritage archive.",
- new Parameter[] {
- new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port",
- "Binding port of the server."),
- new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
- JSAP.NOT_GREEDY, "The basename of the compressed graph."),
- new Switch("timings", 't', "timings", "Show timings in API result metadata."),
- });
+ "Server to load and query a compressed graph representation of Software Heritage archive.",
+ new Parameter[]{
+ new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port",
+ "Binding port of the server."),
+ new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ JSAP.NOT_GREEDY, "The basename of the compressed graph."),
+ new Switch("timings", 't', "timings", "Show timings in API result metadata."),
+ });
JSAPResult config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
String graphPath = config.getString("graphPath");
int port = config.getInt("port");
boolean showTimings = config.getBoolean("timings");
startServer(graphPath, port, showTimings);
}
/**
* Loads compressed graph and starts the web server to query it.
*
* @param graphPath basename of the compressed graph
* @param port binding port of the server
* @param showTimings true if timings should be in results metadata, false otherwise
*/
private static void startServer(String graphPath, int port, boolean showTimings)
- throws IOException {
+ throws IOException {
Graph graph = new Graph(graphPath);
Stats stats = new Stats(graphPath);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
Javalin app = Javalin.create().start(port);
- app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
- app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
- app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
- app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
- app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
+ app.before("/stats/*", ctx -> {
+ checkQueryStrings(ctx, "");
+ });
+ app.before("/leaves/*", ctx -> {
+ checkQueryStrings(ctx, "direction|edges");
+ });
+ app.before("/neighbors/*", ctx -> {
+ checkQueryStrings(ctx, "direction|edges");
+ });
+ app.before("/visit/*", ctx -> {
+ checkQueryStrings(ctx, "direction|edges");
+ });
+ app.before("/walk/*", ctx -> {
+ checkQueryStrings(ctx, "direction|edges|traversal");
+ });
- app.get("/stats/", ctx -> { ctx.json(stats); });
+ app.get("/stats/", ctx -> {
+ ctx.json(stats);
+ });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
app.get("/leaves/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/neighbors/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/visit/nodes/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/visit/paths/:src", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.get("/walk/:src/:dst", ctx -> {
SwhPID src = new SwhPID(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String algorithm = ctx.queryParam("traversal", "dfs");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm));
ctx.json(formatEndpointOutput(output, showTimings));
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
/**
* Checks query string names provided to the REST API.
*
* @param ctx Javalin HTTP request context
* @param allowedFmt a regular expression describing allowed query string names
* @throws IllegalArgumentException if an unknown query string is provided
*/
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map<String, List<String>> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
/**
* Formats endpoint result into final JSON for the REST API.
*
* Removes unwanted information if necessary, such as timings (to prevent side-channel
* attacks).
*
* @param output endpoint operation output which needs formatting
* @param showTimings true if timings should be in results metadata, false otherwise
* @return final Object with desired JSON format
*/
private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) {
if (showTimings) {
return output;
} else {
Map<String, Object> metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed);
Map<String, Object> outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings);
return outputNoTimings;
}
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java
index c1e3268..16c96e3 100644
--- a/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java
+++ b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java
@@ -1,308 +1,308 @@
package org.softwareheritage.graph.server;
-import java.util.ArrayList;
-
import org.softwareheritage.graph.*;
import org.softwareheritage.graph.benchmark.utils.Timing;
+import java.util.ArrayList;
+
/**
* REST API endpoints wrapper functions.
*
* Graph operations are segmented between high-level class (this one) and the low-level class
* ({@link Traversal}). The {@link Endpoint} class creates a wrapper for each endpoint by performing
* all the input/output node id conversions and logging timings.
*
* @author The Software Heritage developers
* @see Traversal
*/
public class Endpoint {
- /**
- * Wrapper class to unify traversal methods input signatures.
- */
- public static class Input {
- /** Source node of endpoint call specified as a {@link SwhPID} */
- public SwhPID src;
- /**
- * Destination formatted string as described in the API
- */
- public String dstFmt;
- /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */
- public String algorithm;
-
- public Input(SwhPID src) {
- this.src = src;
- }
-
- public Input(SwhPID src, String dstFmt, String algorithm) {
- this.src = src;
- this.dstFmt = dstFmt;
- this.algorithm = algorithm;
- }
- }
-
- /**
- * Wrapper class to return both the endpoint result and metadata (such as timings).
- */
-    public static class Output<T> {
- /** The result content itself */
- public T result;
- /** Various metadata about the result */
- public Meta meta;
-
- public Output() {
- this.result = null;
- this.meta = new Meta();
- }
-
- /**
- * Endpoint result metadata.
- */
- public class Meta {
- /** Operations timings */
- public Timings timings;
- /** Number of edges accessed during traversal */
- public long nbEdgesAccessed;
-
- public Meta() {
- this.timings = new Timings();
- this.nbEdgesAccessed = 0;
- }
-
- /**
- * Wrapper class for JSON output format.
- */
- public class Timings {
- /** Time in seconds to do the traversal */
- public double traversal;
- /** Time in seconds to convert input SWH PID to node id */
- public double pid2node;
- /** Time in seconds to convert output node ids to SWH PIDs */
- public double node2pid;
- }
- }
- }
-
/** Graph where traversal endpoint is performed */
Graph graph;
/** Internal traversal API */
Traversal traversal;
/**
* Constructor.
*
* @param graph the graph used for traversal endpoint
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed edges
*/
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
/**
* Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
*
* @param nodeIds the list of long node ids
* @return a list of corresponding SWH PIDs
*/
private ArrayList<SwhPID> convertNodesToSwhPIDs(ArrayList<Long> nodeIds) {
ArrayList<SwhPID> swhPIDs = new ArrayList<>();
for (long nodeId : nodeIds) {
swhPIDs.add(graph.getSwhPID(nodeId));
}
return swhPIDs;
}
/**
* Converts a list of (internal) long node ids to the corresponding {@link SwhPath}.
*
* @param nodeIds the list of long node ids
* @return the corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private SwhPath convertNodesToSwhPath(ArrayList<Long> nodeIds) {
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
path.add(graph.getSwhPID(nodeId));
}
return path;
}
/**
* Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s.
*
* @param pathsNodeId the list of paths with long node ids
* @return a list of corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private ArrayList<SwhPath> convertPathsToSwhPIDs(ArrayList<ArrayList<Long>> pathsNodeId) {
ArrayList<SwhPath> paths = new ArrayList<>();
for (ArrayList<Long> path : pathsNodeId) {
paths.add(convertNodesToSwhPath(path));
}
return paths;
}
/**
* Leaves endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see Traversal#leaves(long)
*/
public Output<ArrayList<SwhPID>> leaves(Input input) {
Output<ArrayList<SwhPID>> output = new Output<>();
long startTime;
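// Each endpoint wrapper times three phases separately: input PID→node id conversion,
// the traversal itself, and output node id→PID conversion.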
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.leaves(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* Neighbors endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see Traversal#neighbors(long)
*/
public Output<ArrayList<SwhPID>> neighbors(Input input) {
Output<ArrayList<SwhPID>> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.neighbors(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* Walk endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting {@link SwhPath} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.SwhPath
* @see Traversal#walk
*/
public Output<SwhPath> walk(Input input) {
Output<SwhPath> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
ArrayList<Long> nodeIds = new ArrayList<>();
// Destination is either a SWH PID or a node type
try {
SwhPID dstSwhPID = new SwhPID(input.dstFmt);
long dstNodeId = graph.getNodeId(dstSwhPID);
startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm);
output.meta.timings.traversal = Timing.stop(startTime);
} catch (IllegalArgumentException ignored1) {
try {
Node.Type dstType = Node.Type.fromStr(input.dstFmt);
startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm);
output.meta.timings.traversal = Timing.stop(startTime);
} catch (IllegalArgumentException ignored2) {
}
}
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPath(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* VisitNodes endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see Traversal#visitNodes(long)
*/
public Output<ArrayList<SwhPID>> visitNodes(Input input) {
Output<ArrayList<SwhPID>> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList<Long> nodeIds = traversal.visitNodes(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* VisitPaths endpoint wrapper.
*
* @param input input parameters for the underlying endpoint call
* @return the resulting list of {@link SwhPath} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.SwhPath
* @see Traversal#visitPaths(long)
*/
public Output<ArrayList<SwhPath>> visitPaths(Input input) {
Output<ArrayList<SwhPath>> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(input.src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList<ArrayList<Long>> paths = traversal.visitPaths(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed();
startTime = Timing.start();
output.result = convertPathsToSwhPIDs(paths);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
+
+ /**
+ * Wrapper class to unify traversal methods input signatures.
+ */
+ public static class Input {
+ /** Source node of endpoint call specified as a {@link SwhPID} */
+ public SwhPID src;
+ /**
+ * Destination formatted string as described in the API
+ */
+ public String dstFmt;
+ /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */
+ public String algorithm;
+
+ public Input(SwhPID src) {
+ this.src = src;
+ }
+
+ public Input(SwhPID src, String dstFmt, String algorithm) {
+ this.src = src;
+ this.dstFmt = dstFmt;
+ this.algorithm = algorithm;
+ }
+ }
+
+ /**
+ * Wrapper class to return both the endpoint result and metadata (such as timings).
+ */
+    public static class Output<T> {
+ /** The result content itself */
+ public T result;
+ /** Various metadata about the result */
+ public Meta meta;
+
+ public Output() {
+ this.result = null;
+ this.meta = new Meta();
+ }
+
+ /**
+ * Endpoint result metadata.
+ */
+ public class Meta {
+ /** Operations timings */
+ public Timings timings;
+ /** Number of edges accessed during traversal */
+ public long nbEdgesAccessed;
+
+ public Meta() {
+ this.timings = new Timings();
+ this.nbEdgesAccessed = 0;
+ }
+
+ /**
+ * Wrapper class for JSON output format.
+ */
+ public class Timings {
+ /** Time in seconds to do the traversal */
+ public double traversal;
+ /** Time in seconds to convert input SWH PID to node id */
+ public double pid2node;
+ /** Time in seconds to convert output node ids to SWH PIDs */
+ public double node2pid;
+ }
+ }
+ }
}