diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
index 0619b42..3dd7fa4 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java
@@ -1,282 +1,282 @@
package org.softwareheritage.graph;
import java.util.ArrayList;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.SwhPath;
import org.softwareheritage.graph.algo.Traversal;
import org.softwareheritage.graph.utils.Timing;
/**
* REST API endpoints wrapper functions.
*
* Graph operations are segmented between high-level class (this one) and the low-level class
* ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing
* all the input/output node ids conversions and logging timings.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
* @see org.softwareheritage.graph.algo.Traversal
*/
public class Endpoint {
/**
* Wrapper class to return both the endpoint result and metadata (such as timings).
*/
public class Output {
/** The result content itself */
public T result;
/** Various metadata about the result */
public Meta meta;
public Output() {
this.result = null;
this.meta = new Meta();
}
/**
* Endpoint result metadata.
*/
public class Meta {
/** Operations timings */
public Timings timings;
public Meta() {
this.timings = new Timings();
}
/**
* Wrapper class for JSON output format.
*/
public class Timings {
/** Time in seconds to do the traversal */
- public float traversal;
+ public double traversal;
/** Time in seconds to convert input SWH PID to node id */
- public float pid2node;
+ public double pid2node;
/** Time in seconds to convert output node ids to SWH PIDs */
- public float node2pid;
+ public double node2pid;
}
}
}
/** Graph where traversal endpoint is performed */
Graph graph;
/** Internal traversal API */
Traversal traversal;
/**
* Constructor.
*
* @param graph the graph used for traversal endpoint
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed edges
*/
public Endpoint(Graph graph, String direction, String edgesFmt) {
this.graph = graph;
this.traversal = new Traversal(graph, direction, edgesFmt);
}
/**
* Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs.
*
* @param nodeIds the list of long node ids
* @return a list of corresponding SWH PIDs
*/
private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) {
ArrayList swhPIDs = new ArrayList<>();
for (long nodeId : nodeIds) {
swhPIDs.add(graph.getSwhPID(nodeId));
}
return swhPIDs;
}
/**
* Converts a list of (internal) long node ids to the corresponding {@link SwhPath}.
*
* @param nodeIds the list of long node ids
* @return the corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private SwhPath convertNodesToSwhPath(ArrayList nodeIds) {
SwhPath path = new SwhPath();
for (long nodeId : nodeIds) {
path.add(graph.getSwhPID(nodeId));
}
return path;
}
/**
* Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s.
*
* @param pathsNodeId the list of paths with long node ids
* @return a list of corresponding {@link SwhPath}
* @see org.softwareheritage.graph.SwhPath
*/
private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) {
ArrayList paths = new ArrayList<>();
for (ArrayList path : pathsNodeId) {
paths.add(convertNodesToSwhPath(path));
}
return paths;
}
/**
* Leaves endpoint wrapper.
*
* @param src source node of endpoint call specified as a {@link SwhPID}
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.algo.Traversal#leaves(long)
*/
public Output leaves(SwhPID src) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList nodeIds = traversal.leaves(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* Neighbors endpoint wrapper.
*
* @param src source node of endpoint call specified as a {@link SwhPID}
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.algo.Traversal#neighbors(long)
*/
public Output neighbors(SwhPID src) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList nodeIds = traversal.neighbors(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* Walk endpoint wrapper.
*
* @param src source node of endpoint call specified as a {@link SwhPID}
* @param dstFmt destination formatted string as described in the API
* @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs")
* @return the resulting {@link SwhPath} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#walk
*/
public Output walk(SwhPID src, String dstFmt, String algorithm) {
Output output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(src);
output.meta.timings.pid2node = Timing.stop(startTime);
ArrayList nodeIds = new ArrayList();
// Destination is either a SWH PID or a node type
try {
SwhPID dstSwhPID = new SwhPID(dstFmt);
long dstNodeId = graph.getNodeId(dstSwhPID);
startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm);
output.meta.timings.traversal = Timing.stop(startTime);
} catch (IllegalArgumentException ignored1) {
try {
Node.Type dstType = Node.Type.fromStr(dstFmt);
startTime = Timing.start();
nodeIds = traversal.walk(srcNodeId, dstType, algorithm);
output.meta.timings.traversal = Timing.stop(startTime);
} catch (IllegalArgumentException ignored2) {
}
}
startTime = Timing.start();
output.result = convertNodesToSwhPath(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* VisitNodes endpoint wrapper.
*
* @param src source node of endpoint call specified as a {@link SwhPID}
* @return the resulting list of {@link SwhPID} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.algo.Traversal#visitNodes(long)
*/
public Output visitNodes(SwhPID src) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList nodeIds = traversal.visitNodes(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
startTime = Timing.start();
output.result = convertNodesToSwhPIDs(nodeIds);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
/**
* VisitPaths endpoint wrapper.
*
* @param src source node of endpoint call specified as a {@link SwhPID}
* @return the resulting list of {@link SwhPath} from endpoint call and operation metadata
* @see org.softwareheritage.graph.SwhPID
* @see org.softwareheritage.graph.SwhPath
* @see org.softwareheritage.graph.algo.Traversal#visitPaths(long)
*/
public Output visitPaths(SwhPID src) {
Output> output = new Output<>();
long startTime;
startTime = Timing.start();
long srcNodeId = graph.getNodeId(src);
output.meta.timings.pid2node = Timing.stop(startTime);
startTime = Timing.start();
ArrayList> paths = traversal.visitPaths(srcNodeId);
output.meta.timings.traversal = Timing.stop(startTime);
startTime = Timing.start();
output.result = convertPathsToSwhPIDs(paths);
output.meta.timings.node2pid = Timing.stop(startTime);
return output;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
index 4482497..77df754 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -1,125 +1,125 @@
package org.softwareheritage.graph.backend;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Writer;
import java.util.zip.GZIPInputStream;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhPID;
import org.softwareheritage.graph.backend.NodeTypesMap;
/**
* Pre-processing steps (such as dumping mapping files on disk) before running the graph service.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Setup {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException {
if (args.length != 2) {
System.err.println("Expected parameters: ");
System.exit(1);
}
String nodesPath = args[0];
String graphPath = args[1];
System.out.println("Pre-computing node id maps...");
long startTime = System.nanoTime();
precomputeNodeIdMap(nodesPath, graphPath);
long endTime = System.nanoTime();
- double duration = (double) (endTime - startTime) / 1_000_000_000;
+ double duration = (endTime - startTime) / 1_000_000_000;
System.out.println("Done in: " + duration + " seconds");
}
/**
* Computes and dumps on disk mapping files.
*
* @param nodesPath path of the compressed csv nodes file
* @param graphPath path of the compressed graph
*/
// Suppress warning for Object2LongFunction cast
@SuppressWarnings("unchecked")
static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException {
// First internal mapping: SWH PID (string) -> WebGraph MPH (long)
Object2LongFunction mphMap = null;
try {
mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph");
} catch (ClassNotFoundException e) {
throw new IllegalArgumentException("The .mph file contains unknown class object: " + e);
}
long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
// Second internal mapping: WebGraph MPH (long) -> BFS ordering (long)
long[][] bfsMap = LongBigArrays.newBigArray(nbIds);
long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap);
if (loaded != nbIds) {
throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded);
}
// Dump complete mapping for all nodes: SWH PID (string) <=> WebGraph node id (long)
InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath));
FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8"));
LineIterator swhPIDIterator = new LineIterator(buffer);
try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE));
Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) {
// nodeToSwhMap needs to write SWH PID in order of node id, so use a temporary array
Object[][] nodeToSwhPID = ObjectBigArrays.newBigArray(nbIds);
// To effectively run edge restriction during graph traversals, we store node id (long) -> SWH
// type map. This is represented as a bitmap using minimum number of bits per Node.Type.
final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2));
final int nbBitsPerNodeType = log2NbTypes;
LongArrayBitVector nodeTypesBitVector =
LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType);
for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) {
String strSwhPID = swhPIDIterator.next().toString();
long mphId = mphMap.getLong(strSwhPID);
long nodeId = LongBigArrays.get(bfsMap, mphId);
String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId);
String line = strSwhPID + " " + paddedNodeId + "\n";
swhToNodeMap.write(line);
ObjectBigArrays.set(nodeToSwhPID, nodeId, strSwhPID);
SwhPID swhPID = new SwhPID(strSwhPID);
nodeTypesMap.set(nodeId, swhPID.getType().ordinal());
}
BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE);
for (long iNode = 0; iNode < nbIds; iNode++) {
String line = ObjectBigArrays.get(nodeToSwhPID, iNode).toString() + "\n";
nodeToSwhMap.write(line);
}
}
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
index 769e990..6e4aef9 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java
@@ -1,49 +1,49 @@
package org.softwareheritage.graph.benchmark;
import java.io.IOException;
import java.util.ArrayList;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.utils.Random;
import org.softwareheritage.graph.utils.Statistics;
import org.softwareheritage.graph.utils.Timing;
/**
* Benchmark to time edge access time.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class AccessEdge {
/**
* Main entrypoint.
*
* @param args command line arguments
*/
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);
final long seed = 42;
final int nbNodes = 1_000_000;
Random random = new Random(seed);
long[] nodeIds = random.generateNodeIds(graph, nbNodes);
ArrayList timings = new ArrayList<>();
for (long nodeId : nodeIds) {
long startTime = Timing.start();
LazyLongIterator neighbors = graph.successors(nodeId);
long firstNeighbor = neighbors.nextLong();
- double duration = (double) Timing.stop(startTime);
+ double duration = Timing.stop(startTime);
timings.add(duration);
}
System.out.println("Used " + nbNodes + " random edges (results are in seconds):");
Statistics stats = new Statistics(timings);
stats.printAll();
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java b/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java
index 4b3ec54..6deada3 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java
@@ -1,32 +1,32 @@
package org.softwareheritage.graph.utils;
/**
* Time measurement utility class.
*
* @author Thibault Allançon
* @version 0.0.1
* @since 0.0.1
*/
public class Timing {
/**
* Returns measurement starting timestamp.
*
* @return timestamp used for time measurement
*/
public static long start() {
return System.nanoTime();
}
/**
* Ends timing measurement and returns total duration in seconds.
*
* @param startTime measurement starting timestamp
* @return time in seconds elapsed since starting point
*/
- public static float stop(long startTime) {
+ public static double stop(long startTime) {
long endTime = System.nanoTime();
- float duration = (float) (endTime - startTime) / 1_000_000_000;
+ double duration = (double) (endTime - startTime) / 1_000_000_000;
return duration;
}
}