diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java index 0619b42..3dd7fa4 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -1,282 +1,282 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; import org.softwareheritage.graph.utils.Timing; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.algo.Traversal */ public class Endpoint { /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** * Endpoint result metadata. */ public class Meta { /** Operations timings */ public Timings timings; public Meta() { this.timings = new Timings(); } /** * Wrapper class for JSON output format. */ public class Timings { /** Time in seconds to do the traversal */ - public float traversal; + public double traversal; /** Time in seconds to convert input SWH PID to node id */ - public float pid2node; + public double pid2node; /** Time in seconds to convert output node ids to SWH PIDs */ - public float node2pid; + public double node2pid; } } } /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWH PIDs */ private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { ArrayList swhPIDs = new ArrayList<>(); for (long nodeId : nodeIds) { swhPIDs.add(graph.getSwhPID(nodeId)); } return swhPIDs; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSwhPID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#leaves(long) */ public Output leaves(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) */ public Output neighbors(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#walk */ public Output walk(SwhPID src, String dstFmt, String algorithm) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWH PID or a node type try { SwhPID dstSwhPID = new SwhPID(dstFmt); long dstNodeId = graph.getNodeId(dstSwhPID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored2) { } } startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) */ public Output visitNodes(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) */ public Output visitPaths(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); output.meta.timings.node2pid = Timing.stop(startTime); return output; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java index 4482497..77df754 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java @@ -1,125 +1,125 @@ package org.softwareheritage.graph.backend; import java.io.BufferedWriter; import java.io.FileInputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Writer; import java.util.zip.GZIPInputStream; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.fastutil.objects.ObjectBigArrays; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Pre-processing steps (such as dumping mapping files on disk) before running the graph service. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Setup { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { if (args.length != 2) { System.err.println("Expected parameters: "); System.exit(1); } String nodesPath = args[0]; String graphPath = args[1]; System.out.println("Pre-computing node id maps..."); long startTime = System.nanoTime(); precomputeNodeIdMap(nodesPath, graphPath); long endTime = System.nanoTime(); - double duration = (double) (endTime - startTime) / 1_000_000_000; + double duration = (endTime - startTime) / 1_000_000_000; System.out.println("Done in: " + duration + " seconds"); } /** * Computes and dumps on disk mapping files. * * @param nodesPath path of the compressed csv nodes file * @param graphPath path of the compressed graph */ // Suppress warning for Object2LongFunction cast @SuppressWarnings("unchecked") static void precomputeNodeIdMap(String nodesPath, String graphPath) throws IOException { // First internal mapping: SWH PID (string) -> WebGraph MPH (long) Object2LongFunction mphMap = null; try { mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); } long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); // Second internal mapping: WebGraph MPH (long) -> BFS ordering (long) long[][] bfsMap = LongBigArrays.newBigArray(nbIds); long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); if (loaded != nbIds) { throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded); } // Dump complete mapping for all nodes: SWH PID (string) <=> WebGraph node id (long) InputStream nodesStream = new GZIPInputStream(new FileInputStream(nodesPath)); FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(nodesStream, "UTF-8")); LineIterator swhPIDIterator = new LineIterator(buffer); try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE)); Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) { // nodeToSwhMap needs to write SWH PID in order of node id, so use a temporary array Object[][] nodeToSwhPID = ObjectBigArrays.newBigArray(nbIds); // To effectively run edge restriction during graph traversals, we store node id (long) -> SWH // type map. This is represented as a bitmap using minimum number of bits per Node.Type. final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2)); final int nbBitsPerNodeType = log2NbTypes; LongArrayBitVector nodeTypesBitVector = LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) { String strSwhPID = swhPIDIterator.next().toString(); long mphId = mphMap.getLong(strSwhPID); long nodeId = LongBigArrays.get(bfsMap, mphId); String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId); String line = strSwhPID + " " + paddedNodeId + "\n"; swhToNodeMap.write(line); ObjectBigArrays.set(nodeToSwhPID, nodeId, strSwhPID); SwhPID swhPID = new SwhPID(strSwhPID); nodeTypesMap.set(nodeId, swhPID.getType().ordinal()); } BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); for (long iNode = 0; iNode < nbIds; iNode++) { String line = ObjectBigArrays.get(nodeToSwhPID, iNode).toString() + "\n"; nodeToSwhMap.write(line); } } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index 769e990..6e4aef9 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,49 +1,49 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import java.util.ArrayList; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.utils.Random; import org.softwareheritage.graph.utils.Statistics; import org.softwareheritage.graph.utils.Timing; /** * Benchmark to time edge access time. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class AccessEdge { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); final long seed = 42; final int nbNodes = 1_000_000; Random random = new Random(seed); long[] nodeIds = random.generateNodeIds(graph, nbNodes); ArrayList timings = new ArrayList<>(); for (long nodeId : nodeIds) { long startTime = Timing.start(); LazyLongIterator neighbors = graph.successors(nodeId); long firstNeighbor = neighbors.nextLong(); - double duration = (double) Timing.stop(startTime); + double duration = Timing.stop(startTime); timings.add(duration); } System.out.println("Used " + nbNodes + " random edges (results are in seconds):"); Statistics stats = new Statistics(timings); stats.printAll(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java b/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java index 4b3ec54..6deada3 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java +++ b/java/server/src/main/java/org/softwareheritage/graph/utils/Timing.java @@ -1,32 +1,32 @@ package org.softwareheritage.graph.utils; /** * Time measurement utility class. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class Timing { /** * Returns measurement starting timestamp. * * @return timestamp used for time measurement */ public static long start() { return System.nanoTime(); } /** * Ends timing measurement and returns total duration in seconds. * * @param startTime measurement starting timestamp * @return time in seconds elapsed since starting point */ - public static float stop(long startTime) { + public static double stop(long startTime) { long endTime = System.nanoTime(); - float duration = (float) (endTime - startTime) / 1_000_000_000; + double duration = (double) (endTime - startTime) / 1_000_000_000; return duration; } }