diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java --- a/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Setup.java @@ -6,6 +6,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.RandomAccessFile; import java.io.Writer; import java.util.zip.GZIPInputStream; @@ -15,14 +16,16 @@ import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; -import it.unimi.dsi.fastutil.objects.ObjectBigArrays; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.NodeTypesMap; +import org.softwareheritage.graph.utils.Timing; /** * Pre-processing steps (such as dumping mapping files on disk) before running the graph service. @@ -33,6 +36,9 @@ */ public class Setup { + /** Timings logger */ + private static final Logger logger = LoggerFactory.getLogger(Setup.class); + public static void main(String[] args) throws IOException { if (args.length != 2) { System.err.println("Expected parameters: "); @@ -42,12 +48,10 @@ String nodesPath = args[0]; String graphPath = args[1]; - System.out.println("Pre-computing node id maps..."); - long startTime = System.nanoTime(); + long startTime = Timing.start(); precomputeNodeIdMap(nodesPath, graphPath); - long endTime = System.nanoTime(); - double duration = (double) (endTime - startTime) / 1_000_000_000; - System.out.println("Done in: " + duration + " seconds"); + float duration = Timing.stop(startTime); + logger.debug("Finished mapping files dumping, took " + duration + " seconds in total."); } /** @@ -82,9 +86,8 @@ LineIterator swhIdIterator = new LineIterator(buffer); try (Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE)); - Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) { - // nodeToSwhMap needs to write SWH id in order of node id, so use a temporary array - Object[][] nodeToSwhId = ObjectBigArrays.newBigArray(nbIds); + RandomAccessFile nodeToSwhMap = new RandomAccessFile(graphPath + Graph.NODE_TO_PID, "rw")) { + // RandomAccessFile is necessary for nodeToSwhMap to write SWH id in order of node id // To effectively run edge restriction during graph traversals, we store node id (long) -> SWH // type map. This is represented as a bitmap using minimum number of bits per Node.Type. @@ -94,6 +97,7 @@ LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); + long startTime = Timing.start(); for (long iNode = 0; iNode < nbIds && swhIdIterator.hasNext(); iNode++) { String strSwhId = swhIdIterator.next().toString(); long mphId = mphMap.getLong(strSwhId); @@ -103,18 +107,21 @@ String line = strSwhId + " " + paddedNodeId + "\n"; swhToNodeMap.write(line); - ObjectBigArrays.set(nodeToSwhId, nodeId, strSwhId); + nodeToSwhMap.seek((NodeIdMap.SWH_ID_LENGTH + 1) * nodeId); + nodeToSwhMap.writeBytes(strSwhId + "\n"); SwhId swhId = new SwhId(strSwhId); nodeTypesMap.set(nodeId, swhId.getType().ordinal()); + + if (iNode % 10_000_000 == 0) { + long percentage = iNode * 100 / nbIds; + long duration = (long) Timing.stop(startTime); + logger.debug("{}% done in {} seconds.", percentage, duration); + startTime = Timing.start(); + } } BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); - - for (long iNode = 0; iNode < nbIds; iNode++) { - String line = ObjectBigArrays.get(nodeToSwhId, iNode).toString() + "\n"; - nodeToSwhMap.write(line); - } } } }