diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java new file mode 100644 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java @@ -0,0 +1,235 @@ +package org.softwareheritage.graph.maps; + +import com.martiansoftware.jsap.*; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; +import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; +import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntLabel; +import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntListLabel; +import it.unimi.dsi.fastutil.BigArrays; +import it.unimi.dsi.fastutil.Size64; +import it.unimi.dsi.fastutil.io.BinIO; +import it.unimi.dsi.fastutil.longs.LongBigArrays; +import it.unimi.dsi.fastutil.objects.Object2LongFunction; +import it.unimi.dsi.io.FastBufferedReader; +import it.unimi.dsi.io.LineIterator; +import it.unimi.dsi.io.OutputBitStream; +import it.unimi.dsi.logging.ProgressLogger; +import it.unimi.dsi.big.webgraph.BVGraph; +import it.unimi.dsi.big.webgraph.ImmutableGraph; +import it.unimi.dsi.big.webgraph.NodeIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.concurrent.TimeUnit; + +public class LabelMapBuilder { + + final static String SORT_BUFFER_SIZE = "40%"; + + final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class); + + private static JSAPResult parse_args(String[] args) { + JSAPResult config = null; + try { + SimpleJSAP jsap = new SimpleJSAP( + LabelMapBuilder.class.getName(), + "", + new Parameter[] { + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + 'g', "graph", "Basename of the compressed graph"), + new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, + 'd', "debug-path", + "Store the intermediate representation here for debug"), + + new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, + 't', "tmp", "Temporary directory path"), + } + ); + + config = jsap.parse(args); + if (jsap.messagePrinted()) { + System.exit(1); + } + } catch (JSAPException e) { + e.printStackTrace(); + } + return config; + } + + public static void main(String[] args) throws IOException { + JSAPResult config = parse_args(args); + String graphPath = config.getString("graphPath"); + String tmpDir = config.getString("tmpDir"); + String debugPath = config.getString("debugPath"); + + logger.info("Starting label map generation..."); + computeLabelMap(graphPath, debugPath, tmpDir); + logger.info("Label map generation ended."); + } + + @SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast + static Object2LongFunction loadMPH(String mphBasename) throws IOException { + Object2LongFunction mphMap = null; + try { + logger.info("loading MPH function..."); + mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph"); + logger.info("MPH function loaded"); + } catch (ClassNotFoundException e) { + logger.error("unknown class object in .mph file: " + e); + System.exit(2); + } + return mphMap; + } + + static long getMPHSize(Object2LongFunction mph) + { + return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size(); + } + + static long SwhIDToNode(String strSWHID, Object2LongFunction mphMap, long[][] orderMap) + { + long mphId = mphMap.getLong(strSWHID); + return BigArrays.get(orderMap, mphId); + } + + static void computeLabelMap(String graphPath, String debugPath, String tmpDir) + throws IOException + { + // Compute intermediate representation in the format "