diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java b/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java
new file mode 100644
index 0000000..7e407e3
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java
@@ -0,0 +1,113 @@
+package org.softwareheritage.graph.utils;
+
+import com.google.common.primitives.Longs;
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import it.unimi.dsi.bits.LongArrayBitVector;
+import it.unimi.dsi.fastutil.Arrays;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.objects.Object2LongFunction;
+import it.unimi.dsi.io.ByteDiskQueue;
+import it.unimi.dsi.io.FastBufferedReader;
+import it.unimi.dsi.io.LineIterator;
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SWHID;
+import org.softwareheritage.graph.experiments.topology.ConnectedComponents;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+
+public class ExportSubdataset {
+    /**
+     * Loads a serialized function from disk and returns it as a String-keyed
+     * {@link Object2LongFunction}.
+     *
+     * @param mphPath path of the serialized object, as read by {@link BinIO#loadObject}
+     * @return the deserialized function
+     * @throws IOException if the file cannot be read
+     * @throws ClassNotFoundException if the serialized class cannot be resolved
+     */
+    @SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast
+    static Object2LongFunction<String> loadMPH(String mphPath) throws IOException, ClassNotFoundException {
+        return (Object2LongFunction<String>) BinIO.loadObject(mphPath);
+    }
+
+    /**
+     * Reads SWHIDs from standard input, one per line, and prints to standard
+     * output one {@code "<source> <destination>"} line per edge found while
+     * following successors breadth-first from those SWHIDs.
+     *
+     * @param args {@code args[0]} is the graph path handed to {@link Graph}
+     * @throws IOException if the graph or the on-disk BFS queue cannot be read or written
+     * @throws ClassNotFoundException declared as in the original signature
+     */
+    public static void main(String[] args) throws IOException, ClassNotFoundException {
+        if (args.length < 1) {
+            System.err.println("Usage: ExportSubdataset <graph path> < seed SWHIDs (one per line)");
+            System.exit(1);
+        }
+
+        System.err.print("Loading everything...");
+        final Graph graph = new Graph(args[0]);
+        System.err.println(" done.");
+
+        final long n = graph.numNodes();
+
+        // Allow enough memory to behave like in-memory queue
+        final int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
+
+        final byte[] byteBuf = new byte[Long.BYTES];
+        // WARNING: no 64-bit version of this data-structure, but it can support
+        // indices up to 2^37
+        final LongArrayBitVector visited = LongArrayBitVector.ofLength(n);
+
+        final FastBufferedReader buffer = new FastBufferedReader(
+                new InputStreamReader(System.in, StandardCharsets.US_ASCII));
+        final LineIterator lineIterator = new LineIterator(buffer);
+
+        // Use a disk based queue to store BFS frontier; try-with-resources closes
+        // it on every exit path, and deleteOnExit is a fallback for the temp file.
+        final File queueFile = File.createTempFile(ExportSubdataset.class.getSimpleName(), "queue");
+        queueFile.deleteOnExit();
+        try (ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true)) {
+            while (lineIterator.hasNext()) {
+                final String line = lineIterator.next().toString();
+                long seed;
+                try {
+                    seed = graph.getNodeId(new SWHID(line));
+                } catch (IllegalArgumentException ignored) {
+                    // Best effort: skip input lines rejected as invalid
+                    continue;
+                }
+
+                // A seed already reached by an earlier traversal has had its outgoing
+                // edges printed; enqueueing it again would print them a second time.
+                if (visited.getBoolean(seed)) {
+                    continue;
+                }
+                visited.set(seed);
+                queue.enqueue(Longs.toByteArray(seed));
+
+                while (!queue.isEmpty()) {
+                    queue.dequeue(byteBuf);
+                    final long currentNode = Longs.fromByteArray(byteBuf);
+                    final SWHID currentNodeSWHID = graph.getSWHID(currentNode);
+
+                    final LazyLongIterator successors = graph.successors(currentNode);
+                    long succ;
+                    while ((succ = successors.nextLong()) != -1) {
+                        // Every edge is printed, even when its target was already visited
+                        System.out.format("%s %s\n", currentNodeSWHID, graph.getSWHID(succ));
+                        if (visited.getBoolean(succ)) {
+                            continue;
+                        }
+                        visited.set(succ);
+                        queue.enqueue(Longs.toByteArray(succ));
+                    }
+                }
+            }
+        }
+        System.out.flush();
+    }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/utils/WriteRevisionTimestamps.java b/java/src/main/java/org/softwareheritage/graph/utils/WriteRevisionTimestamps.java
new file mode 100644
index 0000000..a0e1a12
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/utils/WriteRevisionTimestamps.java
@@ -0,0 +1,94 @@
+package org.softwareheritage.graph.utils;
+
+import it.unimi.dsi.fastutil.BigArrays;
+import it.unimi.dsi.fastutil.Size64;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.longs.LongBigArrays;
+import it.unimi.dsi.fastutil.objects.Object2LongFunction;
+import it.unimi.dsi.io.FastBufferedReader;
+import it.unimi.dsi.io.LineIterator;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+
+public class WriteRevisionTimestamps {
+    /**
+     * Loads a serialized function from disk and returns it as a String-keyed
+     * {@link Object2LongFunction}.
+     *
+     * @param mphPath path of the serialized object, as read by {@link BinIO#loadObject}
+     * @return the deserialized function
+     * @throws IOException if the file cannot be read
+     * @throws ClassNotFoundException if the serialized class cannot be resolved
+     */
+    @SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast
+    static Object2LongFunction<String> loadMPH(String mphPath) throws IOException, ClassNotFoundException {
+        return (Object2LongFunction<String>) BinIO.loadObject(mphPath);
+    }
+
+    /**
+     * Reads {@code "<key> <timestamp>"} lines (space or tab separated) from
+     * standard input and writes the timestamps, indexed by node id, to the
+     * output file. Slots that no input line fills keep {@link Long#MIN_VALUE}.
+     *
+     * @param args {@code args[0]} is the graph path (the {@code .mph} and
+     *            {@code .order} files are derived from it); {@code args[1]} is
+     *            the output file
+     * @throws IOException if an input file cannot be read or the output cannot be written
+     * @throws ClassNotFoundException if the serialized {@code .mph} class cannot be resolved
+     */
+    public static void main(String[] args) throws IOException, ClassNotFoundException {
+        if (args.length < 2) {
+            System.err.println("Usage: WriteRevisionTimestamps <graph path> <output file>"
+                    + " < \"<SWHID> <timestamp>\" lines");
+            System.exit(1);
+        }
+
+        System.err.print("Loading everything...");
+        final String graphPath = args[0];
+        final String outputFile = args[1];
+        final Object2LongFunction<String> mphMap = loadMPH(graphPath + ".mph");
+        final long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
+        final long[][] nodePerm = BinIO.loadLongsBig(graphPath + ".order");
+        final long permLength = BigArrays.length(nodePerm);
+        final long[][] timestampArray = LongBigArrays.newBigArray(nbIds);
+        BigArrays.fill(timestampArray, Long.MIN_VALUE);
+        System.err.println(" done.");
+
+        final FastBufferedReader buffer = new FastBufferedReader(
+                new InputStreamReader(System.in, StandardCharsets.US_ASCII));
+        final LineIterator lineIterator = new LineIterator(buffer);
+
+        while (lineIterator.hasNext()) {
+            final String line = lineIterator.next().toString();
+            final String[] lineElements = line.split("[ \\t]");
+            if (lineElements.length < 2) {
+                // Blank line or missing timestamp column: skip it rather than
+                // fail with ArrayIndexOutOfBoundsException on lineElements[1]
+                continue;
+            }
+
+            long revId;
+            long timestamp;
+            try {
+                final long revHash = mphMap.getLong(lineElements[0].strip());
+                if (revHash < 0 || revHash >= permLength) {
+                    // Hash outside the permutation's bounds: no node id to look up
+                    continue;
+                }
+                revId = BigArrays.get(nodePerm, revHash);
+                timestamp = Long.parseLong(lineElements[1].strip());
+            } catch (IllegalArgumentException ignored) {
+                // Best effort: skip unparsable lines (NumberFormatException included)
+                continue;
+            }
+            if (revId < 0 || revId >= nbIds) {
+                // Node id outside the output array: nothing can be stored for it
+                continue;
+            }
+            BigArrays.set(timestampArray, revId, timestamp);
+        }
+        BinIO.storeLongs(timestampArray, outputFile);
+    }
+}