diff --git a/java/src/main/java/org/softwareheritage/graph/labels/AbstractLongListLabel.java b/java/src/main/java/org/softwareheritage/graph/labels/AbstractLongListLabel.java
index 4f2dd00..d71d7d8 100644
--- a/java/src/main/java/org/softwareheritage/graph/labels/AbstractLongListLabel.java
+++ b/java/src/main/java/org/softwareheritage/graph/labels/AbstractLongListLabel.java
@@ -1,95 +1,103 @@
// TODO: should be in webgraph upstream
// package it.unimi.dsi.big.webgraph.labelling;
package org.softwareheritage.graph.labels;
/*
* Copyright (C) 2020 TODO
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see .
*
*/
import it.unimi.dsi.big.webgraph.labelling.AbstractLabel;
import it.unimi.dsi.big.webgraph.labelling.Label;
import java.util.Arrays;
-/** An abstract (single-attribute) list-of-longs label.
-*
-*
This class provides basic methods for a label holding a list of longs.
-* Concrete implementations may impose further requirements on the long.
-*
-*
Implementing subclasses must provide constructors, {@link Label#copy()},
-* {@link Label#fromBitStream(it.unimi.dsi.io.InputBitStream, int)}, {@link Label#toBitStream(it.unimi.dsi.io.OutputBitStream, int)}
-* and possibly override {@link #toString()}.
-*/
+/**
+ * An abstract (single-attribute) list-of-longs label.
+ *
+ *
+ * This class provides basic methods for a label holding a list of longs. Concrete implementations
+ * may impose further requirements on the long.
+ *
+ *
+ * Implementing subclasses must provide constructors, {@link Label#copy()},
+ * {@link Label#fromBitStream(it.unimi.dsi.io.InputBitStream, int)},
+ * {@link Label#toBitStream(it.unimi.dsi.io.OutputBitStream, int)} and possibly override
+ * {@link #toString()}.
+ */
public abstract class AbstractLongListLabel extends AbstractLabel implements Label {
- /** The key of the attribute represented by this label. */
- protected final String key;
- /** The values of the attribute represented by this label. */
- public long[] value;
+ /** The key of the attribute represented by this label. */
+ protected final String key;
+ /** The values of the attribute represented by this label. */
+ public long[] value;
- /** Creates an long label with given key and value.
- *
- * @param key the (only) key of this label.
- * @param value the value of this label.
- */
- public AbstractLongListLabel(String key, long[] value) {
- this.key = key;
- this.value = value;
- }
+ /**
+ * Creates an long label with given key and value.
+ *
+ * @param key the (only) key of this label.
+ * @param value the value of this label.
+ */
+ public AbstractLongListLabel(String key, long[] value) {
+ this.key = key;
+ this.value = value;
+ }
- @Override
- public String wellKnownAttributeKey() {
- return key;
- }
+ @Override
+ public String wellKnownAttributeKey() {
+ return key;
+ }
- @Override
- public String[] attributeKeys() {
- return new String[] { key };
- }
+ @Override
+ public String[] attributeKeys() {
+ return new String[]{key};
+ }
- @Override
- public Class>[] attributeTypes() {
- return new Class[] { long[].class };
- }
+ @Override
+ public Class>[] attributeTypes() {
+ return new Class[]{long[].class};
+ }
- @Override
- public Object get(String key) {
- if (this.key.equals(key)) return value;
- throw new IllegalArgumentException();
- }
+ @Override
+ public Object get(String key) {
+ if (this.key.equals(key))
+ return value;
+ throw new IllegalArgumentException();
+ }
- @Override
- public Object get() {
- return value;
- }
+ @Override
+ public Object get() {
+ return value;
+ }
- @Override
- public String toString() {
- return key + ":" + Arrays.toString(value);
- }
+ @Override
+ public String toString() {
+ return key + ":" + Arrays.toString(value);
+ }
- @Override
- public boolean equals(Object x) {
- if (x instanceof AbstractLongListLabel) return Arrays.equals(value, ((AbstractLongListLabel)x).value);
- else return false;
- }
+ @Override
+ public boolean equals(Object x) {
+ if (x instanceof AbstractLongListLabel)
+ return Arrays.equals(value, ((AbstractLongListLabel) x).value);
+ else
+ return false;
+ }
- @Override
- public int hashCode() {
- return Arrays.hashCode(value);
- }
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(value);
+ }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/labels/FixedWidthLongListLabel.java b/java/src/main/java/org/softwareheritage/graph/labels/FixedWidthLongListLabel.java
index f99ec3e..f1672d9 100644
--- a/java/src/main/java/org/softwareheritage/graph/labels/FixedWidthLongListLabel.java
+++ b/java/src/main/java/org/softwareheritage/graph/labels/FixedWidthLongListLabel.java
@@ -1,108 +1,115 @@
// TODO: should be in webgraph upstream
// package it.unimi.dsi.big.webgraph.labelling;
package org.softwareheritage.graph.labels;
/*
* Copyright (C) 2020 TODO
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see .
*
*/
-
import it.unimi.dsi.big.webgraph.labelling.Label;
import it.unimi.dsi.fastutil.longs.LongArrays;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.io.OutputBitStream;
import java.io.IOException;
import java.util.Arrays;
-/** A list of longs represented in fixed width. Each list is prefixed by its
- * length written in {@linkplain OutputBitStream#writeGamma(int) γ
- * coding}.
+/**
+ * A list of longs represented in fixed width. Each list is prefixed by its length written in
+ * {@linkplain OutputBitStream#writeGamma(int) γ coding}.
*/
public class FixedWidthLongListLabel extends org.softwareheritage.graph.labels.AbstractLongListLabel {
- /** The bit width used to represent the value of this label. */
- private final int width;
+ /** The bit width used to represent the value of this label. */
+ private final int width;
- /** Creates a new fixed-width long label.
- *
- * @param key the (only) key of this label.
- * @param width the label width (in bits).
- * @param value the value of this label.
- */
- public FixedWidthLongListLabel(String key, int width, long[] value) {
- super(key, value);
- for(int i = value.length; i-- != 0;) if (value[i] < 0 || value[i] >= 1L << width) throw new IllegalArgumentException("Value out of range: " + Long.toString(value[i]));
- this.width = width;
- }
+ /**
+ * Creates a new fixed-width long label.
+ *
+ * @param key the (only) key of this label.
+ * @param width the label width (in bits).
+ * @param value the value of this label.
+ */
+ public FixedWidthLongListLabel(String key, int width, long[] value) {
+ super(key, value);
+ for (int i = value.length; i-- != 0;)
+ if (value[i] < 0 || value[i] >= 1L << width)
+ throw new IllegalArgumentException("Value out of range: " + Long.toString(value[i]));
+ this.width = width;
+ }
- /** Creates a new fixed-width label with an empty list.
- *
- * @param key the (only) key of this label.
- * @param width the label width (in bits).
- */
- public FixedWidthLongListLabel(String key, int width) {
- this(key, width, LongArrays.EMPTY_ARRAY);
- }
+ /**
+ * Creates a new fixed-width label with an empty list.
+ *
+ * @param key the (only) key of this label.
+ * @param width the label width (in bits).
+ */
+ public FixedWidthLongListLabel(String key, int width) {
+ this(key, width, LongArrays.EMPTY_ARRAY);
+ }
- /** Creates a new fixed-width long label using the given key and width
- * with an empty list.
- *
- * @param arg two strings containing the key and the width of this label.
- */
- public FixedWidthLongListLabel(String... arg) {
- this(arg[0], Integer.parseInt(arg[1]));
- }
+ /**
+ * Creates a new fixed-width long label using the given key and width with an empty list.
+ *
+ * @param arg two strings containing the key and the width of this label.
+ */
+ public FixedWidthLongListLabel(String... arg) {
+ this(arg[0], Integer.parseInt(arg[1]));
+ }
- @Override
- public Label copy() {
- return new FixedWidthLongListLabel(key, width, value.clone());
- }
+ @Override
+ public Label copy() {
+ return new FixedWidthLongListLabel(key, width, value.clone());
+ }
- @Override
- public int fromBitStream(InputBitStream inputBitStream, final long sourceUnused) throws IOException {
- long readBits = inputBitStream.readBits();
- value = new long[inputBitStream.readGamma()];
- for(int i = 0; i < value.length; i++) value[i] = inputBitStream.readLong(width);
- return (int)(inputBitStream.readBits() - readBits);
- }
+ @Override
+ public int fromBitStream(InputBitStream inputBitStream, final long sourceUnused) throws IOException {
+ long readBits = inputBitStream.readBits();
+ value = new long[inputBitStream.readGamma()];
+ for (int i = 0; i < value.length; i++)
+ value[i] = inputBitStream.readLong(width);
+ return (int) (inputBitStream.readBits() - readBits);
+ }
- @Override
- public int toBitStream(OutputBitStream outputBitStream, final long sourceUnused) throws IOException {
- int bits = outputBitStream.writeGamma(value.length);
- for(int i = 0; i < value.length; i++) bits += outputBitStream.writeLong(value[i], width);
- return bits;
- }
+ @Override
+ public int toBitStream(OutputBitStream outputBitStream, final long sourceUnused) throws IOException {
+ int bits = outputBitStream.writeGamma(value.length);
+ for (int i = 0; i < value.length; i++)
+ bits += outputBitStream.writeLong(value[i], width);
+ return bits;
+ }
- /** Returns -1 (the fixed width refers to a single long, not to the entire list).
- * @return -1;
- */
- @Override
- public int fixedWidth() {
- return -1;
- }
+ /**
+ * Returns -1 (the fixed width refers to a single long, not to the entire list).
+ *
+ * @return -1;
+ */
+ @Override
+ public int fixedWidth() {
+ return -1;
+ }
- @Override
- public String toString() {
- return key + ":" + Arrays.toString(value) + " (width:" + width + ")";
- }
+ @Override
+ public String toString() {
+ return key + ":" + Arrays.toString(value) + " (width:" + width + ")";
+ }
- @Override
- public String toSpec() {
- return this.getClass().getName() + "(" + key + "," + width + ")";
- }
+ @Override
+ public String toSpec() {
+ return this.getClass().getName() + "(" + key + "," + width + ")";
+ }
}
diff --git a/java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java b/java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java
index efeef38..41f7d5f 100644
--- a/java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java
+++ b/java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java
@@ -1,108 +1,109 @@
package org.softwareheritage.graph.labels;
import it.unimi.dsi.big.webgraph.labelling.AbstractLabel;
import it.unimi.dsi.big.webgraph.labelling.Label;
import it.unimi.dsi.io.InputBitStream;
import it.unimi.dsi.io.OutputBitStream;
import java.io.IOException;
import java.util.Arrays;
/**
* Software Heritage graph edge labels following Webgraph labels convention.
*
* @author The Software Heritage developers
*/
public class SwhLabel extends AbstractLabel {
private final String key;
private final int width;
- // TODO: in the future we would like this to be edge type dependent (eg: having a similar SnpEntry to store branch names)
+ // TODO: in the future we would like this to be edge type dependent (eg: having a similar SnpEntry
+ // to store branch names)
public DirEntry[] value;
// Use existing Webgraph class to represent a list of DirEntry as a list of encoded long
private final FixedWidthLongListLabel longList;
private static final DirEntry[] EMPTY_ARRAY = {};
public SwhLabel(String key, int width, DirEntry[] value) {
this.key = key;
this.width = width;
this.value = value;
long[] valueEncoded = new long[value.length];
for (int i = 0; i < value.length; i++)
valueEncoded[i] = value[i].toEncoded();
this.longList = new FixedWidthLongListLabel(key, width, valueEncoded);
}
public SwhLabel(String key, int width) {
this(key, width, EMPTY_ARRAY);
}
public SwhLabel(String... arg) {
this(arg[0], Integer.parseInt(arg[1]));
}
@Override
public int fromBitStream(InputBitStream inputBitStream, final long sourceUnused) throws IOException {
int ret = longList.fromBitStream(inputBitStream, sourceUnused);
// Decode values from their internal long representation
value = new DirEntry[longList.value.length];
for (int i = 0; i < value.length; i++)
value[i] = new DirEntry(longList.value[i]);
return ret;
}
@Override
public int toBitStream(OutputBitStream outputBitStream, final long sourceUnused) throws IOException {
// Values have already been encoded in the SwhLabel constructor
return longList.toBitStream(outputBitStream, sourceUnused);
}
@Override
public String wellKnownAttributeKey() {
return key;
}
@Override
public String[] attributeKeys() {
return new String[]{key};
}
@Override
public Class>[] attributeTypes() {
return new Class[]{DirEntry[].class};
}
@Override
public Object get(String s) {
if (this.key.equals(key))
return value;
throw new IllegalArgumentException();
}
@Override
public Object get() {
return value;
}
@Override
public Label copy() {
return new SwhLabel(key, width, value.clone());
}
@Override
public int fixedWidth() {
return -1;
}
@Override
public String toString() {
return key + ":" + Arrays.toString(value) + " (width:" + width + ")";
}
@Override
public String toSpec() {
return this.getClass().getName() + "(" + key + "," + width + ")";
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
index 47905cd..ce140c5 100644
--- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
@@ -1,541 +1,527 @@
package org.softwareheritage.graph.maps;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
-import it.unimi.dsi.fastutil.longs.LongIterator;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.NodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softwareheritage.graph.labels.DirEntry;
import org.softwareheritage.graph.labels.SwhLabel;
import java.io.*;
-import java.lang.reflect.Array;
-import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.TimeUnit;
public class LabelMapBuilder {
final static String SORT_BUFFER_SIZE = "40%";
final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class);
String graphPath;
String debugPath;
String tmpDir;
ImmutableGraph graph;
Object2LongFunction swhIdMph;
long[][] orderMap;
Object2LongFunction filenameMph;
long numFilenames;
int totalLabelWidth;
public LabelMapBuilder(String graphPath, String debugPath, String tmpDir) {
this.graphPath = graphPath;
this.debugPath = debugPath;
this.tmpDir = tmpDir;
}
private static JSAPResult parse_args(String[] args) {
JSAPResult config = null;
try {
SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "",
new Parameter[]{
new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g',
"graph", "Basename of the compressed graph"),
new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd',
"debug-path", "Store the intermediate representation here for debug"),
new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp",
"Temporary directory path"),});
config = jsap.parse(args);
if (jsap.messagePrinted()) {
System.exit(1);
}
} catch (JSAPException e) {
e.printStackTrace();
}
return config;
}
public static void main(String[] args) throws IOException, InterruptedException {
JSAPResult config = parse_args(args);
String graphPath = config.getString("graphPath");
String tmpDir = config.getString("tmpDir");
String debugPath = config.getString("debugPath");
LabelMapBuilder builder = new LabelMapBuilder(graphPath, debugPath, tmpDir);
logger.info("Loading graph and MPH functions...");
builder.loadGraph();
// builder.computeLabelMapSort();
builder.computeLabelMapBsort();
}
@SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast
static Object2LongFunction loadMPH(String mphBasename) throws IOException {
Object2LongFunction mphMap = null;
try {
mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph");
} catch (ClassNotFoundException e) {
logger.error("unknown class object in .mph file: " + e);
System.exit(2);
}
return mphMap;
}
static long getMPHSize(Object2LongFunction mph) {
return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size();
}
void computeLabelMapSort() throws IOException {
// Pass the intermediate representation to sort(1) so that we see the labels in the order they will
// appear in the label file.
ProcessBuilder processBuilder = new ProcessBuilder();
processBuilder.command("sort", "-k1,1n", "-k2,2n", // Numerical sort
"--numeric-sort", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir);
Process sort = processBuilder.start();
BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream());
// BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream());
FastBufferedInputStream sort_stdout = new FastBufferedInputStream(sort.getInputStream());
final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in);
hashLabelStream(fbis, new EdgeLabelLineWriter() {
@Override
public void writeLine(long src, long dst, long filenameId, int permission) throws IOException {
sort_stdin.write((src + "\t" + dst + "\t" + filenameId + "\t" + permission + "\n")
.getBytes(StandardCharsets.US_ASCII));
}
});
sort_stdin.close();
EdgeLabelLineIterator mapLines = new TextualEdgeLabelLineIterator(sort_stdout);
writeLabels(mapLines);
logger.info("Done");
}
void computeLabelMapBsort() throws IOException, InterruptedException {
// Pass the intermediate representation to bsort(1) so that we see the labels in the order they will
// appear in the label file.
String tmpFile = tmpDir + "/labelsToSort.bin";
final FastBufferedInputStream fbis = new FastBufferedInputStream(System.in);
final DataOutputStream out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(tmpFile)));
// Number of bytes to represent a node.
final int nodeBytes = (Long.SIZE - Long.numberOfLeadingZeros(graph.numNodes())) / 8 + 1;
ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);
logger.info("Writing labels to a packed binary files (node bytes: {})", nodeBytes);
hashLabelStream(fbis, new EdgeLabelLineWriter() {
@Override
public void writeLine(long src, long dst, long filenameId, int permission) throws IOException {
buffer.putLong(0, src);
out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes);
buffer.putLong(0, dst);
out.write(buffer.array(), Long.BYTES - nodeBytes, nodeBytes);
out.writeLong(filenameId);
out.writeInt(permission);
}
});
ProcessBuilder processBuilder = new ProcessBuilder();
- processBuilder.command(
- "/home/seirl/bsort/src/bsort",
- "-v",
- "-r", String.valueOf(nodeBytes * 2 + Long.BYTES + Integer.BYTES),
- "-k", String.valueOf(nodeBytes * 2),
- tmpFile
- );
+ processBuilder.command("/home/seirl/bsort/src/bsort", "-v", "-r",
+ String.valueOf(nodeBytes * 2 + Long.BYTES + Integer.BYTES), "-k", String.valueOf(nodeBytes * 2),
+ tmpFile);
Process sort = processBuilder.start();
sort.waitFor();
final DataInputStream sortedLabels = new DataInputStream(new BufferedInputStream(new FileInputStream(tmpFile)));
BinaryEdgeLabelLineIterator mapLines = new BinaryEdgeLabelLineIterator(sortedLabels, nodeBytes);
writeLabels(mapLines);
logger.info("Done");
}
void loadGraph() throws IOException {
graph = BVGraph.loadMapped(graphPath);
swhIdMph = loadMPH(graphPath);
orderMap = LongBigArrays.newBigArray(getMPHSize(swhIdMph));
BinIO.loadLongs(graphPath + ".order", orderMap);
filenameMph = loadMPH(graphPath + "-labels");
numFilenames = getMPHSize(filenameMph);
totalLabelWidth = DirEntry.labelWidth(numFilenames);
}
void hashLabelStream(FastBufferedInputStream input, EdgeLabelLineWriter writer) throws IOException {
// Compute intermediate representation and write it on :
// "