Page MenuHomeSoftware Heritage

D3962.diff
No OneTemporary

D3962.diff

diff --git a/java/src/main/java/org/softwareheritage/graph/SwhLabel.java b/java/src/main/java/org/softwareheritage/graph/SwhLabel.java
new file mode 100644
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhLabel.java
@@ -0,0 +1,27 @@
+package org.softwareheritage.graph;
+
+/**
+ * Wrapper class to store the edge labels of the graph.
+ *
+ * <p>A label pairs a permission identifier with a filename identifier. Both values are fixed
+ * at construction time and cannot be reassigned afterwards.
+ *
+ * @author The Software Heritage developers
+ */
+public class SwhLabel {
+    /** Identifier of the permission attached to the edge. */
+    public final int permissionId;
+    /** Identifier of the filename attached to the edge. */
+    public final long filenameId;
+
+    /**
+     * Creates a label from its two components.
+     *
+     * @param permissionId identifier of the permission
+     * @param filenameId identifier of the filename
+     */
+    public SwhLabel(int permissionId, long filenameId) {
+        this.permissionId = permissionId;
+        this.filenameId = filenameId;
+    }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhPerm.java b/java/src/main/java/org/softwareheritage/graph/SwhPerm.java
new file mode 100644
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhPerm.java
@@ -0,0 +1,110 @@
+package org.softwareheritage.graph;
+
+/**
+ * Permission types present in the Software Heritage graph.
+ *
+ * @author The Software Heritage developers
+ */
+public class SwhPerm {
+    /**
+     * Kinds of permission, each convertible to and from a small integer code
+     * ({@link #fromInt}/{@link #toInt}) and an octal mode ({@link #fromOct}/{@link #toOct}).
+     */
+    public enum Type {
+        CONTENT,
+        EXECUTABLE_CONTENT,
+        SYMLINK,
+        DIRECTORY,
+        REVISION;
+
+        /**
+         * Converts an integer code into a permission type.
+         *
+         * @param intType integer code (0 to 4)
+         * @return the matching type, or {@code null} if the code is not recognised
+         */
+        public static Type fromInt(int intType) {
+            switch (intType) {
+                case 0:
+                    return CONTENT;
+                case 1:
+                    return EXECUTABLE_CONTENT;
+                case 2:
+                    return SYMLINK;
+                case 3:
+                    return DIRECTORY;
+                case 4:
+                    return REVISION;
+            }
+            return null;
+        }
+
+        /**
+         * Converts a permission type into its integer code, the inverse of {@link #fromInt}.
+         *
+         * @param type permission type to convert
+         * @return integer code between 0 and 4
+         * @throws IllegalArgumentException if no code is defined for {@code type}
+         */
+        public static int toInt(Type type) {
+            switch (type) {
+                case CONTENT:
+                    return 0;
+                case EXECUTABLE_CONTENT:
+                    return 1;
+                case SYMLINK:
+                    return 2;
+                case DIRECTORY:
+                    return 3;
+                case REVISION:
+                    return 4;
+            }
+            throw new IllegalArgumentException("Unknown permission type: " + type);
+        }
+
+        /**
+         * Converts an octal mode into a permission type.
+         *
+         * @param octType mode value, e.g. {@code 0100644}
+         * @return the matching type, or {@code null} if the mode is not recognised
+         */
+        public static Type fromOct(int octType) {
+            switch (octType) {
+                case 0100644:
+                    return CONTENT;
+                case 0100755:
+                    return EXECUTABLE_CONTENT;
+                case 0120000:
+                    return SYMLINK;
+                case 0040000:
+                    return DIRECTORY;
+                case 0160000:
+                    return REVISION;
+            }
+            return null;
+        }
+
+        /**
+         * Converts a permission type into its octal mode, the inverse of {@link #fromOct}.
+         *
+         * @param type permission type to convert
+         * @return octal mode associated with {@code type}
+         * @throws IllegalArgumentException if no mode is defined for {@code type}
+         */
+        public static int toOct(Type type) {
+            switch (type) {
+                case CONTENT:
+                    return 0100644;
+                case EXECUTABLE_CONTENT:
+                    return 0100755;
+                case SYMLINK:
+                    return 0120000;
+                case DIRECTORY:
+                    return 0040000;
+                case REVISION:
+                    return 0160000;
+            }
+            throw new IllegalArgumentException("Unknown permission type: " + type);
+        }
+    }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
--- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
@@ -4,7 +4,6 @@
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
-import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntLabel;
import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntListLabel;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Size64;
@@ -20,6 +19,8 @@
import it.unimi.dsi.big.webgraph.NodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.softwareheritage.graph.SwhPerm;
+import org.softwareheritage.graph.SwhLabel;
import java.io.*;
import java.nio.charset.StandardCharsets;
@@ -99,18 +100,23 @@
static void computeLabelMap(String graphPath, String debugPath, String tmpDir)
throws IOException
{
- // Compute intermediate representation in the format "<src node id> <dst node id> <label ids>\n"
+ // Compute intermediate representation in the format:
+ // "<src node id> <dst node id> <permission id> <filename ids>\n"
+
ImmutableGraph graph = BVGraph.loadMapped(graphPath);
Object2LongFunction<String> swhIdMph = loadMPH(graphPath);
long[][] orderMap = LongBigArrays.newBigArray(getMPHSize(swhIdMph));
BinIO.loadLongs(graphPath + ".order", orderMap);
- Object2LongFunction<String> labelMPH = loadMPH(graphPath + "-labels");
- long numLabels = getMPHSize(labelMPH);
- int labelWidth = (int) Math.ceil(Math.log(numLabels) / Math.log(2));
- if (labelWidth > 30) {
- logger.error("FIXME: Too many labels, we can't handle more than 2^30 for now.");
+ int permissionIdWidth = (int) Math.ceil(Math.log(SwhPerm.Type.values().length) / Math.log(2));
+
+ // TODO: rename the path to make it explicit that it only contains filenames
+ Object2LongFunction<String> filenameMPH = loadMPH(graphPath + "-labels");
+ long numFilenames = getMPHSize(filenameMPH);
+ int filenameIdWidth = (int) Math.ceil(Math.log(numFilenames) / Math.log(2));
+ if (filenameIdWidth > 30) {
+ logger.error("FIXME: Too many filenames, we can't handle more than 2^30 for now.");
System.exit(2);
}
@@ -123,7 +129,7 @@
ProcessBuilder processBuilder = new ProcessBuilder();
processBuilder.command(
"sort",
- "-k1,1n", "-k2,2n", "-k3,3n", // Numerical sort on all fields
+ "-k1,1n", "-k2,2n", "-k3,3n", "-k4,4n", // Numerical sort on all fields
"--numeric-sort",
"--buffer-size", SORT_BUFFER_SIZE,
"--temporary-directory", tmpDir
@@ -139,14 +145,15 @@
plInter.start("Piping intermediate representation to sort(1)");
while (edgeIterator.hasNext()) {
String[] edge = edgeIterator.next().toString().split(" ");
- if (edge.length < 3)
+ if (edge.length < 4)
continue;
long srcNode = SwhIDToNode(edge[0], swhIdMph, orderMap);
long dstNode = SwhIDToNode(edge[1], swhIdMph, orderMap);
- long labelId = labelMPH.getLong(edge[2]);
+ int permissionId = Integer.parseInt(edge[2]);
+ long filenameId = filenameMPH.getLong(edge[3]);
- sort_stdin.write((srcNode + "\t" + dstNode + "\t" + labelId + "\n")
+ sort_stdin.write((srcNode + "\t" + dstNode + "\t" + permissionId + "\t" + filenameId + "\n")
.getBytes(StandardCharsets.US_ASCII));
plInter.lightUpdate();
}
@@ -176,22 +183,23 @@
NodeIterator it = graph.nodeIterator();
long labelSrcNode = -1;
long labelDstNode = -1;
- long labelId = -1;
+ int labelPermissionId = -1;
+ long labelFilenameId = -1;
while (it.hasNext()) {
long srcNode = it.nextLong();
// Fill a hashmap with the labels of each edge starting from this node
- HashMap<Long, List<Long>> successorsLabels = new HashMap<>();
+ HashMap<Long, List<SwhLabel>> successorsLabels = new HashMap<>();
while (labelSrcNode <= srcNode) {
if (labelSrcNode == srcNode) {
successorsLabels
.computeIfAbsent(
labelDstNode,
k -> new ArrayList<>()
- ).add(labelId);
+ ).add(new SwhLabel(labelPermissionId, labelFilenameId));
if (debugFile != null) {
- debugFile.write(labelSrcNode + " " + labelDstNode + " " + labelId + "\n");
+ debugFile.write(labelSrcNode + " " + labelDstNode + " " + labelPermissionId + " " + labelFilenameId + "\n");
}
}
@@ -202,17 +210,19 @@
String[] parts = line.split("\\t");
labelSrcNode = Long.parseLong(parts[0]);
labelDstNode = Long.parseLong(parts[1]);
- labelId = Long.parseLong(parts[2]);
+ labelPermissionId = Integer.parseInt(parts[2]);
+ labelFilenameId = Long.parseLong(parts[3]);
}
int bits = 0;
LazyLongIterator s = it.successors();
long dstNode;
while ((dstNode = s.nextLong()) >= 0) {
- List<Long> edgeLabels = successorsLabels.getOrDefault(dstNode, Collections.emptyList());
- bits += labels.writeGamma(edgeLabels.size());
- for (Long label : edgeLabels) {
- bits += labels.writeLong(label, labelWidth);
+ List<SwhLabel> currentLabels = successorsLabels.getOrDefault(dstNode, Collections.emptyList());
+ bits += labels.writeGamma(currentLabels.size());
+ for (SwhLabel label : currentLabels) {
+ bits += labels.writeInt(label.permissionId, permissionIdWidth);
+ bits += labels.writeLong(label.filenameId, filenameIdWidth);
}
}
offsets.writeGamma(bits);
@@ -228,7 +238,7 @@
PrintWriter pw = new PrintWriter(new FileWriter((new File(graphPath)).getName() + "-labelled.properties" ));
pw.println(ImmutableGraph.GRAPHCLASS_PROPERTY_KEY + " = " + BitStreamArcLabelledImmutableGraph.class.getName());
- pw.println(BitStreamArcLabelledImmutableGraph.LABELSPEC_PROPERTY_KEY + " = " + FixedWidthIntListLabel.class.getName() + "(TEST," + labelWidth + ")" );
+ pw.println(BitStreamArcLabelledImmutableGraph.LABELSPEC_PROPERTY_KEY + " = " + FixedWidthIntListLabel.class.getName() + "(TEST," + filenameIdWidth + ")" ); // NOTE(review): each label is written as permissionIdWidth + filenameIdWidth bits, but only filenameIdWidth is declared here - presumably the spec width should cover both fields; confirm against the label reader
pw.println(ArcLabelledImmutableGraph.UNDERLYINGGRAPH_PROPERTY_KEY + " = " + graphPath);
pw.close();
}

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 2:34 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221034

Event Timeline