Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345176
D4006.id14181.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
17 KB
Subscribers
None
D4006.id14181.diff
View Options
diff --git a/java/pom.xml b/java/pom.xml
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -57,7 +57,12 @@
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
- <version>8.4.1</version>
+ <version>8.4.2</version>
+ </dependency>
+ <dependency>
+ <groupId>it.unimi.dsi</groupId>
+ <artifactId>dsiutils</artifactId>
+ <version>2.6.7</version>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
diff --git a/java/src/main/java/org/softwareheritage/graph/labels/DirEntry.java b/java/src/main/java/org/softwareheritage/graph/labels/DirEntry.java
new file mode 100644
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/labels/DirEntry.java
@@ -0,0 +1,131 @@
+package org.softwareheritage.graph.labels;
+
+/**
+ * Directory entries metadata are stored as edge labels on the graph. {@link DirEntry} can be encoded in a single long
+ * type, to re-use Webgraph interface.
+ *
+ * @author The Software Heritage developers
+ */
+public class DirEntry {
+ public long filenameId;
+ public int permission;
+
+ public DirEntry(long filenameId, int permission)
+ {
+ this.filenameId = filenameId;
+ this.permission = permission;
+ }
+
+ public DirEntry(long dirEntryEncoded)
+ {
+ this.filenameId = dirEntryEncoded >> Permission.NB_BITS_PER_TYPE;
+ int dirBytes = (int) (dirEntryEncoded & ((1 << Permission.NB_BITS_PER_TYPE) - 1));
+ this.permission = Permission.Type.fromEncoded(dirBytes);
+ }
+
+ public long toEncoded()
+ {
+ return (filenameId << Permission.NB_BITS_PER_TYPE) + Permission.Type.toEncoded(permission);
+ }
+
+ public static int labelWidth(long numLabels)
+ {
+ int filenameIdWidth = (int) Math.ceil(Math.log(numLabels) / Math.log(2));
+ if (filenameIdWidth > 64 - Permission.NB_BITS_PER_TYPE) {
+ System.err.println("FIXME: Too many filenames, we can't handle more than 2^" + (64 - Permission.NB_BITS_PER_TYPE) + " for now.");
+ System.exit(2);
+ }
+ return filenameIdWidth + Permission.NB_BITS_PER_TYPE;
+ }
+
+ /**
+ * Permission types present in the Software Heritage graph.
+ *
+ * @author The Software Heritage developers
+ */
+ private static class Permission {
+ public static final int NB_BITS_PER_TYPE =
+ (int) Math.ceil(Math.log(Permission.Type.values().length) / Math.log(2));
+
+ public enum Type {
+ CONTENT,
+ EXECUTABLE_CONTENT,
+ SYMLINK,
+ DIRECTORY,
+ REVISION;
+
+ public static Permission.Type fromIntCode(int intCode) {
+ switch (intCode) {
+ case 0:
+ return CONTENT;
+ case 1:
+ return EXECUTABLE_CONTENT;
+ case 2:
+ return SYMLINK;
+ case 3:
+ return DIRECTORY;
+ case 4:
+ return REVISION;
+ }
+ throw new IllegalArgumentException("Unknown node permission code: " + intCode);
+ }
+
+ public static int toIntCode(Permission.Type type) {
+ switch (type) {
+ case CONTENT:
+ return 0;
+ case EXECUTABLE_CONTENT:
+ return 1;
+ case SYMLINK:
+ return 2;
+ case DIRECTORY:
+ return 3;
+ case REVISION:
+ return 4;
+ }
+ throw new IllegalArgumentException("Unknown node permission type: " + type);
+ }
+
+ public static Permission.Type fromIntPerm(int intPerm) {
+ switch (intPerm) {
+ case 0100644:
+ return CONTENT;
+ case 0100755:
+ return EXECUTABLE_CONTENT;
+ case 0120000:
+ return SYMLINK;
+ case 0040000:
+ return DIRECTORY;
+ case 0160000:
+ return REVISION;
+ }
+ throw new IllegalArgumentException("Unknown node permission: " + intPerm);
+ }
+
+ public static int toIntPerm(Permission.Type type) {
+ switch (type) {
+ case CONTENT:
+ return 0100644;
+ case EXECUTABLE_CONTENT:
+ return 0100755;
+ case SYMLINK:
+ return 0120000;
+ case DIRECTORY:
+ return 0040000;
+ case REVISION:
+ return 0160000;
+ }
+ throw new IllegalArgumentException("Unknown node permission type: " + type);
+ }
+
+ public static int fromEncoded(int encoded) {
+ return toIntPerm(fromIntCode(encoded));
+ }
+
+ public static int toEncoded(int permission) {
+ return toIntCode(fromIntPerm(permission));
+ }
+ }
+ }
+
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java b/java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java
new file mode 100644
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java
@@ -0,0 +1,109 @@
+package org.softwareheritage.graph.labels;
+
+import it.unimi.dsi.big.webgraph.labelling.AbstractLabel;
+import it.unimi.dsi.big.webgraph.labelling.FixedWidthLongListLabel;
+import it.unimi.dsi.big.webgraph.labelling.Label;
+import it.unimi.dsi.io.InputBitStream;
+import it.unimi.dsi.io.OutputBitStream;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Software Heritage graph edge labels following Webgraph labels convention.
+ *
+ * @author The Software Heritage developers
+ */
+
+public class SwhLabel extends AbstractLabel {
+ private final String key;
+ private final int width;
+ // TODO: in the future we would like this to be edge type dependent (eg: having a similar SnpEntry to store branch names)
+ public DirEntry[] value;
+ // Use existing Webgraph class to represent a list of DirEntry as a list of encoded long
+ private final FixedWidthLongListLabel longList;
+
+ private static final DirEntry[] EMPTY_ARRAY = {};
+
+ public SwhLabel(String key, int width, DirEntry[] value) {
+ this.key = key;
+ this.width = width;
+ this.value = value;
+
+ long[] valueEncoded = new long[value.length];
+ for (int i = 0; i < value.length; i++)
+ valueEncoded[i] = value[i].toEncoded();
+ this.longList = new FixedWidthLongListLabel(key, width, valueEncoded);
+ }
+
+ public SwhLabel(String key, int width) {
+ this(key, width, EMPTY_ARRAY);
+ }
+
+ public SwhLabel(String... arg) {
+ this(arg[0], Integer.parseInt(arg[1]));
+ }
+
+ @Override
+ public int fromBitStream(InputBitStream inputBitStream, final long sourceUnused) throws IOException {
+ int ret = longList.fromBitStream(inputBitStream, sourceUnused);
+ // Decode values from their internal long representation
+ value = new DirEntry[longList.value.length];
+ for (int i = 0; i < value.length; i++)
+ value[i] = new DirEntry(longList.value[i]);
+ return ret;
+ }
+
+ @Override
+ public int toBitStream(OutputBitStream outputBitStream, final long sourceUnused) throws IOException {
+ // Values have already been encoded in the SwhLabel constructor
+ return longList.toBitStream(outputBitStream, sourceUnused);
+ }
+
+ @Override
+ public String wellKnownAttributeKey() {
+ return key;
+ }
+
+ @Override
+ public String[] attributeKeys() {
+ return new String[]{key};
+ }
+
+ @Override
+ public Class<?>[] attributeTypes() {
+ return new Class[]{DirEntry[].class};
+ }
+
+ @Override
+ public Object get(String s) {
+ if (this.key.equals(key))
+ return value;
+ throw new IllegalArgumentException();
+ }
+
+ @Override
+ public Object get() {
+ return value;
+ }
+
+ @Override
+ public Label copy() {
+ return new SwhLabel(key, width, value.clone());
+ }
+
+ @Override
+ public int fixedWidth() {
+ return -1;
+ }
+
+ @Override
+ public String toString() {
+ return key + ":" + Arrays.toString(value) + " (width:" + width + ")";
+ }
+
+ @Override
+ public String toSpec() {
+ return this.getClass().getName() + "(" + key + "," + width + ")";
+ }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
--- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
@@ -4,8 +4,6 @@
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
-import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntLabel;
-import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntListLabel;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
@@ -20,6 +18,8 @@
import it.unimi.dsi.big.webgraph.NodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.softwareheritage.graph.labels.DirEntry;
+import org.softwareheritage.graph.labels.SwhLabel;
import java.io.*;
import java.nio.charset.StandardCharsets;
@@ -106,13 +106,10 @@
long[][] orderMap = LongBigArrays.newBigArray(getMPHSize(swhIdMph));
BinIO.loadLongs(graphPath + ".order", orderMap);
- Object2LongFunction<String> labelMPH = loadMPH(graphPath + "-labels");
- long numLabels = getMPHSize(labelMPH);
- int labelWidth = (int) Math.ceil(Math.log(numLabels) / Math.log(2));
- if (labelWidth > 30) {
- logger.error("FIXME: Too many labels, we can't handle more than 2^30 for now.");
- System.exit(2);
- }
+ Object2LongFunction<String> filenameMph = loadMPH(graphPath + "-filename-labels");
+ long numFilenames = getMPHSize(filenameMph);
+
+ int totalLabelWidth = DirEntry.labelWidth(numFilenames);
ProgressLogger plInter = new ProgressLogger(logger, 10, TimeUnit.SECONDS);
plInter.itemsName = "edges";
@@ -123,7 +120,7 @@
ProcessBuilder processBuilder = new ProcessBuilder();
processBuilder.command(
"sort",
- "-k1,1n", "-k2,2n", "-k3,3n", // Numerical sort on all fields
+ "-k1,1n", "-k2,2n", // Numerical sort
"--numeric-sort",
"--buffer-size", SORT_BUFFER_SIZE,
"--temporary-directory", tmpDir
@@ -139,14 +136,15 @@
plInter.start("Piping intermediate representation to sort(1)");
while (edgeIterator.hasNext()) {
String[] edge = edgeIterator.next().toString().split(" ");
- if (edge.length < 3)
+ if (edge.length < 4)
continue;
long srcNode = SwhIDToNode(edge[0], swhIdMph, orderMap);
long dstNode = SwhIDToNode(edge[1], swhIdMph, orderMap);
- long labelId = labelMPH.getLong(edge[2]);
+ long filenameId = filenameMph.getLong(edge[2]);
+ int permission = Integer.parseInt(edge[3]);
- sort_stdin.write((srcNode + "\t" + dstNode + "\t" + labelId + "\n")
+ sort_stdin.write((srcNode + "\t" + dstNode + "\t" + filenameId + "\t" + permission + "\n")
.getBytes(StandardCharsets.US_ASCII));
plInter.lightUpdate();
}
@@ -176,22 +174,23 @@
NodeIterator it = graph.nodeIterator();
long labelSrcNode = -1;
long labelDstNode = -1;
- long labelId = -1;
+ long labelFilenameId = -1;
+ int labelPermission = -1;
while (it.hasNext()) {
long srcNode = it.nextLong();
// Fill a hashmap with the labels of each edge starting from this node
- HashMap<Long, List<Long>> successorsLabels = new HashMap<>();
+ HashMap<Long, List<DirEntry>> successorsLabels = new HashMap<>();
while (labelSrcNode <= srcNode) {
if (labelSrcNode == srcNode) {
successorsLabels
.computeIfAbsent(
labelDstNode,
k -> new ArrayList<>()
- ).add(labelId);
+ ).add(new DirEntry(labelFilenameId, labelPermission));
if (debugFile != null) {
- debugFile.write(labelSrcNode + " " + labelDstNode + " " + labelId + "\n");
+ debugFile.write(labelSrcNode + " " + labelDstNode + " " + labelFilenameId + " " + labelPermission + "\n");
}
}
@@ -202,18 +201,17 @@
String[] parts = line.split("\\t");
labelSrcNode = Long.parseLong(parts[0]);
labelDstNode = Long.parseLong(parts[1]);
- labelId = Long.parseLong(parts[2]);
+ labelFilenameId = Long.parseLong(parts[2]);
+ labelPermission = Integer.parseInt(parts[3]);
}
int bits = 0;
LazyLongIterator s = it.successors();
long dstNode;
while ((dstNode = s.nextLong()) >= 0) {
- List<Long> edgeLabels = successorsLabels.getOrDefault(dstNode, Collections.emptyList());
- bits += labels.writeGamma(edgeLabels.size());
- for (Long label : edgeLabels) {
- bits += labels.writeLong(label, labelWidth);
- }
+ List<DirEntry> currentLabels = successorsLabels.getOrDefault(dstNode, Collections.emptyList());
+ SwhLabel l = new SwhLabel("edgelabel", totalLabelWidth, currentLabels.toArray(new DirEntry[0]));
+ bits += l.toBitStream(labels, -1);
}
offsets.writeGamma(bits);
@@ -228,7 +226,7 @@
PrintWriter pw = new PrintWriter(new FileWriter((new File(graphPath)).getName() + "-labelled.properties" ));
pw.println(ImmutableGraph.GRAPHCLASS_PROPERTY_KEY + " = " + BitStreamArcLabelledImmutableGraph.class.getName());
- pw.println(BitStreamArcLabelledImmutableGraph.LABELSPEC_PROPERTY_KEY + " = " + FixedWidthIntListLabel.class.getName() + "(TEST," + labelWidth + ")" );
+ pw.println(BitStreamArcLabelledImmutableGraph.LABELSPEC_PROPERTY_KEY + " = " + SwhLabel.class.getName() + "(DirEntry," + totalLabelWidth + ")" );
pw.println(ArcLabelledImmutableGraph.UNDERLYINGGRAPH_PROPERTY_KEY + " = " + graphPath);
pw.close();
}
diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java
--- a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java
+++ b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java
@@ -1,10 +1,11 @@
package org.softwareheritage.graph.utils;
+import it.unimi.dsi.big.util.PermutedFrontCodedStringBigList;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
import it.unimi.dsi.fastutil.io.BinIO;
-import it.unimi.dsi.util.PermutedFrontCodedStringList;
+import org.softwareheritage.graph.labels.DirEntry;
import org.softwareheritage.graph.maps.NodeIdMap;
import java.io.IOException;
@@ -15,7 +16,7 @@
ArcLabelledImmutableGraph graph = BitStreamArcLabelledImmutableGraph.loadOffline(graphPath + "-labelled");
NodeIdMap nodeMap = new NodeIdMap(graphPath, graph.numNodes());
- PermutedFrontCodedStringList labelMap = (PermutedFrontCodedStringList) BinIO.loadObject(graphPath + "-labels.fcl");
+ PermutedFrontCodedStringBigList filenameMap = (PermutedFrontCodedStringBigList) BinIO.loadObject(graphPath + "-filename-labels.fcl");
ArcLabelledNodeIterator it = graph.nodeIterator();
while (it.hasNext()) {
@@ -24,14 +25,15 @@
ArcLabelledNodeIterator.LabelledArcIterator s = it.successors();
long dstNode;
while ((dstNode = s.nextLong()) >= 0) {
- int[] labels = (int[]) s.label().get();
+ DirEntry[] labels = (DirEntry[]) s.label().get();
if (labels.length > 0) {
- for (int label : labels) {
+ for (DirEntry label : labels) {
System.out.format(
- "%s %s %s\n",
+ "%s %s %s %d\n",
nodeMap.getSWHID(srcNode),
nodeMap.getSWHID(dstNode),
- labelMap.get(label)
+ filenameMap.get(label.filenameId),
+ label.permission
);
}
} else {
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:10 PM (5 d, 10 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230860
Attached To
D4006: WIP: add permissions on edge labels
Event Timeline
Log In to Comment