Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123205
D4006.id14145.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
16 KB
Subscribers
None
D4006.id14145.diff
View Options
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhLabel.java b/java/src/main/java/org/softwareheritage/graph/SwhLabel.java
new file mode 100644
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhLabel.java
@@ -0,0 +1,30 @@
+package org.softwareheritage.graph;
+
+/**
+ * Wrapper class to store the edge labels of the graph. Labels can be encoded in a single long type.
+ *
+ * @author The Software Heritage developers
+ */
+public class SwhLabel {
+ public long filenameId;
+ public int permission;
+
+ public SwhLabel(long filenameId, int permission)
+ {
+ this.filenameId = filenameId;
+ this.permission = permission;
+ }
+
+ public static SwhLabel fromEncoded(long labelEncoded)
+ {
+ long filenameId = labelEncoded >> SwhPerm.TOTAL_NB_BITS;
+ int permission = (int) (labelEncoded & ((1 << SwhPerm.TOTAL_NB_BITS) - 1));
+ return new SwhLabel(filenameId, permission);
+ }
+
+ public static long toEncoded(SwhLabel label)
+ {
+ long labelEncoded = (label.filenameId << SwhPerm.TOTAL_NB_BITS) + label.permission;
+ return labelEncoded;
+ }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/SwhPerm.java b/java/src/main/java/org/softwareheritage/graph/SwhPerm.java
new file mode 100644
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/SwhPerm.java
@@ -0,0 +1,84 @@
+package org.softwareheritage.graph;
+
+/**
+ * Permission types present in the Software Heritage graph.
+ *
+ * @author The Software Heritage developers
+ */
+
+public class SwhPerm {
+ /** Software Heritage archives 5 types of permissions (listed below), so we only need 3 bits to compress them. */
+ public static final int TOTAL_NB_BITS = 3;
+
+ public enum Type {
+ CONTENT,
+ EXECUTABLE_CONTENT,
+ SYMLINK,
+ DIRECTORY,
+ REVISION;
+
+ public static Type fromInt(int intType) {
+ switch (intType) {
+ case 0:
+ return CONTENT;
+ case 1:
+ return EXECUTABLE_CONTENT;
+ case 2:
+ return SYMLINK;
+ case 3:
+ return DIRECTORY;
+ case 4:
+ return REVISION;
+ }
+ throw new IllegalArgumentException("Unknown node type: " + intType);
+ }
+
+ public static int toInt(Type type) {
+ switch (type) {
+ case CONTENT:
+ return 0;
+ case EXECUTABLE_CONTENT:
+ return 1;
+ case SYMLINK:
+ return 2;
+ case DIRECTORY:
+ return 3;
+ case REVISION:
+ return 4;
+ }
+ throw new IllegalArgumentException("Unknown node type: " + type);
+ }
+
+ public static Type fromOct(int octType) {
+ switch (octType) {
+ case 0100644:
+ return CONTENT;
+ case 0100755:
+ return EXECUTABLE_CONTENT;
+ case 0120000:
+ return SYMLINK;
+ case 0040000:
+ return DIRECTORY;
+ case 0160000:
+ return REVISION;
+ }
+ throw new IllegalArgumentException("Unknown node type: " + octType);
+ }
+
+ public static int toOct(Type type) {
+ switch (type) {
+ case CONTENT:
+ return 0100644;
+ case EXECUTABLE_CONTENT:
+ return 0100755;
+ case SYMLINK:
+ return 0120000;
+ case DIRECTORY:
+ return 0040000;
+ case REVISION:
+ return 0160000;
+ }
+ throw new IllegalArgumentException("Unknown node type: " + type);
+ }
+ }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
--- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
+++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java
@@ -4,8 +4,6 @@
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
-import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntLabel;
-import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntListLabel;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
@@ -20,6 +18,9 @@
import it.unimi.dsi.big.webgraph.NodeIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.softwareheritage.graph.SwhLabel;
+import org.softwareheritage.graph.SwhPerm;
+import org.softwareheritage.graph.webgraph.FixedWidthSwhLabelList;
import java.io.*;
import java.nio.charset.StandardCharsets;
@@ -106,14 +107,17 @@
long[][] orderMap = LongBigArrays.newBigArray(getMPHSize(swhIdMph));
BinIO.loadLongs(graphPath + ".order", orderMap);
- Object2LongFunction<String> labelMPH = loadMPH(graphPath + "-labels");
- long numLabels = getMPHSize(labelMPH);
- int labelWidth = (int) Math.ceil(Math.log(numLabels) / Math.log(2));
- if (labelWidth > 30) {
- logger.error("FIXME: Too many labels, we can't handle more than 2^30 for now.");
+ // TODO: change the path to be explicit it is only filenames
+ Object2LongFunction<String> filenameMph = loadMPH(graphPath + "-labels");
+ long numFilenames = getMPHSize(filenameMph);
+ int filenameIdWidth = (int) Math.ceil(Math.log(numFilenames) / Math.log(2));
+ if (filenameIdWidth > 30) {
+ logger.error("FIXME: Too many filenames, we can't handle more than 2^30 for now.");
System.exit(2);
}
+ int totalLabelWidth = filenameIdWidth + SwhPerm.TOTAL_NB_BITS;
+
ProgressLogger plInter = new ProgressLogger(logger, 10, TimeUnit.SECONDS);
plInter.itemsName = "edges";
plInter.expectedUpdates = graph.numArcs();
@@ -123,7 +127,7 @@
ProcessBuilder processBuilder = new ProcessBuilder();
processBuilder.command(
"sort",
- "-k1,1n", "-k2,2n", "-k3,3n", // Numerical sort on all fields
+ "-k1,1n", "-k2,2n", "-k3,3n", "-k4,4n", // Numerical sort on all fields
"--numeric-sort",
"--buffer-size", SORT_BUFFER_SIZE,
"--temporary-directory", tmpDir
@@ -139,14 +143,15 @@
plInter.start("Piping intermediate representation to sort(1)");
while (edgeIterator.hasNext()) {
String[] edge = edgeIterator.next().toString().split(" ");
- if (edge.length < 3)
+ if (edge.length < 4)
continue;
long srcNode = SwhIDToNode(edge[0], swhIdMph, orderMap);
long dstNode = SwhIDToNode(edge[1], swhIdMph, orderMap);
- long labelId = labelMPH.getLong(edge[2]);
+ long filenameId = filenameMph.getLong(edge[2]);
+ int permission = Integer.parseInt(edge[3]);
- sort_stdin.write((srcNode + "\t" + dstNode + "\t" + labelId + "\n")
+ sort_stdin.write((srcNode + "\t" + dstNode + "\t" + filenameId + "\t" + permission + "\n")
.getBytes(StandardCharsets.US_ASCII));
plInter.lightUpdate();
}
@@ -176,22 +181,23 @@
NodeIterator it = graph.nodeIterator();
long labelSrcNode = -1;
long labelDstNode = -1;
- long labelId = -1;
+ long labelFilenameId = -1;
+ int labelPermission = -1;
while (it.hasNext()) {
long srcNode = it.nextLong();
// Fill a hashmap with the labels of each edge starting from this node
- HashMap<Long, List<Long>> successorsLabels = new HashMap<>();
+ HashMap<Long, List<SwhLabel>> successorsLabels = new HashMap<>();
while (labelSrcNode <= srcNode) {
if (labelSrcNode == srcNode) {
successorsLabels
.computeIfAbsent(
labelDstNode,
k -> new ArrayList<>()
- ).add(labelId);
+ ).add(new SwhLabel(labelFilenameId, labelPermission));
if (debugFile != null) {
- debugFile.write(labelSrcNode + " " + labelDstNode + " " + labelId + "\n");
+ debugFile.write(labelSrcNode + " " + labelDstNode + " " + labelFilenameId + " " + labelPermission + "\n");
}
}
@@ -202,17 +208,20 @@
String[] parts = line.split("\\t");
labelSrcNode = Long.parseLong(parts[0]);
labelDstNode = Long.parseLong(parts[1]);
- labelId = Long.parseLong(parts[2]);
+ labelFilenameId = Long.parseLong(parts[2]);
+ SwhPerm.Type permissionType = SwhPerm.Type.fromOct(Integer.parseInt(parts[3]));
+ labelPermission = SwhPerm.Type.toInt(permissionType);
}
int bits = 0;
LazyLongIterator s = it.successors();
long dstNode;
while ((dstNode = s.nextLong()) >= 0) {
- List<Long> edgeLabels = successorsLabels.getOrDefault(dstNode, Collections.emptyList());
- bits += labels.writeGamma(edgeLabels.size());
- for (Long label : edgeLabels) {
- bits += labels.writeLong(label, labelWidth);
+ List<SwhLabel> currentLabels = successorsLabels.getOrDefault(dstNode, Collections.emptyList());
+ bits += labels.writeGamma(currentLabels.size());
+ for (SwhLabel label : currentLabels) {
+ long labelEncoded = SwhLabel.toEncoded(label);
+ bits += labels.writeLong(labelEncoded, totalLabelWidth);
}
}
offsets.writeGamma(bits);
@@ -228,7 +237,7 @@
PrintWriter pw = new PrintWriter(new FileWriter((new File(graphPath)).getName() + "-labelled.properties" ));
pw.println(ImmutableGraph.GRAPHCLASS_PROPERTY_KEY + " = " + BitStreamArcLabelledImmutableGraph.class.getName());
- pw.println(BitStreamArcLabelledImmutableGraph.LABELSPEC_PROPERTY_KEY + " = " + FixedWidthIntListLabel.class.getName() + "(TEST," + labelWidth + ")" );
+ pw.println(BitStreamArcLabelledImmutableGraph.LABELSPEC_PROPERTY_KEY + " = " + FixedWidthSwhLabelList.class.getName() + "(SwhLabel," + totalLabelWidth + ")" );
pw.println(ArcLabelledImmutableGraph.UNDERLYINGGRAPH_PROPERTY_KEY + " = " + graphPath);
pw.close();
}
diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java
--- a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java
+++ b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java
@@ -5,6 +5,8 @@
import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.util.PermutedFrontCodedStringList;
+import org.softwareheritage.graph.SwhLabel;
+import org.softwareheritage.graph.SwhPerm;
import org.softwareheritage.graph.maps.NodeIdMap;
import java.io.IOException;
@@ -15,7 +17,8 @@
ArcLabelledImmutableGraph graph = BitStreamArcLabelledImmutableGraph.loadOffline(graphPath + "-labelled");
NodeIdMap nodeMap = new NodeIdMap(graphPath, graph.numNodes());
- PermutedFrontCodedStringList labelMap = (PermutedFrontCodedStringList) BinIO.loadObject(graphPath + "-labels.fcl");
+ // TODO: change the path to be explicit it is only filenames
+ PermutedFrontCodedStringList filenameMap = (PermutedFrontCodedStringList) BinIO.loadObject(graphPath + "-labels.fcl");
ArcLabelledNodeIterator it = graph.nodeIterator();
while (it.hasNext()) {
@@ -24,14 +27,16 @@
ArcLabelledNodeIterator.LabelledArcIterator s = it.successors();
long dstNode;
while ((dstNode = s.nextLong()) >= 0) {
- int[] labels = (int[]) s.label().get();
+ SwhLabel[] labels = (SwhLabel[]) s.label().get();
if (labels.length > 0) {
- for (int label : labels) {
+ for (SwhLabel label : labels) {
+ SwhPerm.Type permissionType = SwhPerm.Type.fromInt(label.permission);
System.out.format(
- "%s %s %s\n",
+ "%s %s %s %d\n",
nodeMap.getSWHID(srcNode),
nodeMap.getSWHID(dstNode),
- labelMap.get(label)
+ filenameMap.get((int) label.filenameId),
+ SwhPerm.Type.toOct(permissionType)
);
}
} else {
diff --git a/java/src/main/java/org/softwareheritage/graph/webgraph/FixedWidthSwhLabelList.java b/java/src/main/java/org/softwareheritage/graph/webgraph/FixedWidthSwhLabelList.java
new file mode 100644
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/webgraph/FixedWidthSwhLabelList.java
@@ -0,0 +1,109 @@
+package org.softwareheritage.graph.webgraph;
+
+import it.unimi.dsi.big.webgraph.labelling.AbstractLabel;
+import it.unimi.dsi.big.webgraph.labelling.FixedWidthLongListLabel;
+import it.unimi.dsi.big.webgraph.labelling.Label;
+import it.unimi.dsi.io.InputBitStream;
+import it.unimi.dsi.io.OutputBitStream;
+import org.softwareheritage.graph.SwhLabel;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * List of {@link SwhLabel} represented in fixed width and following Webgraph labels convention.
+ *
+ * @author The Software Heritage developers
+ */
+
+public class FixedWidthSwhLabelList extends AbstractLabel {
+ private final String key;
+ private final int width;
+ public SwhLabel[] value;
+ // Use existing Webgraph class to represent a list of SwhLabel as a list of encoded long
+ private final FixedWidthLongListLabel longList;
+
+ private static final SwhLabel[] EMPTY_ARRAY = {};
+
+ public FixedWidthSwhLabelList(String key, int width, SwhLabel[] value) {
+ this.key = key;
+ this.width = width;
+ this.value = value;
+
+ long[] valueEncoded = new long[value.length];
+ for (int i = 0; i < value.length; i++)
+ valueEncoded[i] = SwhLabel.toEncoded(value[i]);
+ this.longList = new FixedWidthLongListLabel(key, width, valueEncoded);
+ }
+
+ public FixedWidthSwhLabelList(String key, int width) {
+ this(key, width, EMPTY_ARRAY);
+ }
+
+ public FixedWidthSwhLabelList(String... arg) {
+ this(arg[0], Integer.parseInt(arg[1]));
+ }
+
+ @Override
+ public int fromBitStream(InputBitStream inputBitStream, final long sourceUnused) throws IOException {
+ int ret = longList.fromBitStream(inputBitStream, sourceUnused);
+ // Decode values from their internal long representation
+ value = new SwhLabel[longList.value.length];
+ for (int i = 0; i < value.length; i++)
+ value[i] = SwhLabel.fromEncoded(longList.value[i]);
+ return ret;
+ }
+
+ @Override
+ public int toBitStream(OutputBitStream outputBitStream, final long sourceUnused) throws IOException {
+ // Values have already been encoded in the FixedWidthSwhLabelList constructor
+ return longList.toBitStream(outputBitStream, sourceUnused);
+ }
+
+ @Override
+ public String wellKnownAttributeKey() {
+ return key;
+ }
+
+ @Override
+ public String[] attributeKeys() {
+ return new String[]{key};
+ }
+
+ @Override
+ public Class<?>[] attributeTypes() {
+ return new Class[]{SwhLabel[].class};
+ }
+
+ @Override
+ public Object get(String s) {
+ if (this.key.equals(key))
+ return value;
+ throw new IllegalArgumentException();
+ }
+
+ @Override
+ public Object get() {
+ return value;
+ }
+
+ @Override
+ public Label copy() {
+ return new FixedWidthSwhLabelList(key, width, value.clone());
+ }
+
+ @Override
+ public int fixedWidth() {
+ return -1;
+ }
+
+ @Override
+ public String toString() {
+ return key + ":" + Arrays.toString(value) + " (width:" + width + ")";
+ }
+
+ @Override
+ public String toSpec() {
+ return this.getClass().getName() + "(" + key + "," + width + ")";
+ }
+}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 2:53 AM (2 d, 4 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230862
Attached To
D4006: WIP: add permissions on edge labels
Event Timeline
Log In to Comment