Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9312073
D1726.id5821.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D1726.id5821.diff
View Options
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -5,20 +5,28 @@
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import org.softwareheritage.graph.Node;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.NodeIdMap;
+import org.softwareheritage.graph.backend.NodeTypesMap;
public class Graph {
+ public static final String PID_TO_NODE = ".pid2node.csv";
+ public static final String NODE_TO_PID = ".node2pid.csv";
+ public static final String NODE_TO_TYPE = ".node2type.map";
+
BVGraph graph;
BVGraph graphTransposed;
String path;
NodeIdMap nodeIdMap;
+ NodeTypesMap nodeTypesMap;
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
this.path = path;
this.nodeIdMap = new NodeIdMap(path, getNbNodes());
+ this.nodeTypesMap = new NodeTypesMap(path);
}
public void cleanUp() throws IOException {
@@ -37,6 +45,10 @@
return nodeIdMap.getSwhId(nodeId);
}
+ public Node.Type getNodeType(long nodeId) {
+ return nodeTypesMap.getType(nodeId);
+ }
+
public long getNbNodes() {
return graph.numNodes();
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Node.java b/api/server/src/main/java/org/softwareheritage/graph/Node.java
--- a/api/server/src/main/java/org/softwareheritage/graph/Node.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Node.java
@@ -12,6 +12,22 @@
REV,
SNP;
+ public static Node.Type fromInt(int intType) {
+ switch (intType) {
+ case 0:
+ return CNT;
+ case 1:
+ return DIR;
+ case 2:
+ return REL;
+ case 3:
+ return REV;
+ case 4:
+ return SNP;
+ }
+ return null;
+ }
+
public static Node.Type fromStr(String strType) {
return Node.Type.valueOf(strType.toUpperCase());
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java
@@ -2,6 +2,7 @@
import java.io.IOException;
+import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.backend.MapFile;
@@ -21,12 +22,12 @@
// +1 are for spaces and end of lines
int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1;
int nodeToSwhLineLength = SWH_ID_LENGTH + 1;
- this.swhToNodeMap = new MapFile(graphPath + ".swhToNodeMap.csv", swhToNodeLineLength);
- this.nodeToSwhMap = new MapFile(graphPath + ".nodeToSwhMap.csv", nodeToSwhLineLength);
+ this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength);
+ this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength);
}
// SWH id (string) -> WebGraph node id (long)
- // Each line in .swhToNode.csv is formatted as: swhId nodeId
+ // Each line in PID_TO_NODE is formatted as: swhId nodeId
// The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId
public long getNodeId(SwhId swhId) {
long start = 0;
@@ -56,7 +57,7 @@
}
// WebGraph node id (long) -> SWH id (string)
- // Each line in .nodeToSwh.csv is formatted as: swhId
+ // Each line in NODE_TO_PID is formatted as: swhId
// The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the
// nodeId-th line to get corresponding swhId
public SwhId getSwhId(long nodeId) {
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
new file mode 100644
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java
@@ -0,0 +1,26 @@
+package org.softwareheritage.graph.backend;
+
+import java.io.IOException;
+
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.longs.LongBigList;
+
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.Node;
+
+public class NodeTypesMap {
+ LongBigList nodeTypesMap;
+
+ public NodeTypesMap(String graphPath) throws IOException {
+ try {
+ nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE);
+ } catch (ClassNotFoundException e) {
+ throw new IllegalArgumentException("Unknown class object: " + e);
+ }
+ }
+
+ public Node.Type getType(long nodeId) {
+ long type = nodeTypesMap.getLong(nodeId);
+ return Node.Type.fromInt((int) type);
+ }
+}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
--- a/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/backend/Setup.java
@@ -9,15 +9,20 @@
import java.io.Writer;
import java.util.zip.GZIPInputStream;
+import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
+import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
-import org.softwareheritage.graph.backend.NodeIdMap;
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.Node;
+import org.softwareheritage.graph.SwhId;
+import org.softwareheritage.graph.backend.NodeTypesMap;
public class Setup {
public static void main(String[] args) throws IOException {
@@ -63,23 +68,35 @@
LineIterator swhIdIterator = new LineIterator(buffer);
try (
- Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + ".swhToNodeMap.csv"));
- Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + ".nodeToSwhMap.csv"))) {
+ Writer swhToNodeMap = new BufferedWriter(new FileWriter(graphPath + Graph.PID_TO_NODE));
+ Writer nodeToSwhMap = new BufferedWriter(new FileWriter(graphPath + Graph.NODE_TO_PID))) {
// nodeToSwhMap needs to write SWH id in order of node id, so use a temporary array
Object[][] nodeToSwhId = ObjectBigArrays.newBigArray(nbIds);
+ // To effectively run edge restriction during graph traversals, we store node id (long) -> SWH
+ // type map. This is represented as a bitmap using minimum number of bits per Node.Type.
+ final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2));
+ final int nbBitsPerNodeType = log2NbTypes;
+ LongArrayBitVector nodeTypesBitVector = LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds);
+ LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType);
+
for (long iNode = 0; iNode < nbIds && swhIdIterator.hasNext(); iNode++) {
- String swhId = swhIdIterator.next().toString();
- long mphId = mphMap.getLong(swhId);
+ String strSwhId = swhIdIterator.next().toString();
+ long mphId = mphMap.getLong(strSwhId);
long nodeId = LongBigArrays.get(bfsMap, mphId);
String paddedNodeId = String.format("%0" + NodeIdMap.NODE_ID_LENGTH + "d", nodeId);
- String line = swhId + " " + paddedNodeId + "\n";
+ String line = strSwhId + " " + paddedNodeId + "\n";
swhToNodeMap.write(line);
- ObjectBigArrays.set(nodeToSwhId, nodeId, swhId);
+ ObjectBigArrays.set(nodeToSwhId, nodeId, strSwhId);
+
+ SwhId swhId = new SwhId(strSwhId);
+ nodeTypesMap.set(nodeId, swhId.getType().ordinal());
}
+ BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE);
+
for (long iNode = 0; iNode < nbIds; iNode++) {
String line = ObjectBigArrays.get(nodeToSwhId, iNode).toString() + "\n";
nodeToSwhMap.write(line);
diff --git a/api/server/src/test/dataset/.gitignore b/api/server/src/test/dataset/.gitignore
--- a/api/server/src/test/dataset/.gitignore
+++ b/api/server/src/test/dataset/.gitignore
@@ -16,8 +16,9 @@
*.stats
# Generated node ids mapping
-*.nodeToSwhMap.csv
-*.swhToNodeMap.csv
+*.node2pid.csv
+*.pid2node.csv
+*.node2type.csv
# Logs
stdout
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Jul 2, 10:42 AM (2 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220491
Attached To
D1726: Add node id -> node types bitmap
Event Timeline
Log In to Comment