Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345699
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
View Options
diff --git a/api/server/pom.xml b/api/server/pom.xml
index 137e9bb..d68bb29 100644
--- a/api/server/pom.xml
+++ b/api/server/pom.xml
@@ -1,116 +1,121 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.softwareheritage.graph</groupId>
<artifactId>graph</artifactId>
<version>1.0-ALPHA</version>
<name>graph</name>
<url>https://www.softwareheritage.org/</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.javalin</groupId>
<artifactId>javalin</artifactId>
<version>2.8.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.26</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.8</version>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>webgraph-big</artifactId>
<version>3.5.0</version>
</dependency>
+ <dependency><!-- Dataset.java imports it.unimi.dsi.fastutil classes (BinIO, LongBigArrays, Object2LongFunction) -->
+ <groupId>it.unimi.dsi</groupId>
+ <artifactId>fastutil</artifactId>
+ <version>8.2.2</version>
+ </dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.softwareheritage.graph.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind to the packaging phase -->
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java
index ffad3ad..f93e671 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,18 +1,31 @@
package org.softwareheritage.graph;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
import io.javalin.Javalin;
-import org.softwareheritage.graph.GraphAPI;
+import org.softwareheritage.graph.Dataset;
+import org.softwareheritage.graph.Graph;
public class App
{
public static void main(String[] args)
{
- GraphAPI graph = new GraphAPI("/path/to/graph");
+        // The graph directory is a mandatory argument: print a usage message instead of
+        // failing with an ArrayIndexOutOfBoundsException when it is missing.
+        if (args.length < 1) {
+            System.err.println("Usage: App <graph directory>");
+            System.exit(1);
+        }
+        Path path = Paths.get(args[0]);
+        Graph graph = new Graph(path.toString());
+        // NOTE(review): Graph, as introduced by this patch, declares no nbNodes() method;
+        // the /nb_nodes handler below will not compile until one is provided.
Javalin app = Javalin.create().start(5010);
app.get("/nb_nodes", ctx -> {
ctx.json(graph.nbNodes());
});
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Dataset.java b/api/server/src/main/java/org/softwareheritage/graph/Dataset.java
new file mode 100644
index 0000000..dda4917
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/Dataset.java
@@ -0,0 +1,168 @@
+package org.softwareheritage.graph;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.zip.GZIPInputStream;
+
+import it.unimi.dsi.big.webgraph.BVGraph;
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.longs.LongBigArrays;
+import it.unimi.dsi.fastutil.objects.Object2LongFunction;
+import it.unimi.dsi.io.FastBufferedReader;
+import it.unimi.dsi.io.LineIterator;
+import it.unimi.dsi.lang.MutableString;
+
+/**
+ * A single graph loaded through WebGraph's {@link BVGraph}, plus the two-way mapping between
+ * SWH hex hashes and the node ids used by that graph.
+ *
+ * Three companion files sharing the dataset basename are read in addition to the graph itself:
+ * {@code .mph}, {@code .nodes.csv.gz} and {@code .order}.
+ */
+public class Dataset
+{
+    /** Identifiers of the available datasets. */
+    public enum Name {
+        DIR_TO_DIR,
+        DIR_TO_FILE,
+        DIR_TO_REV,
+        ORIGIN_TO_SNAPSHOT,
+        RELEASE_TO_OBJ,
+        REV_TO_DIR,
+        REV_TO_REV,
+        SNAPSHOT_TO_OBJ
+    }
+
+    BVGraph graph;
+    String path;
+    HashMap<String, Long> hashToNode;
+    HashMap<Long, String> nodeToHash;
+
+    /**
+     * Loads the graph stored at the given basename and builds the hash/node mappings.
+     *
+     * Loading is best-effort: any failure is reported as a warning on stdout and the
+     * fields not yet assigned at that point stay null.
+     *
+     * @param datasetPath basename of the dataset files
+     */
+    public Dataset(String datasetPath)
+    {
+        try {
+            this.graph = BVGraph.load(datasetPath);
+            this.path = datasetPath;
+            setupNodesMapping();
+        } catch (Exception e) {
+            System.out.println("[WARNING] Could not load dataset " + datasetPath + ": " + e);
+        }
+    }
+
+    /**
+     * Builds {@link #hashToNode} and {@link #nodeToHash} by composing two mappings:
+     * hash to MPH value (from the .mph function) and MPH value to node id (from .order).
+     *
+     * @throws IOException if one of the companion files cannot be read
+     * @throws ClassNotFoundException if the object stored in .mph cannot be deserialized
+     * @throws IllegalArgumentException if the number of longs read from .order differs
+     *         from the size reported by the .mph function
+     */
+    void setupNodesMapping() throws IOException, ClassNotFoundException
+    {
+        this.hashToNode = new HashMap<String, Long>();
+        this.nodeToHash = new HashMap<Long, String>();
+
+        // First mapping: SWH hexhash (strings) <=> WebGraph MPH (longs)
+        HashMap<Long, String> mphToHash = new HashMap<Long, String>();
+        // BinIO.loadObject returns Object; the .mph file is expected to hold such a function.
+        @SuppressWarnings("unchecked")
+        Object2LongFunction<String> mphMap =
+            (Object2LongFunction<String>) BinIO.loadObject(path + ".mph");
+
+        // try-with-resources: both streams are closed even when reading fails part-way.
+        Collection<MutableString> hashes;
+        try (InputStream nodeFile = new FileInputStream(path + ".nodes.csv.gz");
+             InputStream gunzipped = new GZIPInputStream(nodeFile)) {
+            hashes = new LineIterator(
+                new FastBufferedReader(
+                    new InputStreamReader(gunzipped, "UTF-8"))).allLines();
+        }
+
+        for (MutableString h : hashes)
+        {
+            String hash = h.toString();
+            long mph = mphMap.getLong(hash);
+            mphToHash.put(mph, hash);
+        }
+
+        // Second mapping: WebGraph MPH (longs) <=> BFS ordering (longs)
+        long n = mphMap.size();
+        long[][] bfsMap = LongBigArrays.newBigArray(n);
+        long loaded = BinIO.loadLongs(path + ".order", bfsMap);
+        if (loaded != n)
+            throw new IllegalArgumentException("Graph contains " + n + " nodes, but read " + loaded);
+
+        // Create final mapping: SWH hexhash (strings) <=> BFS ordering (longs)
+        for (long id = 0; id < n; id++)
+        {
+            String hash = mphToHash.get(id);
+            long node = LongBigArrays.get(bfsMap, id);
+
+            hashToNode.put(hash, node);
+            nodeToHash.put(node, hash);
+        }
+    }
+
+    /** @return the basename this dataset was loaded from, or null if loading the graph failed */
+    public String getPath()
+    {
+        return path;
+    }
+
+    /**
+     * Translates a hash into its node id.
+     *
+     * @param hash SWH hex hash to look up
+     * @return the node id mapped to that hash
+     * @throws IllegalArgumentException if the hash is not part of this dataset
+     */
+    public long getNode(String hash)
+    {
+        // Unboxing a missing entry directly would fail with an uninformative NullPointerException.
+        Long node = hashToNode.get(hash);
+        if (node == null)
+            throw new IllegalArgumentException("Unknown hash: " + hash);
+        return node;
+    }
+
+    /**
+     * @param node node id
+     * @return the hash mapped to that node id, or null if there is none
+     */
+    public String getHash(long node)
+    {
+        return nodeToHash.get(node);
+    }
+
+    /**
+     * @param node node id
+     * @return lazy iterator over the successors of the node in the underlying graph
+     */
+    public LazyLongIterator successors(long node)
+    {
+        return graph.successors(node);
+    }
+
+    /**
+     * @param node node id
+     * @return outdegree of the node in the underlying graph
+     */
+    public long outdegree(long node)
+    {
+        return graph.outdegree(node);
+    }
+}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
new file mode 100644
index 0000000..8c02922
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -0,0 +1,55 @@
+package org.softwareheritage.graph;
+
+import java.util.EnumMap;
+
+import org.softwareheritage.graph.Dataset;
+
+/**
+ * Collection of the datasets stored under a single directory, indexed by {@link Dataset.Name}.
+ *
+ * Each dataset is loaded from the basename obtained by lower-casing its enum constant name
+ * (e.g. {@code DIR_TO_DIR} is loaded from {@code <path>/dir_to_dir}).
+ */
+public class Graph
+{
+    EnumMap<Dataset.Name, Dataset> graph;
+    String path;
+
+    /**
+     * Loads every dataset listed in {@link Dataset.Name} from the given directory.
+     *
+     * @param graphPath directory containing the datasets; a trailing "/" is appended if missing
+     */
+    public Graph(String graphPath)
+    {
+        this.graph = new EnumMap<Dataset.Name, Dataset>(Dataset.Name.class);
+        this.path = graphPath;
+        if (!path.endsWith("/"))
+            path += "/";
+
+        for (Dataset.Name dataset : Dataset.Name.values())
+            addDataset(dataset);
+    }
+
+    /**
+     * Loads one dataset and registers it, replacing any previous entry for the same name.
+     *
+     * @param dataset which dataset to load
+     */
+    public void addDataset(Dataset.Name dataset)
+    {
+        // Locale.ROOT keeps the basename stable whatever the JVM default locale is
+        // (under tr_TR, "DIR_TO_DIR".toLowerCase() does not yield "dir_to_dir").
+        String datasetPath = path + dataset.name().toLowerCase(java.util.Locale.ROOT);
+        graph.put(dataset, new Dataset(datasetPath));
+    }
+
+    /**
+     * @param dataset dataset name to look up
+     * @return the dataset registered under that name, or null if none was added
+     */
+    public Dataset getDataset(Dataset.Name dataset)
+    {
+        return graph.get(dataset);
+    }
+}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java b/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java
deleted file mode 100644
index c16ff19..0000000
--- a/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.softwareheritage.graph;
-
-import java.io.IOException;
-
-import it.unimi.dsi.webgraph.BVGraph;
-
-public class GraphAPI
-{
- String graphName;
- BVGraph graph;
-
- public GraphAPI(String graphName)
- {
- this.graphName = graphName;
- try {
- this.graph = BVGraph.load(graphName);
- }
- catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- public int nbNodes()
- {
- return graph.numNodes();
- }
-}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Jul 4, 3:28 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3451100
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment