Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/api/server/pom.xml b/api/server/pom.xml
index 137e9bb..d68bb29 100644
--- a/api/server/pom.xml
+++ b/api/server/pom.xml
@@ -1,116 +1,121 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.softwareheritage.graph</groupId>
<artifactId>graph</artifactId>
<version>1.0-ALPHA</version>
<name>graph</name>
<url>https://www.softwareheritage.org/</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.javalin</groupId>
<artifactId>javalin</artifactId>
<version>2.8.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.26</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.8</version>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>webgraph-big</artifactId>
<version>3.5.0</version>
</dependency>
+ <dependency> <!-- declared explicitly because Dataset.java imports it.unimi.dsi.fastutil classes directly -->
+ <groupId>it.unimi.dsi</groupId>
+ <artifactId>fastutil</artifactId>
+ <version>8.2.2</version>
+ </dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.softwareheritage.graph.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind to the packaging phase -->
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java
index ffad3ad..f93e671 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,18 +1,31 @@
package org.softwareheritage.graph;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
import io.javalin.Javalin;
-import org.softwareheritage.graph.GraphAPI;
+import org.softwareheritage.graph.Dataset;
+import org.softwareheritage.graph.Graph;
public class App
{
public static void main(String[] args)
{
- GraphAPI graph = new GraphAPI("/path/to/graph");
+        // The dataset directory is a mandatory argument: report a usage error
+        // instead of failing with an ArrayIndexOutOfBoundsException.
+        if (args.length < 1) {
+            System.err.println("Usage: App <graph directory>");
+            System.exit(1);
+        }
+
+        // Graph resolves each dataset's files relative to this directory.
+        Path path = Paths.get(args[0]);
+        Graph graph = new Graph(path.toString());
Javalin app = Javalin.create().start(5010);
app.get("/nb_nodes", ctx -> {
ctx.json(graph.nbNodes());
});
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Dataset.java b/api/server/src/main/java/org/softwareheritage/graph/Dataset.java
new file mode 100644
index 0000000..dda4917
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/Dataset.java
@@ -0,0 +1,182 @@
+package org.softwareheritage.graph;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.zip.GZIPInputStream;
+
+import it.unimi.dsi.big.webgraph.BVGraph;
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.longs.LongBigArrays;
+import it.unimi.dsi.fastutil.objects.Object2LongFunction;
+import it.unimi.dsi.io.FastBufferedReader;
+import it.unimi.dsi.io.LineIterator;
+import it.unimi.dsi.lang.MutableString;
+
+/**
+ * One compressed graph loaded with WebGraph, together with the two-way mapping
+ * between SWH hex hashes and the node ids used inside that graph.
+ *
+ * <p>Loading is best effort: when it fails a warning is printed, the hash
+ * mappings stay empty and {@link #graph} may be {@code null}.
+ */
+public class Dataset
+{
+    /** Names of the datasets making up the full graph. */
+    public enum Name {
+        DIR_TO_DIR,
+        DIR_TO_FILE,
+        DIR_TO_REV,
+        ORIGIN_TO_SNAPSHOT,
+        RELEASE_TO_OBJ,
+        REV_TO_DIR,
+        REV_TO_REV,
+        SNAPSHOT_TO_OBJ
+    }
+
+    /** Graph loaded from {@link #path}; {@code null} if loading it failed. */
+    BVGraph graph;
+    /** Base name shared by all the files of this dataset. */
+    String path;
+    /** Hex hash to node id. */
+    HashMap<String, Long> hashToNode;
+    /** Node id to hex hash. */
+    HashMap<Long, String> nodeToHash;
+
+    /**
+     * Loads the graph and its node mappings. Failures are reported on standard
+     * output and leave the dataset without graph and/or with empty mappings.
+     *
+     * @param datasetPath base name of the dataset files
+     */
+    public Dataset(String datasetPath)
+    {
+        // Set up a consistent empty state first, so that a failed load neither
+        // loses the path nor leaves the lookup maps null.
+        this.path = datasetPath;
+        this.hashToNode = new HashMap<String, Long>();
+        this.nodeToHash = new HashMap<Long, String>();
+        try {
+            this.graph = BVGraph.load(datasetPath);
+            setupNodesMapping();
+        } catch (Exception e) {
+            System.out.println("[WARNING] Could not load dataset " + datasetPath + ": " + e);
+        }
+    }
+
+    /**
+     * Builds the hash/node mappings from the {@code .mph}, {@code .nodes.csv.gz}
+     * and {@code .order} files named after {@link #path}. The fields are only
+     * replaced once both mappings have been fully built.
+     *
+     * @throws IOException if one of the files cannot be read
+     * @throws ClassNotFoundException if the serialized {@code .mph} object
+     *         cannot be deserialized
+     * @throws IllegalArgumentException if the number of longs read from the
+     *         {@code .order} file differs from the size of the MPH function
+     */
+    void setupNodesMapping() throws IOException, ClassNotFoundException
+    {
+        // First mapping: SWH hexhash (strings) <=> WebGraph MPH (longs)
+        HashMap<Long, String> mphToHash = new HashMap<Long, String>();
+        // BinIO.loadObject() returns Object, so this cast cannot be checked.
+        @SuppressWarnings("unchecked")
+        Object2LongFunction<String> mphMap =
+            (Object2LongFunction<String>) BinIO.loadObject(path + ".mph");
+
+        // Both streams are closed as soon as every line has been read.
+        Collection<MutableString> hashes;
+        try (InputStream nodeFile = new FileInputStream(path + ".nodes.csv.gz");
+             FastBufferedReader reader = new FastBufferedReader(
+                 new InputStreamReader(new GZIPInputStream(nodeFile), "UTF-8")))
+        {
+            hashes = new LineIterator(reader).allLines();
+        }
+
+        for (MutableString h : hashes)
+        {
+            String hash = h.toString();
+            long mph = mphMap.getLong(hash);
+            mphToHash.put(mph, hash);
+        }
+
+        // Second mapping: WebGraph MPH (longs) <=> BFS ordering (longs)
+        long n = mphMap.size();
+        long[][] bfsMap = LongBigArrays.newBigArray(n);
+        long loaded = BinIO.loadLongs(path + ".order", bfsMap);
+        if (loaded != n)
+            throw new IllegalArgumentException("Graph contains " + n + " nodes, but read " + loaded);
+
+        // Create final mapping: SWH hexhash (strings) <=> BFS ordering (longs)
+        HashMap<String, Long> newHashToNode = new HashMap<String, Long>();
+        HashMap<Long, String> newNodeToHash = new HashMap<Long, String>();
+        for (long id = 0; id < n; id++)
+        {
+            String hash = mphToHash.get(id);
+            long node = LongBigArrays.get(bfsMap, id);
+
+            newHashToNode.put(hash, node);
+            newNodeToHash.put(node, hash);
+        }
+        this.hashToNode = newHashToNode;
+        this.nodeToHash = newNodeToHash;
+    }
+
+    /** @return base name of the dataset files, as given to the constructor */
+    public String getPath()
+    {
+        return path;
+    }
+
+    /**
+     * Looks up the node id of a hash.
+     *
+     * @param hash SWH hex hash
+     * @return node id mapped to the hash
+     * @throws IllegalArgumentException if no node is mapped to the hash
+     */
+    public long getNode(String hash)
+    {
+        // Unboxing a missing entry would fail with an uninformative
+        // NullPointerException; name the offending hash instead.
+        Long node = hashToNode.get(hash);
+        if (node == null)
+            throw new IllegalArgumentException("Unknown hash " + hash + " in dataset " + path);
+        return node;
+    }
+
+    /**
+     * Looks up the hash of a node id.
+     *
+     * @param node node id
+     * @return hash mapped to the node, or {@code null} if there is none
+     */
+    public String getHash(long node)
+    {
+        return nodeToHash.get(node);
+    }
+
+    /**
+     * @param node node id
+     * @return lazy iterator over the successors of the node
+     * @throws NullPointerException if the graph could not be loaded
+     */
+    public LazyLongIterator successors(long node)
+    {
+        return graph.successors(node);
+    }
+
+    /**
+     * @param node node id
+     * @return outdegree of the node, as reported by the underlying graph
+     * @throws NullPointerException if the graph could not be loaded
+     */
+    public long outdegree(long node)
+    {
+        return graph.outdegree(node);
+    }
+}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
new file mode 100644
index 0000000..8c02922
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -0,0 +1,77 @@
+package org.softwareheritage.graph;
+
+import java.util.EnumMap;
+import java.util.Locale;
+
+import org.softwareheritage.graph.Dataset;
+
+/**
+ * In-memory view of the whole graph, split into one {@link Dataset} per
+ * {@link Dataset.Name}. Every dataset is looked up under a common directory,
+ * using the lower-cased enum constant name as base name.
+ */
+public class Graph
+{
+    /** Datasets indexed by name; the constructor fills in every {@link Dataset.Name}. */
+    EnumMap<Dataset.Name, Dataset> graph;
+    /** Directory containing the datasets, always terminated by "/". */
+    String path;
+
+    /**
+     * Loads every known dataset from the given directory.
+     *
+     * @param graphPath directory containing the dataset files; a trailing "/"
+     *                  is appended when missing
+     */
+    public Graph(String graphPath)
+    {
+        this.graph = new EnumMap<Dataset.Name, Dataset>(Dataset.Name.class);
+        this.path = graphPath.endsWith("/") ? graphPath : graphPath + "/";
+
+        for (Dataset.Name dataset : Dataset.Name.values())
+            addDataset(dataset);
+    }
+
+    /**
+     * Loads (or reloads) one dataset and registers it under its name.
+     *
+     * @param dataset name of the dataset to load
+     */
+    public void addDataset(Dataset.Name dataset)
+    {
+        // Locale.ROOT keeps the file name stable whatever the default locale is
+        // (a Turkish locale would otherwise lower-case "I" to a dotless "ı").
+        String datasetPath = path + dataset.name().toLowerCase(Locale.ROOT);
+        graph.put(dataset, new Dataset(datasetPath));
+    }
+
+    /**
+     * Returns the dataset registered under the given name.
+     *
+     * @param dataset name of the dataset
+     * @return the matching dataset, or {@code null} if none is registered
+     */
+    public Dataset getDataset(Dataset.Name dataset)
+    {
+        return graph.get(dataset);
+    }
+
+    /**
+     * Counts the nodes of all datasets whose graph could be loaded. App serves
+     * this value on its {@code /nb_nodes} route.
+     *
+     * @return sum of the per-dataset node counts; objects that appear in
+     *         several datasets are counted once per dataset
+     */
+    public long nbNodes()
+    {
+        long total = 0;
+        for (Dataset dataset : graph.values())
+        {
+            // Dataset leaves its BVGraph unset when loading failed.
+            if (dataset.graph != null)
+                total += dataset.graph.numNodes();
+        }
+        return total;
+    }
+}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java b/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java
deleted file mode 100644
index c16ff19..0000000
--- a/api/server/src/main/java/org/softwareheritage/graph/GraphAPI.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.softwareheritage.graph;
-
-import java.io.IOException;
-
-import it.unimi.dsi.webgraph.BVGraph;
-
-public class GraphAPI
-{
- String graphName;
- BVGraph graph;
-
- public GraphAPI(String graphName)
- {
- this.graphName = graphName;
- try {
- this.graph = BVGraph.load(graphName);
- }
- catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- public int nbNodes()
- {
- return graph.numNodes();
- }
-}

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 3:28 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3451100

Event Timeline