Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/docs/docker.rst b/docs/docker.rst
index b628749..d277867 100644
--- a/docs/docker.rst
+++ b/docs/docker.rst
@@ -1,75 +1,78 @@
Graph Docker environment
========================
Build
-----
.. code:: bash
$ git clone https://forge.softwareheritage.org/source/swh-graph.git
$ cd swh-graph
$ docker build --tag swh-graph dockerfiles
Run
---
Given a graph ``g`` specified by:
- ``g.edges.csv.gz``: gzip-compressed csv file with one edge per line, as a
"SRC_ID SPACE DST_ID" string, where identifiers are the
:ref:`persistent-identifiers` of each node.
- ``g.nodes.csv.gz``: sorted list of unique node identifiers appearing in the
corresponding ``g.edges.csv.gz`` file. The format is a gzip-compressed csv
file with one persistent identifier per line.
.. code:: bash
$ docker run -ti \
--volume /PATH/TO/GRAPH/:/srv/softwareheritage/graph/data \
--publish 127.0.0.1:5009:5009 \
swh-graph:latest \
bash
Where ``/PATH/TO/GRAPH`` is a directory containing the ``g.edges.csv.gz`` and
``g.nodes.csv.gz`` files. By default, when entering the container the current
working directory will be ``/srv/softwareheritage/graph``; all relative paths
found below are intended to be relative to that dir.
Graph compression
~~~~~~~~~~~~~~~~~
To compress the graph:
.. code:: bash
$ app/scripts/compress_graph.sh --lib lib/ --input data/g
Warning: very large graphs may need a bigger batch size parameter for WebGraph
internals (you can specify a value when running the compression script using:
``--batch-size 1000000000``).
Node identifier mappings
~~~~~~~~~~~~~~~~~~~~~~~~
To dump the mapping files (i.e., various node id <-> other info mapping files,
in either ``.csv.gz`` or ad-hoc ``.map`` format):
.. code:: bash
$ java -cp app/swh-graph.jar \
org.softwareheritage.graph.backend.Setup data/compressed/g
Graph server
~~~~~~~~~~~~
To start the swh-graph server:
.. code:: bash
$ java -cp app/swh-graph.jar \
org.softwareheritage.graph.App data/compressed/g
+
+To specify the port on which the server will run, use the `--port` or `-p` flag
+(default is 5009).
diff --git a/java/server/pom.xml b/java/server/pom.xml
index 5cd22a0..544be2f 100644
--- a/java/server/pom.xml
+++ b/java/server/pom.xml
@@ -1,146 +1,151 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.softwareheritage.graph</groupId>
<artifactId>swh-graph</artifactId>
<version>1.0</version>
<name>swh-graph</name>
<url>https://www.softwareheritage.org/</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.release>11</maven.compiler.release>
</properties>
<dependencies>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.3</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest</artifactId>
<version>2.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.javalin</groupId>
<artifactId>javalin</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.26</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.9.8</version>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>webgraph-big</artifactId>
<version>3.5.1</version>
</dependency>
<dependency>
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
<version>8.2.2</version>
</dependency>
+ <dependency>
+ <groupId>com.martiansoftware</groupId>
+ <artifactId>jsap</artifactId>
+ <version>2.1</version>
+ </dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<configuration>
<compilerArgs>
<arg>-verbose</arg>
<arg>-Xlint:all</arg>
</compilerArgs>
</configuration>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.softwareheritage.graph.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind to the packaging phase -->
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</pluginManagement>
</build>
<reporting>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.1.1</version>
</plugin>
</plugins>
</reporting>
</project>
diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java
index af56ca8..654597c 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,122 +1,151 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
+import com.martiansoftware.jsap.FlaggedOption;
+import com.martiansoftware.jsap.JSAP;
+import com.martiansoftware.jsap.JSAPException;
+import com.martiansoftware.jsap.JSAPResult;
+import com.martiansoftware.jsap.Parameter;
+import com.martiansoftware.jsap.SimpleJSAP;
+import com.martiansoftware.jsap.UnflaggedOption;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.algo.Stats;
/**
* Entrypoint of the swh-graph server REST API.
*
* @author Thibault Allançon
* @version 1.0
* @since 1.0
*/
public class App {
- public static void main(String[] args) throws IOException {
- String path = args[0];
- Graph graph = new Graph(path);
- Stats stats = new Stats(path);
+ public static void main(String[] args) throws IOException, JSAPException {
+ SimpleJSAP jsap = new SimpleJSAP(
+ App.class.getName(),
+ "Server to load and query a compressed graph representation of Software Heritage archive.",
+ new Parameter[] {
+ new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port",
+ "Binding port of the server."),
+ new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
+ JSAP.NOT_GREEDY, "The basename of the compressed graph."),
+ }
+ );
+
+ JSAPResult config = jsap.parse(args);
+ if (jsap.messagePrinted()) {
+ System.exit(1);
+ }
+
+ String graphPath = config.getString("graphPath");
+ int port = config.getInt("port");
+
+ startServer(graphPath, port);
+ }
+
+ private static void startServer(String graphPath, int port) throws IOException {
+ Graph graph = new Graph(graphPath);
+ Stats stats = new Stats(graphPath);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
- Javalin app = Javalin.create().start(5009);
+ Javalin app = Javalin.create().start(port);
app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
app.get("/stats/", ctx -> { ctx.json(stats); });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
app.get("/leaves/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.leaves(src));
});
app.get("/neighbors/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.neighbors(src));
});
app.get("/visit/nodes/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitNodes(src));
});
app.get("/visit/paths/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.visitPaths(src));
});
app.get("/walk/:src/:dst", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String algorithm = ctx.queryParam("traversal", "dfs");
Endpoint endpoint = new Endpoint(graph, direction, edgesFmt);
ctx.json(endpoint.walk(src, dstFmt, algorithm));
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map<String, List<String>> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
}

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 3:29 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3253493

Event Timeline