diff --git a/docs/docker.rst b/docs/docker.rst index b628749..d277867 100644 --- a/docs/docker.rst +++ b/docs/docker.rst @@ -1,75 +1,78 @@ Graph Docker environment ======================== Build ----- .. code:: bash $ git clone https://forge.softwareheritage.org/source/swh-graph.git $ cd swh-graph $ docker build --tag swh-graph dockerfiles Run --- Given a graph ``g`` specified by: - ``g.edges.csv.gz``: gzip-compressed csv file with one edge per line, as a "SRC_ID SPACE DST_ID" string, where identifiers are the :ref:`persistent-identifiers` of each node. - ``g.nodes.csv.gz``: sorted list of unique node identifiers appearing in the corresponding ``g.edges.csv.gz`` file. The format is a gzip-compressed csv file with one persistent identifier per line. .. code:: bash $ docker run -ti \ --volume /PATH/TO/GRAPH/:/srv/softwareheritage/graph/data \ --publish 127.0.0.1:5009:5009 \ swh-graph:latest \ bash Where ``/PATH/TO/GRAPH`` is a directory containing the ``g.edges.csv.gz`` and ``g.nodes.csv.gz`` files. By default, when entering the container the current working directory will be ``/srv/softwareheritage/graph``; all relative paths found below are intended to be relative to that dir. Graph compression ~~~~~~~~~~~~~~~~~ To compress the graph: .. code:: bash $ app/scripts/compress_graph.sh --lib lib/ --input data/g Warning: very large graphs may need a bigger batch size parameter for WebGraph internals (you can specify a value when running the compression script using: ``--batch-size 1000000000``). Node identifier mappings ~~~~~~~~~~~~~~~~~~~~~~~~ To dump the mapping files (i.e., various node id <-> other info mapping files, in either ``.csv.gz`` or ad-hoc ``.map`` format): .. code:: bash $ java -cp app/swh-graph.jar \ org.softwareheritage.graph.backend.Setup data/compressed/g Graph server ~~~~~~~~~~~~ To start the swh-graph server: .. 
code:: bash $ java -cp app/swh-graph.jar \ org.softwareheritage.graph.App data/compressed/g + +To specify the port on which the server will run, use the ``--port`` or ``-p`` flag +(default is 5009). diff --git a/java/server/pom.xml b/java/server/pom.xml index 5cd22a0..544be2f 100644 --- a/java/server/pom.xml +++ b/java/server/pom.xml @@ -1,146 +1,151 @@ 4.0.0 org.softwareheritage.graph swh-graph 1.0 swh-graph https://www.softwareheritage.org/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 junit junit 4.11 test org.hamcrest hamcrest 2.1 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big 3.5.1 it.unimi.dsi fastutil 8.2.2 + + com.martiansoftware + jsap + 2.1 + maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 -verbose -Xlint:all maven-surefire-plugin 2.22.1 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin org.softwareheritage.graph.App jar-with-dependencies make-assembly package single org.apache.maven.plugins maven-javadoc-plugin 3.1.1 diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java index af56ca8..654597c 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/App.java +++ b/java/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,122 +1,151 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; +import com.martiansoftware.jsap.FlaggedOption; +import com.martiansoftware.jsap.JSAP; +import com.martiansoftware.jsap.JSAPException; +import com.martiansoftware.jsap.JSAPResult; +import com.martiansoftware.jsap.Parameter; +import com.martiansoftware.jsap.SimpleJSAP; +import
com.martiansoftware.jsap.UnflaggedOption;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;

import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.algo.Stats;

/**
 * Entrypoint of the swh-graph server REST API.
 *
 * @author Thibault Allançon
 * @version 1.0
 * @since 1.0
 */

public class App {
    /** Largest valid TCP port number. */
    private static final int MAX_PORT = 65535;

    /**
     * Parses the command line and starts the REST server.
     *
     * <p>Accepted arguments: an optional {@code --port}/{@code -p} integer (default 5009) and a
     * required positional {@code graphPath}. The process exits with status 1 when the argument
     * parser printed a message, or when the requested port is outside {@code 0..65535}.
     *
     * @param args command line arguments
     * @throws IOException propagated from {@link #startServer(String, int)}
     * @throws JSAPException if the argument parser itself cannot be configured
     */
    public static void main(String[] args) throws IOException, JSAPException {
        SimpleJSAP jsap = new SimpleJSAP(
            App.class.getName(),
            "Server to load and query a compressed graph representation of Software Heritage archive.",
            new Parameter[] {
                new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port",
                        "Binding port of the server."),
                new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
                        JSAP.NOT_GREEDY, "The basename of the compressed graph."),
            }
        );

        JSAPResult config = jsap.parse(args);
        if (jsap.messagePrinted()) {
            // The parser already printed something to the user; nothing left to run.
            System.exit(1);
        }

        String graphPath = config.getString("graphPath");
        int port = config.getInt("port");

        // Reject an unusable port now rather than after the graph has been loaded.
        if (port < 0 || port > MAX_PORT) {
            System.err.println("Invalid port " + port + ": expected a value between 0 and " + MAX_PORT);
            System.exit(1);
        }

        startServer(graphPath, port);
    }

    /**
     * Loads the graph and its statistics, then starts the HTTP server and registers all routes.
     *
     * @param graphPath basename of the compressed graph, passed to {@code Graph} and {@code Stats}
     * @param port port the HTTP server listens on
     * @throws IOException declared by the original signature; presumably raised while loading
     *     the graph or its statistics
     */
    private static void startServer(String graphPath, int port) throws IOException {
        Graph graph = new Graph(graphPath);
        Stats stats = new Stats(graphPath);

        // Clean up on exit
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            try {
                graph.cleanUp();
            } catch (IOException e) {
                System.err.println("Could not clean up graph on exit: " + e);
            }
        }));

        // Configure Jackson JSON to use snake case naming style
        ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
        objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
        JavalinJackson.configure(objectMapper);

        Javalin app = Javalin.create().start(port);

        // Reject requests carrying query parameters the endpoint does not know about.
        app.before("/stats/*", ctx -> checkQueryStrings(ctx, ""));
        app.before("/leaves/*", ctx -> checkQueryStrings(ctx, "direction|edges"));
        app.before("/neighbors/*", ctx -> checkQueryStrings(ctx, "direction|edges"));
        app.before("/visit/*", ctx -> checkQueryStrings(ctx, "direction|edges"));
        app.before("/walk/*", ctx -> checkQueryStrings(ctx, "direction|edges|traversal"));

        app.get("/stats/", ctx -> ctx.json(stats));

        // Graph traversal endpoints
        // By default the traversal is a forward DFS using all edges

        app.get("/leaves/:src", ctx -> {
            SwhId src = new SwhId(ctx.pathParam("src"));
            ctx.json(traversalEndpoint(graph, ctx).leaves(src));
        });

        app.get("/neighbors/:src", ctx -> {
            SwhId src = new SwhId(ctx.pathParam("src"));
            ctx.json(traversalEndpoint(graph, ctx).neighbors(src));
        });

        app.get("/visit/nodes/:src", ctx -> {
            SwhId src = new SwhId(ctx.pathParam("src"));
            ctx.json(traversalEndpoint(graph, ctx).visitNodes(src));
        });

        app.get("/visit/paths/:src", ctx -> {
            SwhId src = new SwhId(ctx.pathParam("src"));
            ctx.json(traversalEndpoint(graph, ctx).visitPaths(src));
        });

        app.get("/walk/:src/:dst", ctx -> {
            SwhId src = new SwhId(ctx.pathParam("src"));
            String dstFmt = ctx.pathParam("dst");
            String algorithm = ctx.queryParam("traversal", "dfs");
            ctx.json(traversalEndpoint(graph, ctx).walk(src, dstFmt, algorithm));
        });

        // Invalid user input is reported as HTTP 400 with the exception message as body.
        app.exception(IllegalArgumentException.class, (e, ctx) -> {
            ctx.status(400);
            ctx.result(e.getMessage());
        });
    }

    /**
     * Builds the traversal endpoint for a request from its {@code direction} (default
     * {@code "forward"}) and {@code edges} (default {@code "*"}) query parameters.
     */
    private static Endpoint traversalEndpoint(Graph graph, Context ctx) {
        String direction = ctx.queryParam("direction", "forward");
        String edgesFmt = ctx.queryParam("edges", "*");
        return new Endpoint(graph, direction, edgesFmt);
    }

    /**
     * Checks that every query parameter name of the request fully matches {@code allowedFmt}.
     *
     * @param ctx request context whose query parameters are inspected
     * @param allowedFmt regular expression of accepted parameter names; an empty pattern
     *     accepts no parameter at all
     * @throws IllegalArgumentException on the first parameter name that does not match
     */
    private static void checkQueryStrings(Context ctx, String allowedFmt) {
        Map<String, List<String>> queryParamMap = ctx.queryParamMap();
        for (String key : queryParamMap.keySet()) {
            if (!key.matches(allowedFmt)) {
                throw new IllegalArgumentException("Unknown query string: " + key);
            }
        }
    }
}