diff --git a/docs/api.rst b/docs/api.rst index 00386ca..35b3e9e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,248 +1,218 @@ Graph traversal API =================== Terminology ----------- This API uses the following notions: - **Node**: a node in the `Software Heritage graph `_, represented by a `persistent identifier `_ (abbreviated as *SWH PID*, or simply *PID*). - **Node type**: the 3-letter specifier from the node PID (``cnt``, ``dir``, ``rel``, ``rev``, ``snp``), or ``*`` for all node types. - **Edge type**: a comma-separated list of ``src:dst`` strings where ``src`` and ``dst`` are node types, or ``*`` for all edge types. Examples ~~~~~~~~ - ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the PID of a node of type content containing the full text of the GPL3 license. - ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the PID of a node of type revision corresponding to the commit in Linux that merged the 'x86/urgent' branch on 31 December 2017. - ``"dir:dir,dir:cnt"`` node types allowing edges from directories to directories nodes, or directories to contents nodes. - ``"rev:rev,dir:*"`` node types allowing edges from revisions to revisions nodes, or from directories nodes. - ``"*:rel"`` node types allowing all edges to releases. Leaves ------ .. http:get:: /graph/leaves/:src Performs a graph traversal and returns the leaves of the subgraph rooted at the specified source node. :param string src: source node specified as a SWH PID :query string edges: edges types the traversal can follow; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:cnt:669ac7c32292798644b21dbb5a0dc657125f444d", "swh:1:cnt:da4cb28febe66172a9fdf1a235525ae6c00cde1d", ... ] Neighbors --------- .. http:get:: /graph/neighbors/:src Returns node direct neighbors (linked with exactly one edge) in the graph. :param string src: source node specified as a SWH PID :query string edges: edges types allowed to be listed as neighbors; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", ... ] Walk ---- .. http:get:: /graph/walk/:src/:dst Performs a graph traversal and returns the first found path from source to destination (final destination node included). :param string src: starting node specified as a SWH PID :param string dst: destination node, either as a node PID or a node type. The traversal will stop at the first node encountered matching the desired destination. :query string edges: edges types the traversal can follow; default to ``"*"`` :query string traversal: traversal algorithm; can be either ``dfs`` or ``bfs``, default to ``dfs`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", "swh:1:rev:cea92e843e40452c08ba313abc39f59efbb4c29c", "swh:1:rev:8d517bdfb57154b8a11d7f1682ecc0f79abf8e02", ... ] Visit ----- -.. http:get:: /graph/visit/:src .. http:get:: /graph/visit/nodes/:src .. http:get:: /graph/visit/paths/:src - Performs a graph traversal and returns explored nodes and/or paths (in the - order of the traversal). + Performs a graph traversal and returns explored nodes or paths (in the order + of the traversal). :param string src: starting node specified as a SWH PID :query string edges: edges types the traversal can follow; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http - GET /graph/visit/ - HTTP/1.1 200 OK - Content-Type: application/json - - { - "paths": [ - [ - "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", - "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", - ... - ], - [ - "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", - "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", - ... - ], - ... - ], - "nodes": [ - "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", - "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", - ... - "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", - ... - ] - } - - .. sourcecode:: http - GET /graph/visit/nodes/ HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ] .. sourcecode:: http GET /graph/visit/paths/ HTTP/1.1 200 OK Content-Type: application/json [ [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... ], [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ], ... ] Stats ----- .. http:get:: /graph/stats Returns statistics on the compressed graph. :statuscode 200: success .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "counts": { "nodes": 16222788, "edges": 9907464 }, "ratios": { "compression": 0.367, "bits_per_node": 5.846, "bits_per_edge": 9.573, "avg_locality": 270.369 }, "indegree": { "min": 0, "max": 12382, "avg": 0.6107127825377487 }, "outdegree": { "min": 0, "max": 1, "avg": 0.6107127825377487 } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java index 841e37c..af56ca8 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/App.java +++ b/java/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,132 +1,122 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.algo.Stats; /** * Entrypoint of the swh-graph server REST API. * * @author Thibault Allançon * @version 1.0 * @since 1.0 */ public class App { public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); Stats stats = new Stats(path); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(5009); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.leaves(src)); }); app.get("/neighbors/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.neighbors(src)); }); - // TODO: anonymous class to return both nodes/paths? (waiting on node types map merged/refactor) - /*app.get("/visit/:src", ctx -> { - SwhId src = new SwhId(ctx.pathParam("src")); - String direction = ctx.queryParam("direction", "forward"); - String edgesFmt = ctx.queryParam("edges", "*"); - - Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.NODES_AND_PATHS); - ctx.json(visit); - });*/ - app.get("/visit/nodes/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.visitNodes(src)); }); app.get("/visit/paths/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.visitPaths(src)); }); app.get("/walk/:src/:dst", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); ctx.json(endpoint.walk(src, dstFmt, algorithm)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } } diff --git a/swh/graph/client.py b/swh/graph/client.py index d6e3bb6..d74438a 100644 --- a/swh/graph/client.py +++ b/swh/graph/client.py @@ -1,70 +1,64 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from enum import Enum from swh.core.api import SWHRemoteAPI class GraphAPIError(Exception): """Graph API Error""" def __str__(self): return ('An unexpected error occurred in the Graph backend: {}' .format(self.args)) -class OutputFmt(Enum): - ONLY_NODES = 1 - ONLY_PATHS = 2 - NODES_AND_PATHS = 3 - - class RemoteGraphClient(SWHRemoteAPI): """Client to the Software Heritage Graph.""" def __init__(self, url, timeout=None): super().__init__( api_exception=GraphAPIError, url=url, timeout=timeout) # Web API endpoints def leaves(self, src, edges="*", direction="forward"): return self.get('leaves/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def neighbors(self, src, edges="*", direction="forward"): return self.get('neighbors/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def stats(self): return self.get('stats') - def visit(self, src, edges="*", direction="forward", - output_fmt=OutputFmt.NODES_AND_PATHS): - subendpoint = "" - if output_fmt is OutputFmt.ONLY_NODES: - subendpoint = "/nodes" - elif output_fmt is OutputFmt.ONLY_PATHS: - subendpoint = "/paths" + def visit_nodes(self, src, edges="*", direction="forward"): + return self.get('visit/nodes/{}'.format(src), + params={ + 'edges': edges, + 'direction': direction + }) - return self.get('visit{}/{}'.format(subendpoint, src), + def visit_paths(self, src, edges="*", direction="forward"): + return self.get('visit/paths/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def walk(self, src, dst, edges="*", traversal="dfs", direction="forward"): return self.get('walk/{}/{}'.format(src, dst), params={ 'edges': edges, 'traversal': traversal, 'direction': direction })