diff --git a/api/client/swh/graph/client.py b/api/client/swh/graph/client.py index 82fa44a..3c27a22 100644 --- a/api/client/swh/graph/client.py +++ b/api/client/swh/graph/client.py @@ -1,56 +1,63 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from enum import Enum from swh.core.api import SWHRemoteAPI class GraphAPIError(Exception): """Graph API Error""" def __str__(self): return ('An unexpected error occurred in the Graph backend: {}' .format(self.args)) class OutputFmt(Enum): ONLY_NODES = 1 ONLY_PATHS = 2 NODES_AND_PATHS = 3 class RemoteGraphClient(SWHRemoteAPI): """Client to the Software Heritage Graph.""" def __init__(self, url, timeout=None): super().__init__( api_exception=GraphAPIError, url=url, timeout=timeout) # Web API endpoints + def neighbors(self, src, edges="*", direction="forward"): + return self.get('neighbors/{}'.format(src), + params={ + 'edges': edges, + 'direction': direction + }) + def stats(self): return self.get('stats') def visit(self, src, edges="*", direction="forward", output_fmt=OutputFmt.NODES_AND_PATHS): subendpoint = "" if output_fmt is OutputFmt.ONLY_NODES: subendpoint = "/nodes" elif output_fmt is OutputFmt.ONLY_PATHS: subendpoint = "/paths" return self.get('visit{}/{}'.format(subendpoint, src), params={ 'edges': edges, 'direction': direction }) def walk(self, src, dst, edges="*", traversal="dfs", direction="forward"): return self.get('walk/{}/{}'.format(src, dst), params={ 'edges': edges, 'traversal': traversal, 'direction': direction }) diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java index 01156f7..7e9f05c 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/App.java +++ b/api/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,104 +1,115 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; +import org.softwareheritage.graph.algo.Neighbors; import org.softwareheritage.graph.algo.Stats; import org.softwareheritage.graph.algo.Visit; import org.softwareheritage.graph.algo.Walk; public class App { public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); Stats stats = new Stats(path); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(5009); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); + app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges + app.get("/neighbors/:src", ctx -> { + SwhId src = new SwhId(ctx.pathParam("src")); + String direction = ctx.queryParam("direction", "forward"); + String edgesFmt = ctx.queryParam("edges", "*"); + + Neighbors neighbors = new Neighbors(graph, src, edgesFmt, direction); + ctx.json(neighbors.getNeighbors()); + }); + app.get("/visit/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.NODES_AND_PATHS); ctx.json(visit); }); app.get("/visit/nodes/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_NODES); ctx.json(visit.getNodes()); }); app.get("/visit/paths/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_PATHS); ctx.json(visit.getPaths()); }); app.get("/walk/:src/:dst", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String traversal = ctx.queryParam("traversal", "dfs"); Walk walk = new Walk(graph, src, dstFmt, edgesFmt, direction, traversal); ctx.json(walk.getPath()); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java new file mode 100644 index 0000000..133d110 --- /dev/null +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java @@ -0,0 +1,49 @@ +package org.softwareheritage.graph.algo; + +import java.util.ArrayList; + +import it.unimi.dsi.big.webgraph.LazyLongIterator; + +import org.softwareheritage.graph.AllowedEdges; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhId; + +public class Neighbors { + Graph graph; + boolean useTransposed; + AllowedEdges edges; + + ArrayList neighbors; + + public Neighbors(Graph graph, SwhId src, String edgesFmt, String direction) { + if (!direction.matches("forward|backward")) { + throw new IllegalArgumentException("Unknown traversal direction: " + direction); + } + + this.graph = graph; + this.useTransposed = (direction.equals("backward")); + this.edges = new AllowedEdges(edgesFmt); + this.neighbors = new ArrayList(); + + iterateNeighbors(src); + } + + public ArrayList getNeighbors() { + return neighbors; + } + + private void iterateNeighbors(SwhId swhId) { + long nodeId = graph.getNodeId(swhId); + long degree = graph.degree(nodeId, useTransposed); + LazyLongIterator neighborsNodeIds = graph.neighbors(nodeId, useTransposed); + + while (degree-- > 0) { + long neighborNodeId = neighborsNodeIds.nextLong(); + SwhId neighborSwhId = graph.getSwhId(neighborNodeId); + if (edges.isAllowed(swhId.getType(), neighborSwhId.getType())) { + neighbors.add(neighborSwhId); + } + } + } +} + diff --git a/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java b/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java new file mode 100644 index 0000000..800301f --- /dev/null +++ b/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java @@ -0,0 +1,122 @@ +package org.softwareheritage.graph; + +import java.util.ArrayList; + +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.GraphTest; +import org.softwareheritage.graph.SwhId; +import org.softwareheritage.graph.algo.Neighbors; + +public class NeighborsTest extends GraphTest { + @Test + public void zeroNeighbor() { + Graph graph = getGraph(); + ArrayList expectedNodes = new ArrayList<>(); + + SwhId src1 = new SwhId("swh:1:snp:0000000000000000000000000000000000000020"); + Neighbors neighbors1 = new Neighbors(graph, src1, "*", "backward"); + assertEquals(expectedNodes, neighbors1.getNeighbors()); + + SwhId src2 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000004"); + Neighbors neighbors2 = new Neighbors(graph, src2, "*", "forward"); + assertEquals(expectedNodes, neighbors2.getNeighbors()); + + SwhId src3 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000015"); + Neighbors neighbors3 = new Neighbors(graph, src3, "*", "forward"); + assertEquals(expectedNodes, neighbors3.getNeighbors()); + + SwhId src4 = new SwhId("swh:1:rel:0000000000000000000000000000000000000019"); + Neighbors neighbors4 = new Neighbors(graph, src4, "*", "backward"); + assertEquals(expectedNodes, neighbors4.getNeighbors()); + + SwhId src5 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008"); + Neighbors neighbors5 = new Neighbors(graph, src5, "snp:*,rev:*,rel:*", "forward"); + assertEquals(expectedNodes, neighbors5.getNeighbors()); + } + + @Test + public void oneNeighbor() { + Graph graph = getGraph(); + + SwhId src1 = new SwhId("swh:1:rev:0000000000000000000000000000000000000003"); + Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward"); + ArrayList expectedNodes1 = new ArrayList<>(); + expectedNodes1.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000002")); + assertEquals(expectedNodes1, neighbors1.getNeighbors()); + + SwhId src2 = new SwhId("swh:1:dir:0000000000000000000000000000000000000017"); + Neighbors neighbors2 = new Neighbors(graph, src2, "dir:cnt", "forward"); + ArrayList expectedNodes2 = new ArrayList<>(); + expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000014")); + assertEquals(expectedNodes2, neighbors2.getNeighbors()); + + SwhId src3 = new SwhId("swh:1:dir:0000000000000000000000000000000000000012"); + Neighbors neighbors3 = new Neighbors(graph, src3, "*", "backward"); + ArrayList expectedNodes3 = new ArrayList<>(); + expectedNodes3.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013")); + assertEquals(expectedNodes3, neighbors3.getNeighbors()); + + SwhId src4 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009"); + Neighbors neighbors4 = new Neighbors(graph, src4, "rev:rev", "backward"); + ArrayList expectedNodes4 = new ArrayList<>(); + expectedNodes4.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013")); + assertEquals(expectedNodes4, neighbors4.getNeighbors()); + } + + @Test + public void twoNeighbors() { + Graph graph = getGraph(); + + SwhId src1 = new SwhId("swh:1:snp:0000000000000000000000000000000000000020"); + Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward"); + ArrayList expectedNodes1 = new ArrayList<>(); + expectedNodes1.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010")); + expectedNodes1.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000009")); + assertEquals(expectedNodes1, neighbors1.getNeighbors()); + + SwhId src2 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008"); + Neighbors neighbors2 = new Neighbors(graph, src2, "dir:cnt", "forward"); + ArrayList expectedNodes2 = new ArrayList<>(); + expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001")); + expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007")); + assertEquals(expectedNodes2, neighbors2.getNeighbors()); + + SwhId src3 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000001"); + Neighbors neighbors3 = new Neighbors(graph, src3, "*", "backward"); + ArrayList expectedNodes3 = new ArrayList<>(); + expectedNodes3.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000008")); + expectedNodes3.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000002")); + assertEquals(expectedNodes3, neighbors3.getNeighbors()); + + SwhId src4 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009"); + Neighbors neighbors4 = new Neighbors(graph, src4, "rev:snp,rev:rel", "backward"); + ArrayList expectedNodes4 = new ArrayList<>(); + expectedNodes4.add(new SwhId("swh:1:snp:0000000000000000000000000000000000000020")); + expectedNodes4.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010")); + assertEquals(expectedNodes4, neighbors4.getNeighbors()); + } + + @Test + public void threeNeighbors() { + Graph graph = getGraph(); + + SwhId src1 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008"); + Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward"); + ArrayList expectedNodes1 = new ArrayList<>(); + expectedNodes1.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000006")); + expectedNodes1.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001")); + expectedNodes1.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007")); + assertEquals(expectedNodes1, neighbors1.getNeighbors()); + + SwhId src2 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009"); + Neighbors neighbors2 = new Neighbors(graph, src2, "*", "backward"); + ArrayList expectedNodes2 = new ArrayList<>(); + expectedNodes2.add(new SwhId("swh:1:snp:0000000000000000000000000000000000000020")); + expectedNodes2.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010")); + expectedNodes2.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013")); + assertEquals(expectedNodes2, neighbors2.getNeighbors()); + } +} diff --git a/docs/api.rst b/docs/api.rst index dfcadbe..7588749 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,189 +1,218 @@ Graph traversal API =================== Terminology ----------- This API uses the following notions: - **Node**: a node in the `Software Heritage graph `_, represented by a `persistent identifier `_ (abbreviated as *SWH PID*, or simply *PID*). - **Node type**: the 3-letter specifier from the node PID (``cnt``, ``dir``, ``rel``, ``rev``, ``snp``), or ``*`` for all node types. - **Edge type**: a comma-separated list of ``src:dst`` strings where ``src`` and ``dst`` are node types, or ``*`` for all edge types. Examples ~~~~~~~~ - ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the PID of a node of type content containing the full text of the GPL3 license. - ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the PID of a node of type revision corresponding to the commit in Linux that merged the 'x86/urgent' branch on 31 December 2017. - ``"dir:dir,dir:cnt"`` node types allowing edges from directories to directories nodes, or directories to contents nodes. - ``"rev:rev,dir:*"`` node types allowing edges from revisions to revisions nodes, or from directories nodes. - ``"*:rel"`` node types allowing all edges to releases. +Neighbors +--------- + +.. http:get:: /graph/neighbors/:src + + Returns node direct neighbors (linked with exactly one edge) in the graph. + + :param string src: source node specified as a SWH PID + + :query string edges: edges types allowed to be listed as neighbors; default + to ``"*"`` + :query string direction: direction in which graph edges will be followed; + can be either ``forward`` or ``backward``, default to ``forward`` + + :statuscode 200: success + :statuscode 400: invalid query string provided + :statuscode 404: starting node cannot be found + + .. sourcecode:: http + + HTTP/1.1 200 OK + Content-Type: application/json + + [ + "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", + "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", + ... + ] + Walk ---- .. http:get:: /graph/walk/:src/:dst Performs a graph traversal and returns the first found path from source to destination (final destination node included). :param string src: starting node specified as a SWH PID :param string dst: destination node, either as a node PID or a node type. The traversal will stop at the first node encountered matching the desired destination. :query string edges: edges types the traversal can follow; default to ``"*"`` :query string traversal: traversal algorithm; can be either ``dfs`` or ``bfs``, default to ``dfs`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", "swh:1:rev:cea92e843e40452c08ba313abc39f59efbb4c29c", "swh:1:rev:8d517bdfb57154b8a11d7f1682ecc0f79abf8e02", ... ] Visit ----- .. http:get:: /graph/visit/:src .. http:get:: /graph/visit/nodes/:src .. http:get:: /graph/visit/paths/:src Performs a graph traversal and returns explored nodes and/or paths (in the order of the traversal). :param string src: starting node specified as a SWH PID :query string edges: edges types the traversal can follow; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http GET /graph/visit/ HTTP/1.1 200 OK Content-Type: application/json { "paths": [ [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... ], [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ], ... ], "nodes": [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ] } .. sourcecode:: http GET /graph/visit/nodes/ HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ] .. sourcecode:: http GET /graph/visit/paths/ HTTP/1.1 200 OK Content-Type: application/json [ [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... ], [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ], ... ] Stats ----- .. http:get:: /graph/stats Returns statistics on the compressed graph. :statuscode 200: success .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "counts": { "nodes": 16222788, "edges": 9907464 }, "ratios": { "compression": 0.367, "bits_per_node": 5.846, "bits_per_edge": 9.573, "avg_locality": 270.369 }, "indegree": { "min": 0, "max": 12382, "avg": 0.6107127825377487 }, "outdegree": { "min": 0, "max": 1, "avg": 0.6107127825377487 } }