diff --git a/api/client/swh/graph/client.py b/api/client/swh/graph/client.py index 3c27a22..d6e3bb6 100644 --- a/api/client/swh/graph/client.py +++ b/api/client/swh/graph/client.py @@ -1,63 +1,70 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from enum import Enum from swh.core.api import SWHRemoteAPI class GraphAPIError(Exception): """Graph API Error""" def __str__(self): return ('An unexpected error occurred in the Graph backend: {}' .format(self.args)) class OutputFmt(Enum): ONLY_NODES = 1 ONLY_PATHS = 2 NODES_AND_PATHS = 3 class RemoteGraphClient(SWHRemoteAPI): """Client to the Software Heritage Graph.""" def __init__(self, url, timeout=None): super().__init__( api_exception=GraphAPIError, url=url, timeout=timeout) # Web API endpoints + def leaves(self, src, edges="*", direction="forward"): + return self.get('leaves/{}'.format(src), + params={ + 'edges': edges, + 'direction': direction + }) + def neighbors(self, src, edges="*", direction="forward"): return self.get('neighbors/{}'.format(src), params={ 'edges': edges, 'direction': direction }) def stats(self): return self.get('stats') def visit(self, src, edges="*", direction="forward", output_fmt=OutputFmt.NODES_AND_PATHS): subendpoint = "" if output_fmt is OutputFmt.ONLY_NODES: subendpoint = "/nodes" elif output_fmt is OutputFmt.ONLY_PATHS: subendpoint = "/paths" return self.get('visit{}/{}'.format(subendpoint, src), params={ 'edges': edges, 'direction': direction }) def walk(self, src, dst, edges="*", traversal="dfs", direction="forward"): return self.get('walk/{}/{}'.format(src, dst), params={ 'edges': edges, 'traversal': traversal, 'direction': direction }) diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java index 7e9f05c..49780cc 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/App.java +++ b/api/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,115 +1,126 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; +import org.softwareheritage.graph.algo.Leaves; import org.softwareheritage.graph.algo.Neighbors; import org.softwareheritage.graph.algo.Stats; import org.softwareheritage.graph.algo.Visit; import org.softwareheritage.graph.algo.Walk; public class App { public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); Stats stats = new Stats(path); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(5009); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); + app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges + app.get("/leaves/:src", ctx -> { + SwhId src = new SwhId(ctx.pathParam("src")); + String direction = ctx.queryParam("direction", "forward"); + String edgesFmt = ctx.queryParam("edges", "*"); + + Leaves leaves = new Leaves(graph, src, edgesFmt, direction); + ctx.json(leaves.getLeaves()); + }); + app.get("/neighbors/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Neighbors neighbors = new Neighbors(graph, src, edgesFmt, direction); ctx.json(neighbors.getNeighbors()); }); app.get("/visit/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.NODES_AND_PATHS); ctx.json(visit); }); app.get("/visit/nodes/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_NODES); ctx.json(visit.getNodes()); }); app.get("/visit/paths/:src", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_PATHS); ctx.json(visit.getPaths()); }); app.get("/walk/:src/:dst", ctx -> { SwhId src = new SwhId(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String traversal = ctx.queryParam("traversal", "dfs"); Walk walk = new Walk(graph, src, dstFmt, edgesFmt, direction, traversal); ctx.json(walk.getPath()); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Leaves.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Leaves.java new file mode 100644 index 0000000..40b9062 --- /dev/null +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Leaves.java @@ -0,0 +1,64 @@ +package org.softwareheritage.graph.algo; + +import java.util.ArrayList; + +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.bits.LongArrayBitVector; + +import org.softwareheritage.graph.AllowedEdges; +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.SwhId; + +public class Leaves { + Graph graph; + boolean useTransposed; + AllowedEdges edges; + + ArrayList leaves; + LongArrayBitVector visited; + + public Leaves(Graph graph, SwhId src, String edgesFmt, String direction) { + if (!direction.matches("forward|backward")) { + throw new IllegalArgumentException("Unknown traversal direction: " + direction); + } + + this.graph = graph; + this.useTransposed = (direction.equals("backward")); + this.edges = new AllowedEdges(edgesFmt); + this.leaves = new ArrayList(); + this.visited = LongArrayBitVector.ofLength(graph.getNbNodes()); + + long nodeId = graph.getNodeId(src); + dfs(nodeId); + } + + public ArrayList getLeaves() { + return leaves; + } + + private void dfs(long currentNodeId) { + visited.set(currentNodeId); + SwhId currentSwhId = graph.getSwhId(currentNodeId); + + long degree = graph.degree(currentNodeId, useTransposed); + LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); + long neighborsCnt = 0; + + while (degree-- > 0) { + long neighborNodeId = neighbors.nextLong(); + Node.Type currentNodeType = currentSwhId.getType(); + Node.Type neighborNodeType = graph.getSwhId(neighborNodeId).getType(); + if (edges.isAllowed(currentNodeType, neighborNodeType)) { + neighborsCnt++; + if (!visited.getBoolean(neighborNodeId)) { + dfs(neighborNodeId); + } + } + } + + if (neighborsCnt == 0) { + leaves.add(currentSwhId); + } + } +} diff --git a/api/server/src/test/java/org/softwareheritage/graph/LeavesTest.java b/api/server/src/test/java/org/softwareheritage/graph/LeavesTest.java new file mode 100644 index 0000000..725a4a7 --- /dev/null +++ b/api/server/src/test/java/org/softwareheritage/graph/LeavesTest.java @@ -0,0 +1,103 @@ +package org.softwareheritage.graph; + +import java.util.ArrayList; + +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.GraphTest; +import org.softwareheritage.graph.SwhId; +import org.softwareheritage.graph.algo.Leaves; + +public class LeavesTest extends GraphTest { + @Test + public void forwardFromSnp() { + Graph graph = getGraph(); + SwhId src = new SwhId("swh:1:snp:0000000000000000000000000000000000000020"); + Leaves leaves = new Leaves(graph, src, "*", "forward"); + + ArrayList expectedLeaves = new ArrayList<>(); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000004")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000005")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007")); + + assertEquals(expectedLeaves, leaves.getLeaves()); + } + + @Test + public void forwardFromRel() { + Graph graph = getGraph(); + SwhId src = new SwhId("swh:1:rel:0000000000000000000000000000000000000019"); + Leaves leaves = new Leaves(graph, src, "*", "forward"); + + ArrayList expectedLeaves = new ArrayList<>(); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000015")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000014")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000004")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000005")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000011")); + + assertEquals(expectedLeaves, leaves.getLeaves()); + } + + @Test + public void backwardFromLeaf() { + Graph graph = getGraph(); + + SwhId src1 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000015"); + Leaves leaves1 = new Leaves(graph, src1, "*", "backward"); + ArrayList expectedLeaves1 = new ArrayList<>(); + expectedLeaves1.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000019")); + assertEquals(expectedLeaves1, leaves1.getLeaves()); + + SwhId src2 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000004"); + Leaves leaves2 = new Leaves(graph, src2, "*", "backward"); + ArrayList expectedLeaves2 = new ArrayList<>(); + expectedLeaves2.add(new SwhId("swh:1:snp:0000000000000000000000000000000000000020")); + expectedLeaves2.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000019")); + assertEquals(expectedLeaves2, leaves2.getLeaves()); + } + + @Test + public void forwardRevToRevOnly() { + Graph graph = getGraph(); + SwhId src = new SwhId("swh:1:rev:0000000000000000000000000000000000000018"); + Leaves leaves = new Leaves(graph, src, "rev:rev", "forward"); + + ArrayList expectedLeaves = new ArrayList<>(); + expectedLeaves.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000003")); + + assertEquals(expectedLeaves, leaves.getLeaves()); + } + + @Test + public void forwardDirToAll() { + Graph graph = getGraph(); + SwhId src = new SwhId("swh:1:dir:0000000000000000000000000000000000000008"); + Leaves leaves = new Leaves(graph, src, "dir:*", "forward"); + + ArrayList expectedLeaves = new ArrayList<>(); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000004")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000005")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001")); + expectedLeaves.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007")); + + assertEquals(expectedLeaves, leaves.getLeaves()); + } + + @Test + public void backwardCntToDirDirToDir() { + Graph graph = getGraph(); + SwhId src = new SwhId("swh:1:cnt:0000000000000000000000000000000000000005"); + Leaves leaves = new Leaves(graph, src, "cnt:dir,dir:dir", "backward"); + + ArrayList expectedLeaves = new ArrayList<>(); + expectedLeaves.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000012")); + + assertEquals(expectedLeaves, leaves.getLeaves()); + } +} diff --git a/docs/api.rst b/docs/api.rst index 7588749..00386ca 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,218 +1,248 @@ Graph traversal API =================== Terminology ----------- This API uses the following notions: - **Node**: a node in the `Software Heritage graph `_, represented by a `persistent identifier `_ (abbreviated as *SWH PID*, or simply *PID*). - **Node type**: the 3-letter specifier from the node PID (``cnt``, ``dir``, ``rel``, ``rev``, ``snp``), or ``*`` for all node types. - **Edge type**: a comma-separated list of ``src:dst`` strings where ``src`` and ``dst`` are node types, or ``*`` for all edge types. Examples ~~~~~~~~ - ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the PID of a node of type content containing the full text of the GPL3 license. - ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the PID of a node of type revision corresponding to the commit in Linux that merged the 'x86/urgent' branch on 31 December 2017. - ``"dir:dir,dir:cnt"`` node types allowing edges from directories to directories nodes, or directories to contents nodes. - ``"rev:rev,dir:*"`` node types allowing edges from revisions to revisions nodes, or from directories nodes. - ``"*:rel"`` node types allowing all edges to releases. +Leaves +------ + +.. http:get:: /graph/leaves/:src + + Performs a graph traversal and returns the leaves of the subgraph rooted at + the specified source node. + + :param string src: source node specified as a SWH PID + + :query string edges: edges types the traversal can follow; default to + ``"*"`` + :query string direction: direction in which graph edges will be followed; + can be either ``forward`` or ``backward``, default to ``forward`` + + :statuscode 200: success + :statuscode 400: invalid query string provided + :statuscode 404: starting node cannot be found + + .. sourcecode:: http + + HTTP/1.1 200 OK + Content-Type: application/json + + [ + "swh:1:cnt:669ac7c32292798644b21dbb5a0dc657125f444d", + "swh:1:cnt:da4cb28febe66172a9fdf1a235525ae6c00cde1d", + ... + ] + Neighbors --------- .. http:get:: /graph/neighbors/:src Returns node direct neighbors (linked with exactly one edge) in the graph. :param string src: source node specified as a SWH PID :query string edges: edges types allowed to be listed as neighbors; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", ... ] Walk ---- .. http:get:: /graph/walk/:src/:dst Performs a graph traversal and returns the first found path from source to destination (final destination node included). :param string src: starting node specified as a SWH PID :param string dst: destination node, either as a node PID or a node type. The traversal will stop at the first node encountered matching the desired destination. :query string edges: edges types the traversal can follow; default to ``"*"`` :query string traversal: traversal algorithm; can be either ``dfs`` or ``bfs``, default to ``dfs`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", "swh:1:rev:cea92e843e40452c08ba313abc39f59efbb4c29c", "swh:1:rev:8d517bdfb57154b8a11d7f1682ecc0f79abf8e02", ... ] Visit ----- .. http:get:: /graph/visit/:src .. http:get:: /graph/visit/nodes/:src .. http:get:: /graph/visit/paths/:src Performs a graph traversal and returns explored nodes and/or paths (in the order of the traversal). :param string src: starting node specified as a SWH PID :query string edges: edges types the traversal can follow; default to ``"*"`` :query string direction: direction in which graph edges will be followed; can be either ``forward`` or ``backward``, default to ``forward`` :statuscode 200: success :statuscode 400: invalid query string provided :statuscode 404: starting node cannot be found .. sourcecode:: http GET /graph/visit/ HTTP/1.1 200 OK Content-Type: application/json { "paths": [ [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... ], [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ], ... ], "nodes": [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ] } .. sourcecode:: http GET /graph/visit/nodes/ HTTP/1.1 200 OK Content-Type: application/json [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ] .. sourcecode:: http GET /graph/visit/paths/ HTTP/1.1 200 OK Content-Type: application/json [ [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd", ... ], [ "swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35", "swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961", ... ], ... ] Stats ----- .. http:get:: /graph/stats Returns statistics on the compressed graph. :statuscode 200: success .. sourcecode:: http HTTP/1.1 200 OK Content-Type: application/json { "counts": { "nodes": 16222788, "edges": 9907464 }, "ratios": { "compression": 0.367, "bits_per_node": 5.846, "bits_per_edge": 9.573, "avg_locality": 270.369 }, "indegree": { "min": 0, "max": 12382, "avg": 0.6107127825377487 }, "outdegree": { "min": 0, "max": 1, "avg": 0.6107127825377487 } }