Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/api/client/swh/graph/client.py b/api/client/swh/graph/client.py
index 82fa44a..3c27a22 100644
--- a/api/client/swh/graph/client.py
+++ b/api/client/swh/graph/client.py
@@ -1,56 +1,63 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from enum import Enum
from swh.core.api import SWHRemoteAPI
class GraphAPIError(Exception):
"""Graph API Error"""
def __str__(self):
return ('An unexpected error occurred in the Graph backend: {}'
.format(self.args))
class OutputFmt(Enum):
ONLY_NODES = 1
ONLY_PATHS = 2
NODES_AND_PATHS = 3
class RemoteGraphClient(SWHRemoteAPI):
"""Client to the Software Heritage Graph."""
def __init__(self, url, timeout=None):
super().__init__(
api_exception=GraphAPIError, url=url, timeout=timeout)
# Web API endpoints
+ def neighbors(self, src, edges="*", direction="forward"):
+ return self.get('neighbors/{}'.format(src),
+ params={
+ 'edges': edges,
+ 'direction': direction
+ })
+
def stats(self):
return self.get('stats')
def visit(self, src, edges="*", direction="forward",
output_fmt=OutputFmt.NODES_AND_PATHS):
subendpoint = ""
if output_fmt is OutputFmt.ONLY_NODES:
subendpoint = "/nodes"
elif output_fmt is OutputFmt.ONLY_PATHS:
subendpoint = "/paths"
return self.get('visit{}/{}'.format(subendpoint, src),
params={
'edges': edges,
'direction': direction
})
def walk(self, src, dst, edges="*", traversal="dfs", direction="forward"):
return self.get('walk/{}/{}'.format(src, dst),
params={
'edges': edges,
'traversal': traversal,
'direction': direction
})
diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java
index 01156f7..7e9f05c 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,104 +1,115 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
+import org.softwareheritage.graph.algo.Neighbors;
import org.softwareheritage.graph.algo.Stats;
import org.softwareheritage.graph.algo.Visit;
import org.softwareheritage.graph.algo.Walk;
public class App {
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);
Stats stats = new Stats(path);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
Javalin app = Javalin.create().start(5009);
app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
+ app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
app.get("/stats/", ctx -> { ctx.json(stats); });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
+ app.get("/neighbors/:src", ctx -> {
+ SwhId src = new SwhId(ctx.pathParam("src"));
+ String direction = ctx.queryParam("direction", "forward");
+ String edgesFmt = ctx.queryParam("edges", "*");
+
+ Neighbors neighbors = new Neighbors(graph, src, edgesFmt, direction);
+ ctx.json(neighbors.getNeighbors());
+ });
+
app.get("/visit/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.NODES_AND_PATHS);
ctx.json(visit);
});
app.get("/visit/nodes/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_NODES);
ctx.json(visit.getNodes());
});
app.get("/visit/paths/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_PATHS);
ctx.json(visit.getPaths());
});
app.get("/walk/:src/:dst", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String traversal = ctx.queryParam("traversal", "dfs");
Walk walk = new Walk(graph, src, dstFmt, edgesFmt, direction, traversal);
ctx.json(walk.getPath());
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map<String, List<String>> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java
new file mode 100644
index 0000000..133d110
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java
@@ -0,0 +1,49 @@
+package org.softwareheritage.graph.algo;
+
+import java.util.ArrayList;
+
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+
+import org.softwareheritage.graph.AllowedEdges;
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhId;
+
+public class Neighbors {
+ Graph graph;
+ boolean useTransposed;
+ AllowedEdges edges;
+
+ ArrayList<SwhId> neighbors;
+
+ public Neighbors(Graph graph, SwhId src, String edgesFmt, String direction) {
+ if (!direction.matches("forward|backward")) {
+ throw new IllegalArgumentException("Unknown traversal direction: " + direction);
+ }
+
+ this.graph = graph;
+ this.useTransposed = (direction.equals("backward"));
+ this.edges = new AllowedEdges(edgesFmt);
+ this.neighbors = new ArrayList<SwhId>();
+
+ iterateNeighbors(src);
+ }
+
+ public ArrayList<SwhId> getNeighbors() {
+ return neighbors;
+ }
+
+ private void iterateNeighbors(SwhId swhId) {
+ long nodeId = graph.getNodeId(swhId);
+ long degree = graph.degree(nodeId, useTransposed);
+ LazyLongIterator neighborsNodeIds = graph.neighbors(nodeId, useTransposed);
+
+ while (degree-- > 0) {
+ long neighborNodeId = neighborsNodeIds.nextLong();
+ SwhId neighborSwhId = graph.getSwhId(neighborNodeId);
+ if (edges.isAllowed(swhId.getType(), neighborSwhId.getType())) {
+ neighbors.add(neighborSwhId);
+ }
+ }
+ }
+}
+
diff --git a/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java b/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java
new file mode 100644
index 0000000..800301f
--- /dev/null
+++ b/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java
@@ -0,0 +1,122 @@
+package org.softwareheritage.graph;
+
+import java.util.ArrayList;
+
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.GraphTest;
+import org.softwareheritage.graph.SwhId;
+import org.softwareheritage.graph.algo.Neighbors;
+
+public class NeighborsTest extends GraphTest {
+ @Test
+ public void zeroNeighbor() {
+ Graph graph = getGraph();
+ ArrayList<SwhId> expectedNodes = new ArrayList<>();
+
+ SwhId src1 = new SwhId("swh:1:snp:0000000000000000000000000000000000000020");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "backward");
+ assertEquals(expectedNodes, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000004");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "*", "forward");
+ assertEquals(expectedNodes, neighbors2.getNeighbors());
+
+ SwhId src3 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000015");
+ Neighbors neighbors3 = new Neighbors(graph, src3, "*", "forward");
+ assertEquals(expectedNodes, neighbors3.getNeighbors());
+
+ SwhId src4 = new SwhId("swh:1:rel:0000000000000000000000000000000000000019");
+ Neighbors neighbors4 = new Neighbors(graph, src4, "*", "backward");
+ assertEquals(expectedNodes, neighbors4.getNeighbors());
+
+ SwhId src5 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008");
+ Neighbors neighbors5 = new Neighbors(graph, src5, "snp:*,rev:*,rel:*", "forward");
+ assertEquals(expectedNodes, neighbors5.getNeighbors());
+ }
+
+ @Test
+ public void oneNeighbor() {
+ Graph graph = getGraph();
+
+ SwhId src1 = new SwhId("swh:1:rev:0000000000000000000000000000000000000003");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward");
+ ArrayList<SwhId> expectedNodes1 = new ArrayList<>();
+ expectedNodes1.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000002"));
+ assertEquals(expectedNodes1, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:dir:0000000000000000000000000000000000000017");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "dir:cnt", "forward");
+ ArrayList<SwhId> expectedNodes2 = new ArrayList<>();
+ expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000014"));
+ assertEquals(expectedNodes2, neighbors2.getNeighbors());
+
+ SwhId src3 = new SwhId("swh:1:dir:0000000000000000000000000000000000000012");
+ Neighbors neighbors3 = new Neighbors(graph, src3, "*", "backward");
+ ArrayList<SwhId> expectedNodes3 = new ArrayList<>();
+ expectedNodes3.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013"));
+ assertEquals(expectedNodes3, neighbors3.getNeighbors());
+
+ SwhId src4 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009");
+ Neighbors neighbors4 = new Neighbors(graph, src4, "rev:rev", "backward");
+ ArrayList<SwhId> expectedNodes4 = new ArrayList<>();
+ expectedNodes4.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013"));
+ assertEquals(expectedNodes4, neighbors4.getNeighbors());
+ }
+
+ @Test
+ public void twoNeighbors() {
+ Graph graph = getGraph();
+
+ SwhId src1 = new SwhId("swh:1:snp:0000000000000000000000000000000000000020");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward");
+ ArrayList<SwhId> expectedNodes1 = new ArrayList<>();
+ expectedNodes1.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010"));
+ expectedNodes1.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000009"));
+ assertEquals(expectedNodes1, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "dir:cnt", "forward");
+ ArrayList<SwhId> expectedNodes2 = new ArrayList<>();
+ expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001"));
+ expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007"));
+ assertEquals(expectedNodes2, neighbors2.getNeighbors());
+
+ SwhId src3 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000001");
+ Neighbors neighbors3 = new Neighbors(graph, src3, "*", "backward");
+ ArrayList<SwhId> expectedNodes3 = new ArrayList<>();
+ expectedNodes3.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000008"));
+ expectedNodes3.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000002"));
+ assertEquals(expectedNodes3, neighbors3.getNeighbors());
+
+ SwhId src4 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009");
+ Neighbors neighbors4 = new Neighbors(graph, src4, "rev:snp,rev:rel", "backward");
+ ArrayList<SwhId> expectedNodes4 = new ArrayList<>();
+ expectedNodes4.add(new SwhId("swh:1:snp:0000000000000000000000000000000000000020"));
+ expectedNodes4.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010"));
+ assertEquals(expectedNodes4, neighbors4.getNeighbors());
+ }
+
+ @Test
+ public void threeNeighbors() {
+ Graph graph = getGraph();
+
+ SwhId src1 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward");
+ ArrayList<SwhId> expectedNodes1 = new ArrayList<>();
+ expectedNodes1.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000006"));
+ expectedNodes1.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001"));
+ expectedNodes1.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007"));
+ assertEquals(expectedNodes1, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "*", "backward");
+ ArrayList<SwhId> expectedNodes2 = new ArrayList<>();
+ expectedNodes2.add(new SwhId("swh:1:snp:0000000000000000000000000000000000000020"));
+ expectedNodes2.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010"));
+ expectedNodes2.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013"));
+ assertEquals(expectedNodes2, neighbors2.getNeighbors());
+ }
+}
diff --git a/docs/api.rst b/docs/api.rst
index dfcadbe..7588749 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,189 +1,218 @@
Graph traversal API
===================
Terminology
-----------
This API uses the following notions:
- **Node**: a node in the `Software Heritage graph
<https://docs.softwareheritage.org/devel/swh-model/data-model.html>`_,
represented by a `persistent identifier
<https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers>`_
(abbreviated as *SWH PID*, or simply *PID*).
- **Node type**: the 3-letter specifier from the node PID (``cnt``, ``dir``,
``rel``, ``rev``, ``snp``), or ``*`` for all node types.
- **Edge type**: a comma-separated list of ``src:dst`` strings where ``src`` and
``dst`` are node types, or ``*`` for all edge types.
Examples
~~~~~~~~
- ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the PID of a node of
type content containing the full text of the GPL3 license.
- ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the PID of a node of
type revision corresponding to the commit in Linux that merged the
'x86/urgent' branch on 31 December 2017.
- ``"dir:dir,dir:cnt"`` node types allowing edges from directories to
directories nodes, or directories to contents nodes.
- ``"rev:rev,dir:*"`` node types allowing edges from revisions to revisions
nodes, or from directories nodes.
- ``"*:rel"`` node types allowing all edges to releases.
+Neighbors
+---------
+
+.. http:get:: /graph/neighbors/:src
+
+ Returns node direct neighbors (linked with exactly one edge) in the graph.
+
+ :param string src: source node specified as a SWH PID
+
+ :query string edges: edges types allowed to be listed as neighbors; default
+ to ``"*"``
+ :query string direction: direction in which graph edges will be followed;
+ can be either ``forward`` or ``backward``, default to ``forward``
+
+ :statuscode 200: success
+ :statuscode 400: invalid query string provided
+ :statuscode 404: starting node cannot be found
+
+ .. sourcecode:: http
+
+ HTTP/1.1 200 OK
+ Content-Type: application/json
+
+ [
+ "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+ "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
+ ...
+ ]
+
Walk
----
.. http:get:: /graph/walk/:src/:dst
Performs a graph traversal and returns the first found path from source to
destination (final destination node included).
:param string src: starting node specified as a SWH PID
:param string dst: destination node, either as a node PID or a node type.
The traversal will stop at the first node encountered matching the desired
destination.
:query string edges: edges types the traversal can follow; default to
``"*"``
:query string traversal: traversal algorithm; can be either ``dfs`` or
``bfs``, default to ``dfs``
:query string direction: direction in which graph edges will be followed;
can be either ``forward`` or ``backward``, default to ``forward``
:statuscode 200: success
:statuscode 400: invalid query string provided
:statuscode 404: starting node cannot be found
.. sourcecode:: http
HTTP/1.1 200 OK
Content-Type: application/json
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
"swh:1:rev:cea92e843e40452c08ba313abc39f59efbb4c29c",
"swh:1:rev:8d517bdfb57154b8a11d7f1682ecc0f79abf8e02",
...
]
Visit
-----
.. http:get:: /graph/visit/:src
.. http:get:: /graph/visit/nodes/:src
.. http:get:: /graph/visit/paths/:src
Performs a graph traversal and returns explored nodes and/or paths (in the
order of the traversal).
:param string src: starting node specified as a SWH PID
:query string edges: edges types the traversal can follow; default to
``"*"``
:query string direction: direction in which graph edges will be followed;
can be either ``forward`` or ``backward``, default to ``forward``
:statuscode 200: success
:statuscode 400: invalid query string provided
:statuscode 404: starting node cannot be found
.. sourcecode:: http
GET /graph/visit/
HTTP/1.1 200 OK
Content-Type: application/json
{
"paths": [
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
],
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
],
...
],
"nodes": [
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
]
}
.. sourcecode:: http
GET /graph/visit/nodes/
HTTP/1.1 200 OK
Content-Type: application/json
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
]
.. sourcecode:: http
GET /graph/visit/paths/
HTTP/1.1 200 OK
Content-Type: application/json
[
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
],
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
],
...
]
Stats
-----
.. http:get:: /graph/stats
Returns statistics on the compressed graph.
:statuscode 200: success
.. sourcecode:: http
HTTP/1.1 200 OK
Content-Type: application/json
{
"counts": {
"nodes": 16222788,
"edges": 9907464
},
"ratios": {
"compression": 0.367,
"bits_per_node": 5.846,
"bits_per_edge": 9.573,
"avg_locality": 270.369
},
"indegree": {
"min": 0,
"max": 12382,
"avg": 0.6107127825377487
},
"outdegree": {
"min": 0,
"max": 1,
"avg": 0.6107127825377487
}
}

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jun 21, 6:24 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214539

Event Timeline