Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9123944
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
21 KB
Subscribers
None
View Options
diff --git a/api/client/swh/graph/client.py b/api/client/swh/graph/client.py
index 82fa44a..3c27a22 100644
--- a/api/client/swh/graph/client.py
+++ b/api/client/swh/graph/client.py
@@ -1,56 +1,63 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from enum import Enum
from swh.core.api import SWHRemoteAPI
class GraphAPIError(Exception):
"""Graph API Error"""
def __str__(self):
return ('An unexpected error occurred in the Graph backend: {}'
.format(self.args))
class OutputFmt(Enum):
ONLY_NODES = 1
ONLY_PATHS = 2
NODES_AND_PATHS = 3
class RemoteGraphClient(SWHRemoteAPI):
"""Client to the Software Heritage Graph."""
def __init__(self, url, timeout=None):
super().__init__(
api_exception=GraphAPIError, url=url, timeout=timeout)
# Web API endpoints
+ def neighbors(self, src, edges="*", direction="forward"):
+ return self.get('neighbors/{}'.format(src),
+ params={
+ 'edges': edges,
+ 'direction': direction
+ })
+
def stats(self):
return self.get('stats')
def visit(self, src, edges="*", direction="forward",
output_fmt=OutputFmt.NODES_AND_PATHS):
subendpoint = ""
if output_fmt is OutputFmt.ONLY_NODES:
subendpoint = "/nodes"
elif output_fmt is OutputFmt.ONLY_PATHS:
subendpoint = "/paths"
return self.get('visit{}/{}'.format(subendpoint, src),
params={
'edges': edges,
'direction': direction
})
def walk(self, src, dst, edges="*", traversal="dfs", direction="forward"):
return self.get('walk/{}/{}'.format(src, dst),
params={
'edges': edges,
'traversal': traversal,
'direction': direction
})
diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java
index 01156f7..7e9f05c 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/App.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/App.java
@@ -1,104 +1,115 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.PropertyNamingStrategy;
import io.javalin.Javalin;
import io.javalin.http.Context;
import io.javalin.plugin.json.JavalinJackson;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
+import org.softwareheritage.graph.algo.Neighbors;
import org.softwareheritage.graph.algo.Stats;
import org.softwareheritage.graph.algo.Visit;
import org.softwareheritage.graph.algo.Walk;
public class App {
public static void main(String[] args) throws IOException {
String path = args[0];
Graph graph = new Graph(path);
Stats stats = new Stats(path);
// Clean up on exit
Runtime.getRuntime().addShutdownHook(new Thread() {
public void run() {
try {
graph.cleanUp();
} catch (IOException e) {
System.out.println("Could not clean up graph on exit: " + e);
}
}
});
// Configure Jackson JSON to use snake case naming style
ObjectMapper objectMapper = JavalinJackson.getObjectMapper();
objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE);
JavalinJackson.configure(objectMapper);
Javalin app = Javalin.create().start(5009);
app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); });
+ app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); });
app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); });
app.get("/stats/", ctx -> { ctx.json(stats); });
// Graph traversal endpoints
// By default the traversal is a forward DFS using all edges
+ app.get("/neighbors/:src", ctx -> {
+ SwhId src = new SwhId(ctx.pathParam("src"));
+ String direction = ctx.queryParam("direction", "forward");
+ String edgesFmt = ctx.queryParam("edges", "*");
+
+ Neighbors neighbors = new Neighbors(graph, src, edgesFmt, direction);
+ ctx.json(neighbors.getNeighbors());
+ });
+
app.get("/visit/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.NODES_AND_PATHS);
ctx.json(visit);
});
app.get("/visit/nodes/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_NODES);
ctx.json(visit.getNodes());
});
app.get("/visit/paths/:src", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
Visit visit = new Visit(graph, src, edgesFmt, direction, Visit.OutputFmt.ONLY_PATHS);
ctx.json(visit.getPaths());
});
app.get("/walk/:src/:dst", ctx -> {
SwhId src = new SwhId(ctx.pathParam("src"));
String dstFmt = ctx.pathParam("dst");
String direction = ctx.queryParam("direction", "forward");
String edgesFmt = ctx.queryParam("edges", "*");
String traversal = ctx.queryParam("traversal", "dfs");
Walk walk = new Walk(graph, src, dstFmt, edgesFmt, direction, traversal);
ctx.json(walk.getPath());
});
app.exception(IllegalArgumentException.class, (e, ctx) -> {
ctx.status(400);
ctx.result(e.getMessage());
});
}
private static void checkQueryStrings(Context ctx, String allowedFmt) {
Map<String, List<String>> queryParamMap = ctx.queryParamMap();
for (String key : queryParamMap.keySet()) {
if (!key.matches(allowedFmt)) {
throw new IllegalArgumentException("Unknown query string: " + key);
}
}
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java
new file mode 100644
index 0000000..133d110
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Neighbors.java
@@ -0,0 +1,49 @@
+package org.softwareheritage.graph.algo;
+
+import java.util.ArrayList;
+
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+
+import org.softwareheritage.graph.AllowedEdges;
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhId;
+
+public class Neighbors {
+ Graph graph;
+ boolean useTransposed;
+ AllowedEdges edges;
+
+ ArrayList<SwhId> neighbors;
+
+ public Neighbors(Graph graph, SwhId src, String edgesFmt, String direction) {
+ if (!direction.matches("forward|backward")) {
+ throw new IllegalArgumentException("Unknown traversal direction: " + direction);
+ }
+
+ this.graph = graph;
+ this.useTransposed = (direction.equals("backward"));
+ this.edges = new AllowedEdges(edgesFmt);
+ this.neighbors = new ArrayList<SwhId>();
+
+ iterateNeighbors(src);
+ }
+
+ public ArrayList<SwhId> getNeighbors() {
+ return neighbors;
+ }
+
+ private void iterateNeighbors(SwhId swhId) {
+ long nodeId = graph.getNodeId(swhId);
+ long degree = graph.degree(nodeId, useTransposed);
+ LazyLongIterator neighborsNodeIds = graph.neighbors(nodeId, useTransposed);
+
+ while (degree-- > 0) {
+ long neighborNodeId = neighborsNodeIds.nextLong();
+ SwhId neighborSwhId = graph.getSwhId(neighborNodeId);
+ if (edges.isAllowed(swhId.getType(), neighborSwhId.getType())) {
+ neighbors.add(neighborSwhId);
+ }
+ }
+ }
+}
+
diff --git a/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java b/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java
new file mode 100644
index 0000000..800301f
--- /dev/null
+++ b/api/server/src/test/java/org/softwareheritage/graph/NeighborsTest.java
@@ -0,0 +1,122 @@
+package org.softwareheritage.graph;
+
+import java.util.ArrayList;
+
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.GraphTest;
+import org.softwareheritage.graph.SwhId;
+import org.softwareheritage.graph.algo.Neighbors;
+
+public class NeighborsTest extends GraphTest {
+ @Test
+ public void zeroNeighbor() {
+ Graph graph = getGraph();
+ ArrayList<SwhId> expectedNodes = new ArrayList<>();
+
+ SwhId src1 = new SwhId("swh:1:snp:0000000000000000000000000000000000000020");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "backward");
+ assertEquals(expectedNodes, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000004");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "*", "forward");
+ assertEquals(expectedNodes, neighbors2.getNeighbors());
+
+ SwhId src3 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000015");
+ Neighbors neighbors3 = new Neighbors(graph, src3, "*", "forward");
+ assertEquals(expectedNodes, neighbors3.getNeighbors());
+
+ SwhId src4 = new SwhId("swh:1:rel:0000000000000000000000000000000000000019");
+ Neighbors neighbors4 = new Neighbors(graph, src4, "*", "backward");
+ assertEquals(expectedNodes, neighbors4.getNeighbors());
+
+ SwhId src5 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008");
+ Neighbors neighbors5 = new Neighbors(graph, src5, "snp:*,rev:*,rel:*", "forward");
+ assertEquals(expectedNodes, neighbors5.getNeighbors());
+ }
+
+ @Test
+ public void oneNeighbor() {
+ Graph graph = getGraph();
+
+ SwhId src1 = new SwhId("swh:1:rev:0000000000000000000000000000000000000003");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward");
+ ArrayList<SwhId> expectedNodes1 = new ArrayList<>();
+ expectedNodes1.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000002"));
+ assertEquals(expectedNodes1, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:dir:0000000000000000000000000000000000000017");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "dir:cnt", "forward");
+ ArrayList<SwhId> expectedNodes2 = new ArrayList<>();
+ expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000014"));
+ assertEquals(expectedNodes2, neighbors2.getNeighbors());
+
+ SwhId src3 = new SwhId("swh:1:dir:0000000000000000000000000000000000000012");
+ Neighbors neighbors3 = new Neighbors(graph, src3, "*", "backward");
+ ArrayList<SwhId> expectedNodes3 = new ArrayList<>();
+ expectedNodes3.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013"));
+ assertEquals(expectedNodes3, neighbors3.getNeighbors());
+
+ SwhId src4 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009");
+ Neighbors neighbors4 = new Neighbors(graph, src4, "rev:rev", "backward");
+ ArrayList<SwhId> expectedNodes4 = new ArrayList<>();
+ expectedNodes4.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013"));
+ assertEquals(expectedNodes4, neighbors4.getNeighbors());
+ }
+
+ @Test
+ public void twoNeighbors() {
+ Graph graph = getGraph();
+
+ SwhId src1 = new SwhId("swh:1:snp:0000000000000000000000000000000000000020");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward");
+ ArrayList<SwhId> expectedNodes1 = new ArrayList<>();
+ expectedNodes1.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010"));
+ expectedNodes1.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000009"));
+ assertEquals(expectedNodes1, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "dir:cnt", "forward");
+ ArrayList<SwhId> expectedNodes2 = new ArrayList<>();
+ expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001"));
+ expectedNodes2.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007"));
+ assertEquals(expectedNodes2, neighbors2.getNeighbors());
+
+ SwhId src3 = new SwhId("swh:1:cnt:0000000000000000000000000000000000000001");
+ Neighbors neighbors3 = new Neighbors(graph, src3, "*", "backward");
+ ArrayList<SwhId> expectedNodes3 = new ArrayList<>();
+ expectedNodes3.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000008"));
+ expectedNodes3.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000002"));
+ assertEquals(expectedNodes3, neighbors3.getNeighbors());
+
+ SwhId src4 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009");
+ Neighbors neighbors4 = new Neighbors(graph, src4, "rev:snp,rev:rel", "backward");
+ ArrayList<SwhId> expectedNodes4 = new ArrayList<>();
+ expectedNodes4.add(new SwhId("swh:1:snp:0000000000000000000000000000000000000020"));
+ expectedNodes4.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010"));
+ assertEquals(expectedNodes4, neighbors4.getNeighbors());
+ }
+
+ @Test
+ public void threeNeighbors() {
+ Graph graph = getGraph();
+
+ SwhId src1 = new SwhId("swh:1:dir:0000000000000000000000000000000000000008");
+ Neighbors neighbors1 = new Neighbors(graph, src1, "*", "forward");
+ ArrayList<SwhId> expectedNodes1 = new ArrayList<>();
+ expectedNodes1.add(new SwhId("swh:1:dir:0000000000000000000000000000000000000006"));
+ expectedNodes1.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000001"));
+ expectedNodes1.add(new SwhId("swh:1:cnt:0000000000000000000000000000000000000007"));
+ assertEquals(expectedNodes1, neighbors1.getNeighbors());
+
+ SwhId src2 = new SwhId("swh:1:rev:0000000000000000000000000000000000000009");
+ Neighbors neighbors2 = new Neighbors(graph, src2, "*", "backward");
+ ArrayList<SwhId> expectedNodes2 = new ArrayList<>();
+ expectedNodes2.add(new SwhId("swh:1:snp:0000000000000000000000000000000000000020"));
+ expectedNodes2.add(new SwhId("swh:1:rel:0000000000000000000000000000000000000010"));
+ expectedNodes2.add(new SwhId("swh:1:rev:0000000000000000000000000000000000000013"));
+ assertEquals(expectedNodes2, neighbors2.getNeighbors());
+ }
+}
diff --git a/docs/api.rst b/docs/api.rst
index dfcadbe..7588749 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -1,189 +1,218 @@
Graph traversal API
===================
Terminology
-----------
This API uses the following notions:
- **Node**: a node in the `Software Heritage graph
<https://docs.softwareheritage.org/devel/swh-model/data-model.html>`_,
represented by a `persistent identifier
<https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers>`_
(abbreviated as *SWH PID*, or simply *PID*).
- **Node type**: the 3-letter specifier from the node PID (``cnt``, ``dir``,
``rel``, ``rev``, ``snp``), or ``*`` for all node types.
- **Edge type**: a comma-separated list of ``src:dst`` strings where ``src`` and
``dst`` are node types, or ``*`` for all edge types.
Examples
~~~~~~~~
- ``swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2`` the PID of a node of
type content containing the full text of the GPL3 license.
- ``swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35`` the PID of a node of
type revision corresponding to the commit in Linux that merged the
'x86/urgent' branch on 31 December 2017.
- ``"dir:dir,dir:cnt"`` node types allowing edges from directories to
directories nodes, or directories to contents nodes.
- ``"rev:rev,dir:*"`` node types allowing edges from revisions to revisions
nodes, or from directories nodes.
- ``"*:rel"`` node types allowing all edges to releases.
+Neighbors
+---------
+
+.. http:get:: /graph/neighbors/:src
+
+ Returns node direct neighbors (linked with exactly one edge) in the graph.
+
+ :param string src: source node specified as a SWH PID
+
+ :query string edges: edges types allowed to be listed as neighbors; default
+ to ``"*"``
+ :query string direction: direction in which graph edges will be followed;
+ can be either ``forward`` or ``backward``, default to ``forward``
+
+ :statuscode 200: success
+ :statuscode 400: invalid query string provided
+ :statuscode 404: starting node cannot be found
+
+ .. sourcecode:: http
+
+ HTTP/1.1 200 OK
+ Content-Type: application/json
+
+ [
+ "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+ "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
+ ...
+ ]
+
Walk
----
.. http:get:: /graph/walk/:src/:dst
Performs a graph traversal and returns the first found path from source to
destination (final destination node included).
:param string src: starting node specified as a SWH PID
:param string dst: destination node, either as a node PID or a node type.
The traversal will stop at the first node encountered matching the desired
destination.
:query string edges: edges types the traversal can follow; default to
``"*"``
:query string traversal: traversal algorithm; can be either ``dfs`` or
``bfs``, default to ``dfs``
:query string direction: direction in which graph edges will be followed;
can be either ``forward`` or ``backward``, default to ``forward``
:statuscode 200: success
:statuscode 400: invalid query string provided
:statuscode 404: starting node cannot be found
.. sourcecode:: http
HTTP/1.1 200 OK
Content-Type: application/json
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
"swh:1:rev:cea92e843e40452c08ba313abc39f59efbb4c29c",
"swh:1:rev:8d517bdfb57154b8a11d7f1682ecc0f79abf8e02",
...
]
Visit
-----
.. http:get:: /graph/visit/:src
.. http:get:: /graph/visit/nodes/:src
.. http:get:: /graph/visit/paths/:src
Performs a graph traversal and returns explored nodes and/or paths (in the
order of the traversal).
:param string src: starting node specified as a SWH PID
:query string edges: edges types the traversal can follow; default to
``"*"``
:query string direction: direction in which graph edges will be followed;
can be either ``forward`` or ``backward``, default to ``forward``
:statuscode 200: success
:statuscode 400: invalid query string provided
:statuscode 404: starting node cannot be found
.. sourcecode:: http
GET /graph/visit/
HTTP/1.1 200 OK
Content-Type: application/json
{
"paths": [
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
],
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
],
...
],
"nodes": [
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
]
}
.. sourcecode:: http
GET /graph/visit/nodes/
HTTP/1.1 200 OK
Content-Type: application/json
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
]
.. sourcecode:: http
GET /graph/visit/paths/
HTTP/1.1 200 OK
Content-Type: application/json
[
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd",
...
],
[
"swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35",
"swh:1:rev:a31e58e129f73ab5b04016330b13ed51fde7a961",
...
],
...
]
Stats
-----
.. http:get:: /graph/stats
Returns statistics on the compressed graph.
:statuscode 200: success
.. sourcecode:: http
HTTP/1.1 200 OK
Content-Type: application/json
{
"counts": {
"nodes": 16222788,
"edges": 9907464
},
"ratios": {
"compression": 0.367,
"bits_per_node": 5.846,
"bits_per_edge": 9.573,
"avg_locality": 270.369
},
"indegree": {
"min": 0,
"max": 12382,
"avg": 0.6107127825377487
},
"outdegree": {
"min": 0,
"max": 1,
"avg": 0.6107127825377487
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jun 21, 6:24 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214539
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment