path);
+}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
index fe0f56f..9aecce8 100644
--- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
+++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java
@@ -1,328 +1,353 @@
package org.softwareheritage.graph.algo;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.Stack;
import it.unimi.dsi.bits.LongArrayBitVector;
import org.softwareheritage.graph.AllowedEdges;
import org.softwareheritage.graph.Endpoint;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Neighbors;
import org.softwareheritage.graph.Node;
+import org.softwareheritage.graph.algo.NodeIdConsumer;
+import org.softwareheritage.graph.algo.PathConsumer;
/**
* Traversal algorithms on the compressed graph.
*
* Internal implementation of the traversal API endpoints. These methods only input/output internal
* long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage
* PID.
*
* @author The Software Heritage developers
* @see org.softwareheritage.graph.Endpoint
*/
public class Traversal {
/** Graph used in the traversal */
Graph graph;
/** Boolean to specify the use of the transposed graph */
boolean useTransposed;
/** Graph edge restriction */
AllowedEdges edges;
/** Bit array storing if we have visited a node */
LongArrayBitVector visited;
/** Hash map storing parent node id for each nodes during a traversal */
Map parentNode;
/** Number of edges accessed during traversal */
long nbEdgesAccessed;
/**
* Constructor.
*
* @param graph graph used in the traversal
* @param direction a string (either "forward" or "backward") specifying edge orientation
* @param edgesFmt a formatted string describing allowed edges
*/
public Traversal(Graph graph, String direction, String edgesFmt) {
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
this.graph = graph;
this.useTransposed = (direction.equals("backward"));
this.edges = new AllowedEdges(graph, edgesFmt);
long nbNodes = graph.getNbNodes();
this.visited = LongArrayBitVector.ofLength(nbNodes);
this.parentNode = new HashMap<>();
this.nbEdgesAccessed = 0;
}
/**
* Returns number of accessed edges during traversal.
*
* @return number of edges accessed in last traversal
*/
public long getNbEdgesAccessed() {
return nbEdgesAccessed;
}
/**
- * Returns the leaves of a subgraph rooted at the specified source node.
- *
- * @param srcNodeId source node
- * @return list of node ids corresponding to the leaves
+ * Push version of {@link leaves}: will fire passed callback for each leaf.
*/
- public ArrayList leaves(long srcNodeId) {
- ArrayList nodeIds = new ArrayList();
+ public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) {
Stack stack = new Stack();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
long neighborsCnt = 0;
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
neighborsCnt++;
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
if (neighborsCnt == 0) {
- nodeIds.add(currentNodeId);
+ cb.accept(currentNodeId);
}
}
-
- return nodeIds;
}
/**
- * Returns node direct neighbors (linked with exactly one edge).
+ * Returns the leaves of a subgraph rooted at the specified source node.
*
* @param srcNodeId source node
- * @return list of node ids corresponding to the neighbors
+ * @return list of node ids corresponding to the leaves
*/
- public ArrayList neighbors(long srcNodeId) {
+ public ArrayList leaves(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
+ leavesVisitor(srcNodeId, (nodeId) -> nodeIds.add(nodeId));
+ return nodeIds;
+ }
+
+ /**
+ * Push version of {@link neighbors}: will fire passed callback on each
+ * neighbor.
+ */
+ public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) {
this.nbEdgesAccessed = graph.degree(srcNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) {
- nodeIds.add(neighborNodeId);
+ cb.accept(neighborNodeId);
}
- return nodeIds;
}
/**
- * Performs a graph traversal and returns explored nodes.
+ * Returns node direct neighbors (linked with exactly one edge).
*
* @param srcNodeId source node
- * @return list of explored node ids
+ * @return list of node ids corresponding to the neighbors
*/
- public ArrayList visitNodes(long srcNodeId) {
+ public ArrayList neighbors(long srcNodeId) {
ArrayList nodeIds = new ArrayList();
+ neighborsVisitor(srcNodeId, (nodeId) -> nodeIds.add(nodeId));
+ return nodeIds;
+ }
+
+ /**
+ * Push version of {@link visitNodes}: will fire passed callback on each
+ * visited node.
+ */
+ public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) {
Stack stack = new Stack();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
- nodeIds.add(currentNodeId);
+ cb.accept(currentNodeId);
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
}
}
}
+ }
- return nodeIds;
+ /**
+ * Performs a graph traversal and returns explored nodes.
+ *
+ * @param srcNodeId source node
+ * @return list of explored node ids
+ */
+ public ArrayList visitNodes(long srcNodeId) {
+ ArrayList nodeIds = new ArrayList();
+ visitNodesVisitor(srcNodeId, (nodeId) -> nodeIds.add(nodeId));
+ return nodeIds;
+ }
+
+ /**
+ * Push version of {@link visitPaths}: will fire passed callback on each
+ * discovered (complete) path.
+ */
+ public void visitPathsVisitor(long srcNodeId, PathConsumer cb) {
+ Stack currentPath = new Stack();
+ this.nbEdgesAccessed = 0;
+ visitPathsInternalVisitor(srcNodeId, currentPath, cb);
}
/**
* Performs a graph traversal and returns explored paths.
*
* @param srcNodeId source node
* @return list of explored paths (represented as a list of node ids)
*/
public ArrayList> visitPaths(long srcNodeId) {
ArrayList> paths = new ArrayList<>();
- Stack currentPath = new Stack();
- this.nbEdgesAccessed = 0;
- visitPathsInternal(srcNodeId, paths, currentPath);
+ visitPathsVisitor(srcNodeId, (path) -> paths.add(path));
return paths;
}
- /**
- * Internal recursive function of {@link #visitPaths}.
- *
- * @param currentNodeId current node
- * @param paths list of currently stored paths
- * @param currentPath current path as node ids
- */
- private void visitPathsInternal(
- long currentNodeId, ArrayList> paths, Stack currentPath) {
+ private void visitPathsInternalVisitor(long currentNodeId,
+ Stack currentPath,
+ PathConsumer cb) {
currentPath.push(currentNodeId);
long visitedNeighbors = 0;
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
- visitPathsInternal(neighborNodeId, paths, currentPath);
+ visitPathsInternalVisitor(neighborNodeId, currentPath, cb);
visitedNeighbors++;
}
if (visitedNeighbors == 0) {
ArrayList path = new ArrayList();
for (long nodeId : currentPath) {
path.add(nodeId);
}
- paths.add(path);
+ cb.accept(path);
}
currentPath.pop();
}
/**
* Performs a graph traversal and returns the first found path from source to destination.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return found path as a list of node ids
*/
public ArrayList walk(long srcNodeId, T dst, String algorithm) {
long dstNodeId = -1;
if (algorithm.equals("dfs")) {
dstNodeId = walkInternalDfs(srcNodeId, dst);
} else if (algorithm.equals("bfs")) {
dstNodeId = walkInternalBfs(srcNodeId, dst);
} else {
throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm);
}
if (dstNodeId == -1) {
throw new IllegalArgumentException("Unable to find destination point: " + dst);
}
ArrayList nodeIds = backtracking(srcNodeId, dstNodeId);
return nodeIds;
}
/**
* Internal DFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalDfs(long srcNodeId, T dst) {
Stack stack = new Stack();
this.nbEdgesAccessed = 0;
stack.push(srcNodeId);
visited.set(srcNodeId);
while (!stack.isEmpty()) {
long currentNodeId = stack.pop();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
stack.push(neighborNodeId);
visited.set(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal BFS function of {@link #walk}.
*
* @param srcNodeId source node
* @param dst destination (either a node or a node type)
* @return final destination node or -1 if no path found
*/
private long walkInternalBfs(long srcNodeId, T dst) {
Queue queue = new LinkedList();
this.nbEdgesAccessed = 0;
queue.add(srcNodeId);
visited.set(srcNodeId);
while (!queue.isEmpty()) {
long currentNodeId = queue.poll();
if (isDstNode(currentNodeId, dst)) {
return currentNodeId;
}
nbEdgesAccessed += graph.degree(currentNodeId, useTransposed);
for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) {
if (!visited.getBoolean(neighborNodeId)) {
queue.add(neighborNodeId);
visited.set(neighborNodeId);
parentNode.put(neighborNodeId, currentNodeId);
}
}
}
return -1;
}
/**
* Internal function of {@link #walk} to check if a node corresponds to the destination.
*
* @param nodeId current node
* @param dst destination (either a node or a node type)
* @return true if the node is a destination, or false otherwise
*/
private boolean isDstNode(long nodeId, T dst) {
if (dst instanceof Long) {
long dstNodeId = (Long) dst;
return nodeId == dstNodeId;
} else if (dst instanceof Node.Type) {
Node.Type dstType = (Node.Type) dst;
return graph.getNodeType(nodeId) == dstType;
} else {
return false;
}
}
/**
* Internal backtracking function of {@link #walk}.
*
* @param srcNodeId source node
* @param dstNodeId destination node
* @return the found path, as a list of node ids
*/
private ArrayList backtracking(long srcNodeId, long dstNodeId) {
ArrayList path = new ArrayList();
long currentNodeId = dstNodeId;
while (currentNodeId != srcNodeId) {
path.add(currentNodeId);
currentNodeId = parentNode.get(currentNodeId);
}
path.add(srcNodeId);
Collections.reverse(path);
return path;
}
}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Pp.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Pp.java
new file mode 100644
index 0000000..aa7ab3c
--- /dev/null
+++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Pp.java
@@ -0,0 +1,42 @@
+package org.softwareheritage.graph.backend;
+
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Writer;
+import java.util.zip.GZIPInputStream;
+
+import it.unimi.dsi.bits.LongArrayBitVector;
+import it.unimi.dsi.fastutil.Size64;
+import it.unimi.dsi.fastutil.io.BinIO;
+import it.unimi.dsi.fastutil.longs.LongBigArrays;
+import it.unimi.dsi.fastutil.longs.LongBigList;
+import it.unimi.dsi.fastutil.objects.Object2LongFunction;
+import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
+import it.unimi.dsi.io.FastBufferedReader;
+import it.unimi.dsi.io.LineIterator;
+
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.Node;
+import org.softwareheritage.graph.SwhPID;
+import org.softwareheritage.graph.backend.NodeTypesMap;
+
+public class Pp {
+
+ public static void main(String[] args) throws IOException {
+
+ Object2LongFunction mphMap = null;
+ try {
+ mphMap = (Object2LongFunction) BinIO.loadObject("all.mph");
+ } catch (ClassNotFoundException e) {
+ throw new IllegalArgumentException("The .mph file contains unknown class object: " + e);
+ }
+
+ long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size();
+
+ System.out.println("mph size: " + nbIds);
+ }
+}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/WriteLong.java b/java/server/src/main/java/org/softwareheritage/graph/t/WriteLong.java
new file mode 100644
index 0000000..547011a
--- /dev/null
+++ b/java/server/src/main/java/org/softwareheritage/graph/t/WriteLong.java
@@ -0,0 +1,24 @@
+import java.io.DataOutputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+public class WriteLong {
+ public static void main(String args[]) {
+ String filename = null;
+ try {
+ filename = args[0];
+ FileOutputStream file = new FileOutputStream(filename);
+ DataOutputStream data = new DataOutputStream(file);
+ while (true) {
+ data.writeLong(Long.parseLong(args[1]));
+ }
+ //data.close();
+ } catch (IOException e) {
+ System.out.println("cannot write to file " + filename + "\n" + e);
+ System.exit(2);
+ } catch (ArrayIndexOutOfBoundsException e) {
+ System.out.println("Usage: Writer FILENAME INT");
+ System.exit(1);
+ }
+ }
+}
diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/aiohttp_rest.py b/java/server/src/main/java/org/softwareheritage/graph/t/aiohttp_rest.py
new file mode 100644
index 0000000..87c89a3
--- /dev/null
+++ b/java/server/src/main/java/org/softwareheritage/graph/t/aiohttp_rest.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2018 Antoine Pietri
+# SPDX-License-Identifier: MIT
+
+import argparse
+import aiohttp
+import aiohttp.web
+import hashutil
+
+
+async def hello(request):
+ return aiohttp.web.json_response(
+ {'hi': 'hello'}, headers={'Access-Control-Allow-Origin': '*'})
+
+
+async def make_app():
+ app = aiohttp.web.Application()
+ app.add_routes([
+ aiohttp.web.get('/hello', hello)])
+ return app
+
+
+async def get_stream(request): # from objstorage
+ hex_id = request.match_info['hex_id']
+ obj_id = hashutil.hash_to_bytes(hex_id)
+ response = aiohttp.web.StreamResponse()
+ await response.prepare(request)
+ for chunk in request.app['objstorage'].get_stream(obj_id, 2 << 20):
+ await response.write(chunk)
+ await response.write_eof()
+ return response
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(
+ description='Test')
+ parser.add_argument('--host', default='127.0.0.1', help='Bind address')
+ parser.add_argument('--port', default=9012, help='Bind port')
+
+ args = parser.parse_args()
+
+ aiohttp.web.run_app(make_app(), host=args.host, port=args.port)
diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/data.bin b/java/server/src/main/java/org/softwareheritage/graph/t/data.bin
new file mode 100644
index 0000000..2580cea
Binary files /dev/null and b/java/server/src/main/java/org/softwareheritage/graph/t/data.bin differ
diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/read_long.py b/java/server/src/main/java/org/softwareheritage/graph/t/read_long.py
new file mode 100755
index 0000000..c353798
--- /dev/null
+++ b/java/server/src/main/java/org/softwareheritage/graph/t/read_long.py
@@ -0,0 +1,24 @@
+#!/usr/bin/python3
+
+import struct
+import sys
+
+BUF_SIZE = 64*1024
+BIN_FMT = '>q' # 64 bit integer, big endian
+
+
+def main(fname):
+ with open(fname, 'rb') as f:
+ data = f.read(BUF_SIZE)
+ while(data):
+ for data in struct.iter_unpack(BIN_FMT, data):
+ print(data[0])
+ data = f.read(BUF_SIZE)
+
+
+if __name__ == '__main__':
+ try:
+ main(sys.argv[1])
+ except IndexError:
+ print('Usage: read_long FILENAME')
+ sys.exit(1)
diff --git a/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java
index b58a540..9355989 100644
--- a/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java
+++ b/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java
@@ -1,30 +1,30 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
import org.junit.Assert;
import org.junit.BeforeClass;
import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
import org.softwareheritage.graph.Graph;
public class GraphTest {
static Graph graph;
public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) {
Assert.assertThat(expecteds, containsInAnyOrder(actuals.toArray()));
}
@BeforeClass
public static void setUp() throws IOException {
- Path graphPath = Paths.get("src", "test", "dataset", "output", "example");
+ Path graphPath = Paths.get("..", "..", "tests", "dataset", "output", "example");
graph = new Graph(graphPath.toString());
}
public Graph getGraph() {
return graph;
}
}
diff --git a/requirements.txt b/requirements.txt
index f0777c3..3f9470a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
aiohttp
click
vcversioner
+py4j
diff --git a/setup.py b/setup.py
index ea4133f..751640d 100755
--- a/setup.py
+++ b/setup.py
@@ -1,71 +1,75 @@
#!/usr/bin/env python3
# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
+from glob import glob
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
def parse_requirements(name=None):
if name:
reqf = 'requirements-%s.txt' % name
else:
reqf = 'requirements.txt'
requirements = []
if not path.exists(reqf):
return requirements
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith('#'):
continue
requirements.append(line)
return requirements
+JAR_PATHS = list(glob('java/server/target/swh-graph-*.jar'))
+
setup(
name='swh.graph',
description='Software Heritage graph service',
long_description=long_description,
long_description_content_type='text/markdown',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/diffusion/DGRPH',
packages=find_packages(),
install_requires=parse_requirements() + parse_requirements('swh'),
tests_require=parse_requirements('test'),
setup_requires=['vcversioner'],
extras_require={'testing': parse_requirements('test')},
vcversioner={},
include_package_data=True,
+ data_files=[('share/swh-graph', JAR_PATHS)],
entry_points='''
[console_scripts]
swh-graph=swh.graph.cli:main
[swh.cli.subcommands]
graph=swh.graph.cli:cli
''',
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 3 - Alpha",
],
project_urls={
'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
'Funding': 'https://www.softwareheritage.org/donate',
'Source': 'https://forge.softwareheritage.org/source/swh-graph',
},
)
diff --git a/swh/graph/cli.py b/swh/graph/cli.py
index 329cea8..c0f5626 100644
--- a/swh/graph/cli.py
+++ b/swh/graph/cli.py
@@ -1,122 +1,150 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import aiohttp
import click
import sys
from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup
from swh.graph import client
from swh.graph.pid import PidToIntMap, IntToPidMap
+from swh.graph.server.app import make_app
+from swh.graph.server.backend import Backend
@click.group(name='graph', context_settings=CONTEXT_SETTINGS,
cls=AliasedGroup)
@click.pass_context
def cli(ctx):
"""Software Heritage graph tools."""
ctx.ensure_object(dict)
@cli.command('api-client')
@click.option('--host', default='localhost', help='Graph server host')
@click.option('--port', default='5009', help='Graph server port')
@click.pass_context
def api_client(ctx, host, port):
"""Client for the Software Heritage Graph REST service
"""
url = 'http://{}:{}'.format(host, port)
app = client.RemoteGraphClient(url)
# TODO: run web app
print(app.stats())
@cli.group('map')
@click.pass_context
def map(ctx):
"""Manage swh-graph on-disk maps"""
pass
def dump_pid2int(filename):
for (pid, int) in PidToIntMap(filename):
print('{}\t{}'.format(pid, int))
def dump_int2pid(filename):
for (int, pid) in IntToPidMap(filename):
print('{}\t{}'.format(int, pid))
def restore_pid2int(filename):
"""read a textual PID->int map from stdin and write its binary version to
filename
"""
with open(filename, 'wb') as dst:
for line in sys.stdin:
(str_pid, str_int) = line.split()
PidToIntMap.write_record(dst, str_pid, int(str_int))
def restore_int2pid(filename, length):
"""read a textual int->PID map from stdin and write its binary version to
filename
"""
int2pid = IntToPidMap(filename, mode='wb', length=length)
for line in sys.stdin:
(str_int, str_pid) = line.split()
int2pid[int(str_int)] = str_pid
int2pid.close()
@map.command('dump')
@click.option('--type', '-t', 'map_type', required=True,
type=click.Choice(['pid2int', 'int2pid']),
help='type of map to dump')
@click.argument('filename', required=True, type=click.Path(exists=True))
@click.pass_context
def dump_map(ctx, map_type, filename):
"""dump a binary PID<->int map to textual format"""
if map_type == 'pid2int':
dump_pid2int(filename)
elif map_type == 'int2pid':
dump_int2pid(filename)
else:
raise ValueError('invalid map type: ' + map_type)
pass
@map.command('restore')
@click.option('--type', '-t', 'map_type', required=True,
type=click.Choice(['pid2int', 'int2pid']),
help='type of map to dump')
@click.option('--length', '-l', type=int,
help='''map size in number of logical records
(required for int2pid maps)''')
@click.argument('filename', required=True, type=click.Path())
@click.pass_context
def restore_map(ctx, map_type, length, filename):
"""restore a binary PID<->int map from textual format"""
if map_type == 'pid2int':
- restore_pid2int(filename, length)
+ restore_pid2int(filename)
elif map_type == 'int2pid':
if length is None:
raise click.UsageError(
'map length is required when restoring {} maps'.format(
map_type), ctx)
restore_int2pid(filename, length)
else:
raise ValueError('invalid map type: ' + map_type)
+@cli.group('graph')
+@click.pass_context
+def graph(ctx):
+ """Manage swh-graph on-disk maps"""
+ pass
+
+
+@graph.command(name='rpc-serve')
+@click.option('--host', default='0.0.0.0',
+ metavar='IP', show_default=True,
+ help="Host ip address to bind the server on")
+@click.option('--port', default=5009, type=click.INT,
+ metavar='PORT', show_default=True,
+ help="Binding port of the server")
+@click.option('--graph', required=True, metavar='GRAPH',
+ help="Path prefix of the graph to load")
+@click.pass_context
+def serve(ctx, host, port, graph):
+ backend = Backend(graph_path=graph)
+ app = make_app(backend=backend)
+
+ with backend:
+ aiohttp.web.run_app(app, host=host, port=port)
+
+
def main():
return cli(auto_envvar_prefix='SWH_GRAPH')
if __name__ == '__main__':
main()
diff --git a/swh/graph/client.py b/swh/graph/client.py
index ac66da2..a2a8095 100644
--- a/swh/graph/client.py
+++ b/swh/graph/client.py
@@ -1,62 +1,82 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import json
+
from swh.core.api import RPCClient
class GraphAPIError(Exception):
"""Graph API Error"""
def __str__(self):
return ('An unexpected error occurred in the Graph backend: {}'
.format(self.args))
class RemoteGraphClient(RPCClient):
"""Client to the Software Heritage Graph."""
def __init__(self, url, timeout=None):
super().__init__(
api_exception=GraphAPIError, url=url, timeout=timeout)
- # Web API endpoints
+ def raw_verb_lines(self, verb, endpoint, **kwargs):
+ response = self.raw_verb(verb, endpoint, stream=True, **kwargs)
+ for line in response.iter_lines():
+ yield line.decode().lstrip('\n')
- def leaves(self, src, edges="*", direction="forward"):
- return self.get('leaves/{}'.format(src),
- params={
- 'edges': edges,
- 'direction': direction
- })
+ def get_lines(self, endpoint, **kwargs):
+ yield from self.raw_verb_lines('get', endpoint, **kwargs)
- def neighbors(self, src, edges="*", direction="forward"):
- return self.get('neighbors/{}'.format(src),
- params={
- 'edges': edges,
- 'direction': direction
- })
+ # Web API endpoints
def stats(self):
return self.get('stats')
+ def leaves(self, src, edges="*", direction="forward"):
+ return self.get_lines(
+ 'leaves/{}'.format(src),
+ params={
+ 'edges': edges,
+ 'direction': direction
+ })
+
+ def neighbors(self, src, edges="*", direction="forward"):
+ return self.get_lines(
+ 'neighbors/{}'.format(src),
+ params={
+ 'edges': edges,
+ 'direction': direction
+ })
+
def visit_nodes(self, src, edges="*", direction="forward"):
- return self.get('visit/nodes/{}'.format(src),
- params={
- 'edges': edges,
- 'direction': direction
- })
+ return self.get_lines(
+ 'visit/nodes/{}'.format(src),
+ params={
+ 'edges': edges,
+ 'direction': direction
+ })
def visit_paths(self, src, edges="*", direction="forward"):
- return self.get('visit/paths/{}'.format(src),
- params={
- 'edges': edges,
- 'direction': direction
- })
+ def decode_path_wrapper(it):
+ for e in it:
+ yield json.loads(e)
+
+ return decode_path_wrapper(
+ self.get_lines(
+ 'visit/paths/{}'.format(src),
+ params={
+ 'edges': edges,
+ 'direction': direction
+ }))
def walk(self, src, dst, edges="*", traversal="dfs", direction="forward"):
- return self.get('walk/{}/{}'.format(src, dst),
- params={
- 'edges': edges,
- 'traversal': traversal,
- 'direction': direction
- })
+ return self.get_lines(
+ 'walk/{}/{}'.format(src, dst),
+ params={
+ 'edges': edges,
+ 'traversal': traversal,
+ 'direction': direction
+ })
diff --git a/swh/graph/server/__init__.py b/swh/graph/server/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/swh/graph/server/app.py b/swh/graph/server/app.py
new file mode 100644
index 0000000..21b9dbc
--- /dev/null
+++ b/swh/graph/server/app.py
@@ -0,0 +1,102 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""
+A proxy HTTP server for swh-graph, talking to the Java code via py4j, and using
+FIFO as a transport to stream integers between the two languages.
+"""
+
+import contextlib
+import aiohttp.web
+
+from swh.core.api.asynchronous import RPCServerApp
+
+
+@contextlib.asynccontextmanager
+async def stream_response(request, *args, **kwargs):
+ response = aiohttp.web.StreamResponse(*args, **kwargs)
+ await response.prepare(request)
+ yield response
+ await response.write_eof()
+
+
+async def index(request):
+ return aiohttp.web.Response(
+ content_type='text/html',
+ body="""
+Software Heritage storage server
+
+You have reached the
+Software Heritage graph API server.
+
+See its
+API
+documentation for more information.
+
+""")
+
+
+async def stats(request):
+ stats = request.app['backend'].stats()
+ return aiohttp.web.Response(body=stats, content_type='application/json')
+
+
+def get_simple_traversal_handler(ttype):
+ async def simple_traversal(request):
+ src = request.match_info['src']
+ edges = request.query.get('edges', '*')
+ direction = request.query.get('direction', 'forward')
+
+ async with stream_response(request) as response:
+ async for res_pid in request.app['backend'].simple_traversal(
+ ttype, direction, edges, src
+ ):
+ await response.write('{}\n'.format(res_pid).encode())
+ return response
+
+ return simple_traversal
+
+
+async def walk(request):
+ src = request.match_info['src']
+ dst = request.match_info['dst']
+ edges = request.query.get('edges', '*')
+ direction = request.query.get('direction', 'forward')
+ algo = request.query.get('traversal', 'dfs')
+
+ it = request.app['backend'].walk(direction, edges, algo, src, dst)
+ async with stream_response(request) as response:
+ async for res_pid in it:
+ await response.write('{}\n'.format(res_pid).encode())
+ return response
+
+
+async def visit_paths(request):
+ src = request.match_info['src']
+ edges = request.query.get('edges', '*')
+ direction = request.query.get('direction', 'forward')
+
+ it = request.app['backend'].visit_paths(direction, edges, src)
+ async with stream_response(request) as response:
+ async for res_pid in it:
+ await response.write('{}\n'.format(res_pid).encode())
+ return response
+
+
+def make_app(backend, **kwargs):
+ app = RPCServerApp(**kwargs)
+ app.router.add_route('GET', '/', index)
+ app.router.add_route('GET', '/graph/stats', stats)
+ app.router.add_route('GET', '/graph/leaves/{src}',
+ get_simple_traversal_handler('leaves'))
+ app.router.add_route('GET', '/graph/neighbors/{src}',
+ get_simple_traversal_handler('neighbors'))
+ app.router.add_route('GET', '/graph/visit/nodes/{src}',
+ get_simple_traversal_handler('visit_nodes'))
+ app.router.add_route('GET', '/graph/visit/paths/{src}', visit_paths)
+ app.router.add_route('GET', '/graph/walk/{src}/{dst}', walk)
+
+ app['backend'] = backend
+ return app
diff --git a/swh/graph/server/backend.py b/swh/graph/server/backend.py
new file mode 100644
index 0000000..0aecc14
--- /dev/null
+++ b/swh/graph/server/backend.py
@@ -0,0 +1,161 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import asyncio
+import contextlib
+import json
+import os
+import pathlib
+import struct
+import sys
+import tempfile
+
+from py4j.java_gateway import JavaGateway
+
+from swh.graph.pid import IntToPidMap, PidToIntMap
+from swh.model.identifiers import PID_TYPES
+
+BUF_SIZE = 64*1024
+BIN_FMT = '>q' # 64 bit integer, big endian
+PATH_SEPARATOR_ID = -1
+NODE2PID_EXT = 'node2pid.bin'
+PID2NODE_EXT = 'pid2node.bin'
+
+
+def find_graph_jar():
+ swh_graph_root = pathlib.Path(__file__).parents[3]
+ try_paths = [
+ swh_graph_root / 'java/server/target/',
+ pathlib.Path(sys.prefix) / 'share/swh-graph/',
+ ]
+ for path in try_paths:
+ glob = list(path.glob('swh-graph-*.jar'))
+ if glob:
+ return str(glob[0])
+ raise RuntimeError("swh-graph-*.jar not found. Have you run `make java`?")
+
+
+class Backend:
+ def __init__(self, graph_path):
+ self.gateway = None
+ self.entry = None
+ self.graph_path = graph_path
+
+ def __enter__(self):
+ self.gateway = JavaGateway.launch_gateway(
+ java_path=None,
+ classpath=find_graph_jar(),
+ die_on_exit=True,
+ redirect_stdout=sys.stdout,
+ redirect_stderr=sys.stderr,
+ )
+ self.entry = self.gateway.jvm.org.softwareheritage.graph.Entry()
+ self.entry.load_graph(self.graph_path)
+ self.node2pid = IntToPidMap(self.graph_path + '.' + NODE2PID_EXT)
+ self.pid2node = PidToIntMap(self.graph_path + '.' + PID2NODE_EXT)
+ self.stream_proxy = JavaStreamProxy(self.entry)
+
+ def __exit__(self, exc_type, exc_value, tb):
+ self.gateway.shutdown()
+
+ def stats(self):
+ return self.entry.stats()
+
+ async def simple_traversal(self, ttype, direction, edges_fmt, src):
+ assert ttype in ('leaves', 'neighbors', 'visit_nodes', 'visit_paths')
+ src_id = self.pid2node[src]
+ method = getattr(self.stream_proxy, ttype)
+ async for node_id in method(direction, edges_fmt, src_id):
+ if node_id == PATH_SEPARATOR_ID:
+ yield None
+ else:
+ yield self.node2pid[node_id]
+
+ async def walk(self, direction, edges_fmt, algo, src, dst):
+ src_id = self.pid2node[src]
+ if dst in PID_TYPES:
+ it = self.stream_proxy.walk_type(direction, edges_fmt, algo,
+ src_id, dst)
+ else:
+ dst_id = self.pid2node[dst]
+ it = self.stream_proxy.walk(direction, edges_fmt, algo,
+ src_id, dst_id)
+
+ async for node_id in it:
+ yield self.node2pid[node_id]
+
+ async def visit_paths(self, *args):
+ buffer = []
+ async for res_pid in self.simple_traversal('visit_paths', *args):
+ if res_pid is None: # Path separator, flush
+ yield json.dumps(buffer)
+ buffer = []
+ else:
+ buffer.append(res_pid)
+
+
+class JavaStreamProxy:
+ """A proxy class for the org.softwareheritage.graph.Entry Java class that
+ takes care of the setup and teardown of the named-pipe FIFO communication
+ between Python and Java.
+
+ Initialize JavaStreamProxy using:
+
+ proxy = JavaStreamProxy(swh_entry_class_instance)
+
+ Then you can call an Entry method and iterate on the FIFO results like
+ this:
+
+ async for value in proxy.java_method(arg1, arg2):
+ print(value)
+ """
+
+ def __init__(self, entry):
+ self.entry = entry
+
+ async def read_node_ids(self, fname):
+ loop = asyncio.get_event_loop()
+ with (await loop.run_in_executor(None, open, fname, 'rb')) as f:
+ while True:
+ data = await loop.run_in_executor(None, f.read, BUF_SIZE)
+ if not data:
+ break
+ for data in struct.iter_unpack(BIN_FMT, data):
+ yield data[0]
+
+ class _HandlerWrapper:
+ def __init__(self, handler):
+ self._handler = handler
+
+ def __getattr__(self, name):
+ func = getattr(self._handler, name)
+
+ async def java_call(*args, **kwargs):
+ loop = asyncio.get_event_loop()
+ await loop.run_in_executor(None, lambda: func(*args, **kwargs))
+
+ def java_task(*args, **kwargs):
+ return asyncio.create_task(java_call(*args, **kwargs))
+
+ return java_task
+
+ @contextlib.contextmanager
+ def get_handler(self):
+ with tempfile.TemporaryDirectory(prefix='swh-graph-') as tmpdirname:
+ cli_fifo = os.path.join(tmpdirname, 'swh-graph.fifo')
+ os.mkfifo(cli_fifo)
+ reader = self.read_node_ids(cli_fifo)
+ query_handler = self.entry.get_handler(cli_fifo)
+ handler = self._HandlerWrapper(query_handler)
+ yield (handler, reader)
+
+ def __getattr__(self, name):
+ async def java_call_iterator(*args, **kwargs):
+ with self.get_handler() as (handler, reader):
+ java_task = getattr(handler, name)(*args, **kwargs)
+ async for value in reader:
+ yield value
+ await java_task
+ return java_call_iterator
diff --git a/swh/graph/tests/conftest.py b/swh/graph/tests/conftest.py
new file mode 100644
index 0000000..92ace00
--- /dev/null
+++ b/swh/graph/tests/conftest.py
@@ -0,0 +1,40 @@
+import multiprocessing
+import pytest
+from pathlib import Path
+
+from aiohttp.test_utils import TestServer, TestClient, loop_context
+
+from swh.graph.client import RemoteGraphClient
+from swh.graph.server.backend import Backend
+from swh.graph.server.app import make_app
+
+SWH_GRAPH_ROOT = Path(__file__).parents[3]
+TEST_GRAPH_PATH = SWH_GRAPH_ROOT / 'tests/dataset/output/example'
+
+
+class GraphServerProcess(multiprocessing.Process):
+ def __init__(self, q, *args, **kwargs):
+ self.q = q
+ super().__init__(*args, **kwargs)
+
+ def run(self):
+ backend = Backend(graph_path=str(TEST_GRAPH_PATH))
+ with backend:
+ with loop_context() as loop:
+ self.loop = loop
+ app = make_app(backend=backend)
+ client = TestClient(TestServer(app), loop=loop)
+ loop.run_until_complete(client.start_server())
+ url = client.make_url('/graph/')
+ self.q.put(url)
+ loop.run_forever()
+
+
+@pytest.fixture(scope="module")
+def graph_client():
+ queue = multiprocessing.Queue()
+ server = GraphServerProcess(queue)
+ server.start()
+ url = queue.get()
+ yield RemoteGraphClient(str(url))
+ server.terminate()
diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py
index b49ee81..bbf55a2 100644
--- a/swh/graph/tests/test_api_client.py
+++ b/swh/graph/tests/test_api_client.py
@@ -1,190 +1,104 @@
-from pathlib import Path
-from urllib.request import urlopen
-import subprocess
-import time
-
-import aiohttp.test_utils
-import pytest
-
-from swh.graph.client import RemoteGraphClient
-from swh.graph.tests import SWH_GRAPH_VERSION
-
-
-@pytest.fixture(scope='module')
-def graph_client():
- swh_graph_root = Path(__file__).parents[3]
- java_dir = swh_graph_root / 'java/server'
-
- # Compile Java server using maven
- pom_path = java_dir / 'pom.xml'
- subprocess.run(
- ['mvn', '-f', str(pom_path), 'compile', 'assembly:single'], check=True)
-
- port = aiohttp.test_utils.unused_port()
-
- # Start Java server
- jar_file = 'swh-graph-{}-jar-with-dependencies.jar'.format(
- SWH_GRAPH_VERSION)
- jar_path = java_dir / 'target' / jar_file
- graph_path = java_dir / 'src/test/dataset/output/example'
- server = subprocess.Popen([
- 'java', '-cp', str(jar_path),
- 'org.softwareheritage.graph.App', str(graph_path), '-p', str(port)
- ])
-
- # Wait max 5 seconds for server to spawn
- localhost = 'http://0.0.0.0:{}'.format(port)
- i = 0
- while i < 20:
- try:
- urlopen(localhost)
- except Exception:
- i += 1
- time.sleep(0.25)
- else:
- break
-
- # Start Python client
- client = RemoteGraphClient(localhost)
-
- yield client
-
- print('Service teardown')
- server.kill()
-
-
-class TestEndpoints:
- @pytest.fixture(autouse=True)
- def init_graph_client(self, graph_client):
- self.client = graph_client
-
- @staticmethod
- def assert_endpoint_output(actual, expected):
- assert set(actual.keys()) == {'result', 'meta'}
- assert set(actual['result']) == set(expected['result'])
- assert actual['meta'] == expected['meta']
-
- def test_leaves(self):
- actual = self.client.leaves(
- 'swh:1:ori:0000000000000000000000000000000000000021'
- )
- expected = {
- 'result': [
- 'swh:1:cnt:0000000000000000000000000000000000000001',
- 'swh:1:cnt:0000000000000000000000000000000000000004',
- 'swh:1:cnt:0000000000000000000000000000000000000005',
- 'swh:1:cnt:0000000000000000000000000000000000000007'
- ],
- 'meta': {
- 'nb_edges_accessed': 13
- }
- }
- TestEndpoints.assert_endpoint_output(actual, expected)
-
- def test_neighbors(self):
- actual = self.client.neighbors(
+def test_stats(graph_client):
+ stats = graph_client.stats()
+
+ assert set(stats.keys()) == {'counts', 'ratios', 'indegree',
+ 'outdegree'}
+
+ assert set(stats['counts'].keys()) == {'nodes', 'edges'}
+ assert set(stats['ratios'].keys()) == {'compression', 'bits_per_node',
+ 'bits_per_edge', 'avg_locality'}
+ assert set(stats['indegree'].keys()) == {'min', 'max', 'avg'}
+ assert set(stats['outdegree'].keys()) == {'min', 'max', 'avg'}
+
+ assert stats['counts']['nodes'] == 21
+ assert stats['counts']['edges'] == 23
+ assert isinstance(stats['ratios']['compression'], float)
+ assert isinstance(stats['ratios']['bits_per_node'], float)
+ assert isinstance(stats['ratios']['bits_per_edge'], float)
+ assert isinstance(stats['ratios']['avg_locality'], float)
+ assert stats['indegree']['min'] == 0
+ assert stats['indegree']['max'] == 3
+ assert isinstance(stats['indegree']['avg'], float)
+ assert stats['outdegree']['min'] == 0
+ assert stats['outdegree']['max'] == 3
+ assert isinstance(stats['outdegree']['avg'], float)
+
+
+def test_leaves(graph_client):
+ actual = list(graph_client.leaves(
+ 'swh:1:ori:0000000000000000000000000000000000000021'
+ ))
+ expected = [
+ 'swh:1:cnt:0000000000000000000000000000000000000001',
+ 'swh:1:cnt:0000000000000000000000000000000000000004',
+ 'swh:1:cnt:0000000000000000000000000000000000000005',
+ 'swh:1:cnt:0000000000000000000000000000000000000007'
+ ]
+ assert set(actual) == set(expected)
+
+
+def test_neighbors(graph_client):
+ actual = list(graph_client.neighbors(
+ 'swh:1:rev:0000000000000000000000000000000000000009',
+ direction='backward'
+ ))
+ expected = [
+ 'swh:1:snp:0000000000000000000000000000000000000020',
+ 'swh:1:rel:0000000000000000000000000000000000000010',
+ 'swh:1:rev:0000000000000000000000000000000000000013'
+ ]
+ assert set(actual) == set(expected)
+
+
+def test_visit_nodes(graph_client):
+ actual = list(graph_client.visit_nodes(
+ 'swh:1:rel:0000000000000000000000000000000000000010',
+ edges='rel:rev,rev:rev'
+ ))
+ expected = [
+ 'swh:1:rel:0000000000000000000000000000000000000010',
+ 'swh:1:rev:0000000000000000000000000000000000000009',
+ 'swh:1:rev:0000000000000000000000000000000000000003'
+ ]
+ assert set(actual) == set(expected)
+
+
+def test_visit_paths(graph_client):
+ actual = list(graph_client.visit_paths(
+ 'swh:1:snp:0000000000000000000000000000000000000020',
+ edges='snp:*,rev:*'))
+ actual = [tuple(path) for path in actual]
+ expected = [
+ (
+ 'swh:1:snp:0000000000000000000000000000000000000020',
'swh:1:rev:0000000000000000000000000000000000000009',
- direction='backward'
- )
- expected = {
- 'result': [
- 'swh:1:snp:0000000000000000000000000000000000000020',
- 'swh:1:rel:0000000000000000000000000000000000000010',
- 'swh:1:rev:0000000000000000000000000000000000000013'
- ],
- 'meta': {
- 'nb_edges_accessed': 3
- }
- }
- TestEndpoints.assert_endpoint_output(actual, expected)
-
- def test_stats(self):
- stats = self.client.stats()
-
- assert set(stats.keys()) == {'counts', 'ratios', 'indegree',
- 'outdegree'}
-
- assert set(stats['counts'].keys()) == {'nodes', 'edges'}
- assert set(stats['ratios'].keys()) == {'compression', 'bits_per_node',
- 'bits_per_edge', 'avg_locality'}
- assert set(stats['indegree'].keys()) == {'min', 'max', 'avg'}
- assert set(stats['outdegree'].keys()) == {'min', 'max', 'avg'}
-
- assert stats['counts']['nodes'] == 21
- assert stats['counts']['edges'] == 23
- assert isinstance(stats['ratios']['compression'], float)
- assert isinstance(stats['ratios']['bits_per_node'], float)
- assert isinstance(stats['ratios']['bits_per_edge'], float)
- assert isinstance(stats['ratios']['avg_locality'], float)
- assert stats['indegree']['min'] == 0
- assert stats['indegree']['max'] == 3
- assert isinstance(stats['indegree']['avg'], float)
- assert stats['outdegree']['min'] == 0
- assert stats['outdegree']['max'] == 3
- assert isinstance(stats['outdegree']['avg'], float)
-
- def test_visit_nodes(self):
- actual = self.client.visit_nodes(
- 'swh:1:rel:0000000000000000000000000000000000000010',
- edges='rel:rev,rev:rev'
- )
- expected = {
- 'result': [
- 'swh:1:rel:0000000000000000000000000000000000000010',
- 'swh:1:rev:0000000000000000000000000000000000000009',
- 'swh:1:rev:0000000000000000000000000000000000000003'
- ],
- 'meta': {
- 'nb_edges_accessed': 4
- }
- }
- TestEndpoints.assert_endpoint_output(actual, expected)
-
- def test_visit_paths(self):
- actual = self.client.visit_paths(
- 'swh:1:snp:0000000000000000000000000000000000000020',
- edges='snp:*,rev:*')
- actual['result'] = [tuple(path) for path in actual['result']]
- expected = {
- 'result': [
- (
- 'swh:1:snp:0000000000000000000000000000000000000020',
- 'swh:1:rev:0000000000000000000000000000000000000009',
- 'swh:1:rev:0000000000000000000000000000000000000003',
- 'swh:1:dir:0000000000000000000000000000000000000002'
- ),
- (
- 'swh:1:snp:0000000000000000000000000000000000000020',
- 'swh:1:rev:0000000000000000000000000000000000000009',
- 'swh:1:dir:0000000000000000000000000000000000000008'
- ),
- (
- 'swh:1:snp:0000000000000000000000000000000000000020',
- 'swh:1:rel:0000000000000000000000000000000000000010'
- )
- ],
- 'meta': {
- 'nb_edges_accessed': 10
- }
- }
- TestEndpoints.assert_endpoint_output(actual, expected)
-
- def test_walk(self):
- actual = self.client.walk(
- 'swh:1:dir:0000000000000000000000000000000000000016', 'rel',
- edges='dir:dir,dir:rev,rev:*',
- direction='backward',
- traversal='bfs'
+ 'swh:1:rev:0000000000000000000000000000000000000003',
+ 'swh:1:dir:0000000000000000000000000000000000000002'
+ ),
+ (
+ 'swh:1:snp:0000000000000000000000000000000000000020',
+ 'swh:1:rev:0000000000000000000000000000000000000009',
+ 'swh:1:dir:0000000000000000000000000000000000000008'
+ ),
+ (
+ 'swh:1:snp:0000000000000000000000000000000000000020',
+ 'swh:1:rel:0000000000000000000000000000000000000010'
)
- expected = {
- 'result': [
- 'swh:1:dir:0000000000000000000000000000000000000016',
- 'swh:1:dir:0000000000000000000000000000000000000017',
- 'swh:1:rev:0000000000000000000000000000000000000018',
- 'swh:1:rel:0000000000000000000000000000000000000019'
- ],
- 'meta': {
- 'nb_edges_accessed': 3
- }
- }
- TestEndpoints.assert_endpoint_output(actual, expected)
+ ]
+ assert set(actual) == set(expected)
+
+
+def test_walk(graph_client):
+ actual = list(graph_client.walk(
+ 'swh:1:dir:0000000000000000000000000000000000000016', 'rel',
+ edges='dir:dir,dir:rev,rev:*',
+ direction='backward',
+ traversal='bfs'
+ ))
+ expected = [
+ 'swh:1:dir:0000000000000000000000000000000000000016',
+ 'swh:1:dir:0000000000000000000000000000000000000017',
+ 'swh:1:rev:0000000000000000000000000000000000000018',
+ 'swh:1:rel:0000000000000000000000000000000000000019'
+ ]
+ assert set(actual) == set(expected)
diff --git a/java/server/src/test/dataset/.gitignore b/tests/dataset/.gitignore
similarity index 100%
rename from java/server/src/test/dataset/.gitignore
rename to tests/dataset/.gitignore
diff --git a/java/server/src/test/dataset/example.edges.csv b/tests/dataset/example.edges.csv
similarity index 100%
rename from java/server/src/test/dataset/example.edges.csv
rename to tests/dataset/example.edges.csv
diff --git a/java/server/src/test/dataset/example.edges.csv.gz b/tests/dataset/example.edges.csv.gz
similarity index 100%
rename from java/server/src/test/dataset/example.edges.csv.gz
rename to tests/dataset/example.edges.csv.gz
diff --git a/java/server/src/test/dataset/example.nodes.csv b/tests/dataset/example.nodes.csv
similarity index 100%
rename from java/server/src/test/dataset/example.nodes.csv
rename to tests/dataset/example.nodes.csv
diff --git a/java/server/src/test/dataset/example.nodes.csv.gz b/tests/dataset/example.nodes.csv.gz
similarity index 100%
rename from java/server/src/test/dataset/example.nodes.csv.gz
rename to tests/dataset/example.nodes.csv.gz
diff --git a/java/server/src/test/dataset/generate_graph.sh b/tests/dataset/generate_graph.sh
similarity index 100%
rename from java/server/src/test/dataset/generate_graph.sh
rename to tests/dataset/generate_graph.sh
diff --git a/java/server/src/test/dataset/img/.gitignore b/tests/dataset/img/.gitignore
similarity index 100%
rename from java/server/src/test/dataset/img/.gitignore
rename to tests/dataset/img/.gitignore
diff --git a/java/server/src/test/dataset/img/Makefile b/tests/dataset/img/Makefile
similarity index 100%
rename from java/server/src/test/dataset/img/Makefile
rename to tests/dataset/img/Makefile
diff --git a/java/server/src/test/dataset/img/example.dot b/tests/dataset/img/example.dot
similarity index 100%
rename from java/server/src/test/dataset/img/example.dot
rename to tests/dataset/img/example.dot
diff --git a/java/server/src/test/dataset/output/example-transposed.graph b/tests/dataset/output/example-transposed.graph
similarity index 100%
rename from java/server/src/test/dataset/output/example-transposed.graph
rename to tests/dataset/output/example-transposed.graph
diff --git a/java/server/src/test/dataset/output/example-transposed.obl b/tests/dataset/output/example-transposed.obl
similarity index 100%
rename from java/server/src/test/dataset/output/example-transposed.obl
rename to tests/dataset/output/example-transposed.obl
diff --git a/java/server/src/test/dataset/output/example-transposed.offsets b/tests/dataset/output/example-transposed.offsets
similarity index 100%
rename from java/server/src/test/dataset/output/example-transposed.offsets
rename to tests/dataset/output/example-transposed.offsets
diff --git a/java/server/src/test/dataset/output/example-transposed.properties b/tests/dataset/output/example-transposed.properties
similarity index 100%
rename from java/server/src/test/dataset/output/example-transposed.properties
rename to tests/dataset/output/example-transposed.properties
diff --git a/java/server/src/test/dataset/output/example.graph b/tests/dataset/output/example.graph
similarity index 100%
rename from java/server/src/test/dataset/output/example.graph
rename to tests/dataset/output/example.graph
diff --git a/java/server/src/test/dataset/output/example.indegree b/tests/dataset/output/example.indegree
similarity index 100%
rename from java/server/src/test/dataset/output/example.indegree
rename to tests/dataset/output/example.indegree
diff --git a/java/server/src/test/dataset/output/example.mph b/tests/dataset/output/example.mph
similarity index 100%
rename from java/server/src/test/dataset/output/example.mph
rename to tests/dataset/output/example.mph
diff --git a/tests/dataset/output/example.node2pid.bin b/tests/dataset/output/example.node2pid.bin
new file mode 100644
index 0000000..7755043
Binary files /dev/null and b/tests/dataset/output/example.node2pid.bin differ
diff --git a/java/server/src/test/dataset/output/example.node2pid.csv b/tests/dataset/output/example.node2pid.csv
similarity index 100%
rename from java/server/src/test/dataset/output/example.node2pid.csv
rename to tests/dataset/output/example.node2pid.csv
diff --git a/java/server/src/test/dataset/output/example.node2type.map b/tests/dataset/output/example.node2type.map
similarity index 100%
rename from java/server/src/test/dataset/output/example.node2type.map
rename to tests/dataset/output/example.node2type.map
diff --git a/java/server/src/test/dataset/output/example.obl b/tests/dataset/output/example.obl
similarity index 100%
rename from java/server/src/test/dataset/output/example.obl
rename to tests/dataset/output/example.obl
diff --git a/java/server/src/test/dataset/output/example.offsets b/tests/dataset/output/example.offsets
similarity index 100%
rename from java/server/src/test/dataset/output/example.offsets
rename to tests/dataset/output/example.offsets
diff --git a/java/server/src/test/dataset/output/example.order b/tests/dataset/output/example.order
similarity index 100%
rename from java/server/src/test/dataset/output/example.order
rename to tests/dataset/output/example.order
diff --git a/java/server/src/test/dataset/output/example.outdegree b/tests/dataset/output/example.outdegree
similarity index 100%
rename from java/server/src/test/dataset/output/example.outdegree
rename to tests/dataset/output/example.outdegree
diff --git a/tests/dataset/output/example.pid2node.bin b/tests/dataset/output/example.pid2node.bin
new file mode 100644
index 0000000..753264c
Binary files /dev/null and b/tests/dataset/output/example.pid2node.bin differ
diff --git a/java/server/src/test/dataset/output/example.pid2node.csv b/tests/dataset/output/example.pid2node.csv
similarity index 100%
rename from java/server/src/test/dataset/output/example.pid2node.csv
rename to tests/dataset/output/example.pid2node.csv
diff --git a/java/server/src/test/dataset/output/example.properties b/tests/dataset/output/example.properties
similarity index 100%
rename from java/server/src/test/dataset/output/example.properties
rename to tests/dataset/output/example.properties
diff --git a/java/server/src/test/dataset/output/example.stats b/tests/dataset/output/example.stats
similarity index 100%
rename from java/server/src/test/dataset/output/example.stats
rename to tests/dataset/output/example.stats
diff --git a/tox.ini b/tox.ini
index 1eb5527..c52988e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,24 +1,27 @@
[tox]
envlist=flake8,mypy,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
+whitelist_externals = mvn
commands =
+ mvn -f java/server/pom.xml compile assembly:single
pytest --cov=swh --cov-branch {posargs}
[testenv:flake8]
skip_install = true
deps =
flake8
commands =
{envpython} -m flake8
[testenv:mypy]
skip_install = true
+ignore_missing_imports = true
deps =
.[testing]
mypy
commands =
mypy swh