diff --git a/Makefile.local b/Makefile.local new file mode 100644 index 0000000..3b72b8e --- /dev/null +++ b/Makefile.local @@ -0,0 +1,6 @@ +POM_PATH=java/server/pom.xml + +java: + mvn -f $(POM_PATH) compile assembly:single + +.PHONY: java diff --git a/java/server/pom.xml b/java/server/pom.xml index 7da3b7c..f5fd58e 100644 --- a/java/server/pom.xml +++ b/java/server/pom.xml @@ -1,151 +1,156 @@ 4.0.0 org.softwareheritage.graph swh-graph 0.0.2 swh-graph https://www.softwareheritage.org/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 junit junit 4.11 test org.hamcrest hamcrest 2.1 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big 3.5.1 it.unimi.dsi fastutil 8.2.2 com.martiansoftware jsap 2.1 + + net.sf.py4j + py4j + 0.10.8.1 + maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 -verbose -Xlint:all maven-surefire-plugin 2.22.1 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin org.softwareheritage.graph.App jar-with-dependencies make-assembly package single org.apache.maven.plugins maven-javadoc-plugin 3.1.1 diff --git a/java/server/src/main/java/org/softwareheritage/graph/Entry.java b/java/server/src/main/java/org/softwareheritage/graph/Entry.java new file mode 100644 index 0000000..c666333 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/Entry.java @@ -0,0 +1,139 @@ +package org.softwareheritage.graph; + +import java.util.ArrayList; +import java.util.Map; +import java.io.DataOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.PropertyNamingStrategy; +import py4j.GatewayServer; + +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.algo.Stats; +import org.softwareheritage.graph.algo.NodeIdsConsumer; +import org.softwareheritage.graph.algo.Traversal; + +public class Entry { + Graph graph; + + final long PATH_SEPARATOR_ID = -1; + + public void load_graph(String graphBasename) throws IOException { + System.err.println("Loading graph " + graphBasename + " ..."); + this.graph = new Graph(graphBasename); + System.err.println("Graph loaded."); + } + + public String stats() { + try { + Stats stats = new Stats(graph.getPath()); + ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); + String res = objectMapper.writeValueAsString(stats); + return res; + } catch (IOException e) { + throw new RuntimeException("Cannot read stats: " + e); + } + } + + public QueryHandler get_handler(String clientFIFO) { + return new QueryHandler(this.graph.copy(), clientFIFO); + } + + public class QueryHandler { + Graph graph; + DataOutputStream out; + String clientFIFO; + + public QueryHandler(Graph graph, String clientFIFO) { + this.graph = graph; + this.clientFIFO = clientFIFO; + this.out = null; + } + + public void writeNode(long nodeId) { + try { + out.writeLong(nodeId); + } catch (IOException e) { + throw new RuntimeException("Cannot write response to client: " + e); + } + } + + public void writePath(ArrayList path) { + for (Long nodeId : path) { + writeNode(nodeId); + } + writeNode(PATH_SEPARATOR_ID); + } + + public void open() { + try { + FileOutputStream file = new FileOutputStream(this.clientFIFO); + this.out = new 
DataOutputStream(file); + } catch (IOException e) { + throw new RuntimeException("Cannot open client FIFO: " + e); + } + } + + public void close() { + try { + out.close(); + } catch (IOException e) { + throw new RuntimeException("Cannot write response to client: " + e); + } + } + + public void leaves(String direction, String edgesFmt, long srcNodeId) { + open(); + Traversal t = new Traversal(this.graph, direction, edgesFmt); + t.leavesVisitor(srcNodeId, this::writeNode); + close(); + } + + public void neighbors(String direction, String edgesFmt, long srcNodeId) { + open(); + Traversal t = new Traversal(this.graph, direction, edgesFmt); + t.neighborsVisitor(srcNodeId, this::writeNode); + close(); + } + + public void visit_nodes(String direction, String edgesFmt, long srcNodeId) { + open(); + Traversal t = new Traversal(this.graph, direction, edgesFmt); + t.visitNodesVisitor(srcNodeId, this::writeNode); + close(); + } + + public void visit_paths(String direction, String edgesFmt, + long srcNodeId) { + open(); + Traversal t = new Traversal(this.graph, direction, edgesFmt); + t.visitPathsVisitor(srcNodeId, this::writePath); + close(); + } + + public void walk(String direction, String edgesFmt, String algorithm, + long srcNodeId, long dstNodeId) { + open(); + Traversal t = new Traversal(this.graph, direction, edgesFmt); + for (Long nodeId : t.walk(srcNodeId, dstNodeId, algorithm)) { + writeNode(nodeId); + } + close(); + } + + public void walk_type(String direction, String edgesFmt, String algorithm, + long srcNodeId, String dst) { + open(); + Node.Type dstType = Node.Type.fromStr(dst); + Traversal t = new Traversal(this.graph, direction, edgesFmt); + for (Long nodeId : t.walk(srcNodeId, dstType, algorithm)) { + writeNode(nodeId); + } + close(); + } + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/NodeIdConsumer.java b/java/server/src/main/java/org/softwareheritage/graph/algo/NodeIdConsumer.java new file mode 100644 index 0000000..fdcd6f1 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/NodeIdConsumer.java @@ -0,0 +1,11 @@ +package org.softwareheritage.graph.algo; + +import java.util.function.LongConsumer; + +public interface NodeIdConsumer extends LongConsumer { + + /** Callback for incrementally receiving node identifiers during a graph + * visit. + */ + void accept(long nodeId); +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/NodeIdsConsumer.java b/java/server/src/main/java/org/softwareheritage/graph/algo/NodeIdsConsumer.java new file mode 100644 index 0000000..cc4fd85 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/NodeIdsConsumer.java @@ -0,0 +1,14 @@ +package org.softwareheritage.graph.algo; + +import java.util.function.BiConsumer; + +public interface NodeIdsConsumer extends BiConsumer { + + /** Callback for returning a (potentially large) list of node identifiers. + * The callback will be invoked repeatedly without reallocating the array. + * At each invocation the array might contain more than size node + * identifiers, but only identifiers located up to position size-1 are to + * be considered during that specific invocation. 
+ */ + void accept(long nodeIds[], int size); +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/PathConsumer.java b/java/server/src/main/java/org/softwareheritage/graph/algo/PathConsumer.java new file mode 100644 index 0000000..50d5b15 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/PathConsumer.java @@ -0,0 +1,12 @@ +package org.softwareheritage.graph.algo; + +import java.util.ArrayList; +import java.util.function.Consumer; + +public interface PathConsumer extends Consumer> { + + /** Callback for incrementally receiving node paths (made of node + * identifiers) during a graph visit. + */ + void accept(ArrayList path); +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java index fe0f56f..9aecce8 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/server/src/main/java/org/softwareheritage/graph/algo/Traversal.java @@ -1,328 +1,353 @@ package org.softwareheritage.graph.algo; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import java.util.Queue; import java.util.Stack; import it.unimi.dsi.bits.LongArrayBitVector; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Neighbors; import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.algo.NodeIdConsumer; +import org.softwareheritage.graph.algo.PathConsumer; /** * Traversal algorithms on the compressed graph. *

* Internal implementation of the traversal API endpoints. These methods only input/output internal * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage * PID. * * @author The Software Heritage developers * @see org.softwareheritage.graph.Endpoint */ public class Traversal { /** Graph used in the traversal */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** Bit array storing if we have visited a node */ LongArrayBitVector visited; /** Hash map storing parent node id for each nodes during a traversal */ Map parentNode; /** Number of edges accessed during traversal */ long nbEdgesAccessed; /** * Constructor. * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } this.graph = graph; this.useTransposed = (direction.equals("backward")); this.edges = new AllowedEdges(graph, edgesFmt); long nbNodes = graph.getNbNodes(); this.visited = LongArrayBitVector.ofLength(nbNodes); this.parentNode = new HashMap<>(); this.nbEdgesAccessed = 0; } /** * Returns number of accessed edges during traversal. * * @return number of edges accessed in last traversal */ public long getNbEdgesAccessed() { return nbEdgesAccessed; } /** - * Returns the leaves of a subgraph rooted at the specified source node. - * - * @param srcNodeId source node - * @return list of node ids corresponding to the leaves + * Push version of {@link leaves}: will fire passed callback for each leaf. */ - public ArrayList leaves(long srcNodeId) { - ArrayList nodeIds = new ArrayList(); + public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) { Stack stack = new Stack(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); long neighborsCnt = 0; nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { neighborsCnt++; if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } if (neighborsCnt == 0) { - nodeIds.add(currentNodeId); + cb.accept(currentNodeId); } } - - return nodeIds; } /** - * Returns node direct neighbors (linked with exactly one edge). + * Returns the leaves of a subgraph rooted at the specified source node. * * @param srcNodeId source node - * @return list of node ids corresponding to the neighbors + * @return list of node ids corresponding to the leaves */ - public ArrayList neighbors(long srcNodeId) { + public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList(); + leavesVisitor(srcNodeId, (nodeId) -> nodeIds.add(nodeId)); + return nodeIds; + } + + /** + * Push version of {@link neighbors}: will fire passed callback on each + * neighbor. + */ + public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) { this.nbEdgesAccessed = graph.degree(srcNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, srcNodeId)) { - nodeIds.add(neighborNodeId); + cb.accept(neighborNodeId); } - return nodeIds; } /** - * Performs a graph traversal and returns explored nodes. 
+ * Returns node direct neighbors (linked with exactly one edge). * * @param srcNodeId source node - * @return list of explored node ids + * @return list of node ids corresponding to the neighbors */ - public ArrayList visitNodes(long srcNodeId) { + public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList(); + neighborsVisitor(srcNodeId, (nodeId) -> nodeIds.add(nodeId)); + return nodeIds; + } + + /** + * Push version of {@link visitNodes}: will fire passed callback on each + * visited node. + */ + public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) { Stack stack = new Stack(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); - nodeIds.add(currentNodeId); + cb.accept(currentNodeId); nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); } } } + } - return nodeIds; + /** + * Performs a graph traversal and returns explored nodes. + * + * @param srcNodeId source node + * @return list of explored node ids + */ + public ArrayList visitNodes(long srcNodeId) { + ArrayList nodeIds = new ArrayList(); + visitNodesVisitor(srcNodeId, (nodeId) -> nodeIds.add(nodeId)); + return nodeIds; + } + + /** + * Push version of {@link visitPaths}: will fire passed callback on each + * discovered (complete) path. + */ + public void visitPathsVisitor(long srcNodeId, PathConsumer cb) { + Stack currentPath = new Stack(); + this.nbEdgesAccessed = 0; + visitPathsInternalVisitor(srcNodeId, currentPath, cb); } /** * Performs a graph traversal and returns explored paths. * * @param srcNodeId source node * @return list of explored paths (represented as a list of node ids) */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); - Stack currentPath = new Stack(); - this.nbEdgesAccessed = 0; - visitPathsInternal(srcNodeId, paths, currentPath); + visitPathsVisitor(srcNodeId, (path) -> paths.add(path)); return paths; } - /** - * Internal recursive function of {@link #visitPaths}. - * - * @param currentNodeId current node - * @param paths list of currently stored paths - * @param currentPath current path as node ids - */ - private void visitPathsInternal( - long currentNodeId, ArrayList> paths, Stack currentPath) { + private void visitPathsInternalVisitor(long currentNodeId, + Stack currentPath, + PathConsumer cb) { currentPath.push(currentNodeId); long visitedNeighbors = 0; nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { - visitPathsInternal(neighborNodeId, paths, currentPath); + visitPathsInternalVisitor(neighborNodeId, currentPath, cb); visitedNeighbors++; } if (visitedNeighbors == 0) { ArrayList path = new ArrayList(); for (long nodeId : currentPath) { path.add(nodeId); } - paths.add(path); + cb.accept(path); } currentPath.pop(); } /** * Performs a graph traversal and returns the first found path from source to destination. 
* * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids */ public ArrayList walk(long srcNodeId, T dst, String algorithm) { long dstNodeId = -1; if (algorithm.equals("dfs")) { dstNodeId = walkInternalDfs(srcNodeId, dst); } else if (algorithm.equals("bfs")) { dstNodeId = walkInternalBfs(srcNodeId, dst); } else { throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); } if (dstNodeId == -1) { throw new IllegalArgumentException("Unable to find destination point: " + dst); } ArrayList nodeIds = backtracking(srcNodeId, dstNodeId); return nodeIds; } /** * Internal DFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalDfs(long srcNodeId, T dst) { Stack stack = new Stack(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.set(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { stack.push(neighborNodeId); visited.set(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal BFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalBfs(long srcNodeId, T dst) { Queue queue = new LinkedList(); this.nbEdgesAccessed = 0; queue.add(srcNodeId); visited.set(srcNodeId); while (!queue.isEmpty()) { long currentNodeId = queue.poll(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.degree(currentNodeId, useTransposed); for (long neighborNodeId : new Neighbors(graph, useTransposed, edges, currentNodeId)) { if (!visited.getBoolean(neighborNodeId)) { queue.add(neighborNodeId); visited.set(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal function of {@link #walk} to check if a node corresponds to the destination. * * @param nodeId current node * @param dst destination (either a node or a node type) * @return true if the node is a destination, or false otherwise */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; return nodeId == dstNodeId; } else if (dst instanceof Node.Type) { Node.Type dstType = (Node.Type) dst; return graph.getNodeType(nodeId) == dstType; } else { return false; } } /** * Internal backtracking function of {@link #walk}. 
* * @param srcNodeId source node * @param dstNodeId destination node * @return the found path, as a list of node ids */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList(); long currentNodeId = dstNodeId; while (currentNodeId != srcNodeId) { path.add(currentNodeId); currentNodeId = parentNode.get(currentNodeId); } path.add(srcNodeId); Collections.reverse(path); return path; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/Pp.java b/java/server/src/main/java/org/softwareheritage/graph/backend/Pp.java new file mode 100644 index 0000000..aa7ab3c --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/Pp.java @@ -0,0 +1,42 @@ +package org.softwareheritage.graph.backend; + +import java.io.BufferedWriter; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Writer; +import java.util.zip.GZIPInputStream; + +import it.unimi.dsi.bits.LongArrayBitVector; +import it.unimi.dsi.fastutil.Size64; +import it.unimi.dsi.fastutil.io.BinIO; +import it.unimi.dsi.fastutil.longs.LongBigArrays; +import it.unimi.dsi.fastutil.longs.LongBigList; +import it.unimi.dsi.fastutil.objects.Object2LongFunction; +import it.unimi.dsi.fastutil.objects.ObjectBigArrays; +import it.unimi.dsi.io.FastBufferedReader; +import it.unimi.dsi.io.LineIterator; + +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.SwhPID; +import org.softwareheritage.graph.backend.NodeTypesMap; + +public class Pp { + + public static void main(String[] args) throws IOException { + + Object2LongFunction mphMap = null; + try { + mphMap = (Object2LongFunction) BinIO.loadObject("all.mph"); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); + } + + long nbIds = (mphMap instanceof Size64) ? 
((Size64) mphMap).size64() : mphMap.size(); + + System.out.println("mph size: " + nbIds); + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/WriteLong.java b/java/server/src/main/java/org/softwareheritage/graph/t/WriteLong.java new file mode 100644 index 0000000..547011a --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/t/WriteLong.java @@ -0,0 +1,24 @@ +import java.io.DataOutputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +public class WriteLong { + public static void main(String args[]) { + String filename = null; + try { + filename = args[0]; + FileOutputStream file = new FileOutputStream(filename); + DataOutputStream data = new DataOutputStream(file); + while (true) { + data.writeLong(Long.parseLong(args[1])); + } + //data.close(); + } catch (IOException e) { + System.out.println("cannot write to file " + filename + "\n" + e); + System.exit(2); + } catch (ArrayIndexOutOfBoundsException e) { + System.out.println("Usage: Writer FILENAME INT"); + System.exit(1); + } + } +} diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/aiohttp_rest.py b/java/server/src/main/java/org/softwareheritage/graph/t/aiohttp_rest.py new file mode 100644 index 0000000..87c89a3 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/t/aiohttp_rest.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2018 Antoine Pietri +# SPDX-License-Identifier: MIT + +import argparse +import aiohttp +import aiohttp.web +import hashutil + + +async def hello(request): + return aiohttp.web.json_response( + {'hi': 'hello'}, headers={'Access-Control-Allow-Origin': '*'}) + + +async def make_app(): + app = aiohttp.web.Application() + app.add_routes([ + aiohttp.web.get('/hello', hello)]) + return app + + +async def get_stream(request): # from objstorage + hex_id = request.match_info['hex_id'] + obj_id = hashutil.hash_to_bytes(hex_id) + response = aiohttp.web.StreamResponse() + await response.prepare(request) + for chunk in request.app['objstorage'].get_stream(obj_id, 2 << 20): + await response.write(chunk) + await response.write_eof() + return response + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Test') + parser.add_argument('--host', default='127.0.0.1', help='Bind address') + parser.add_argument('--port', default=9012, help='Bind port') + + args = parser.parse_args() + + aiohttp.web.run_app(make_app(), host=args.host, port=args.port) diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/data.bin b/java/server/src/main/java/org/softwareheritage/graph/t/data.bin new file mode 100644 index 0000000..2580cea Binary files /dev/null and b/java/server/src/main/java/org/softwareheritage/graph/t/data.bin differ diff --git a/java/server/src/main/java/org/softwareheritage/graph/t/read_long.py b/java/server/src/main/java/org/softwareheritage/graph/t/read_long.py new file mode 100755 index 0000000..c353798 --- /dev/null +++ b/java/server/src/main/java/org/softwareheritage/graph/t/read_long.py @@ -0,0 +1,24 @@ +#!/usr/bin/python3 + +import struct +import sys + +BUF_SIZE = 64*1024 +BIN_FMT = '>q' # 64 bit integer, big endian + + +def main(fname): + with open(fname, 'rb') as f: + data = f.read(BUF_SIZE) + while(data): + for data in struct.iter_unpack(BIN_FMT, data): + print(data[0]) + data = f.read(BUF_SIZE) + + +if __name__ == '__main__': + try: + main(sys.argv[1]) + except IndexError: + print('Usage: read_long FILENAME') + sys.exit(1) diff --git 
a/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java index b58a540..9355989 100644 --- a/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java +++ b/java/server/src/test/java/org/softwareheritage/graph/GraphTest.java @@ -1,30 +1,30 @@ package org.softwareheritage.graph; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; import org.junit.Assert; import org.junit.BeforeClass; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; import org.softwareheritage.graph.Graph; public class GraphTest { static Graph graph; public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) { Assert.assertThat(expecteds, containsInAnyOrder(actuals.toArray())); } @BeforeClass public static void setUp() throws IOException { - Path graphPath = Paths.get("src", "test", "dataset", "output", "example"); + Path graphPath = Paths.get("..", "..", "tests", "dataset", "output", "example"); graph = new Graph(graphPath.toString()); } public Graph getGraph() { return graph; } } diff --git a/requirements.txt b/requirements.txt index f0777c3..3f9470a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ aiohttp click vcversioner +py4j diff --git a/setup.py b/setup.py index ea4133f..751640d 100755 --- a/setup.py +++ b/setup.py @@ -1,71 +1,75 @@ #!/usr/bin/env python3 # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from setuptools import setup, find_packages from os import path from io import open +from glob import glob here = path.abspath(path.dirname(__file__)) # Get the long description from the README file with open(path.join(here, 'README.md'), encoding='utf-8') as f: long_description = f.read() def parse_requirements(name=None): if name: reqf = 'requirements-%s.txt' % name else: reqf = 'requirements.txt' requirements = [] if not path.exists(reqf): return requirements with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements +JAR_PATHS = list(glob('java/server/target/swh-graph-*.jar')) + setup( name='swh.graph', description='Software Heritage graph service', long_description=long_description, long_description_content_type='text/markdown', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DGRPH', packages=find_packages(), install_requires=parse_requirements() + parse_requirements('swh'), tests_require=parse_requirements('test'), setup_requires=['vcversioner'], extras_require={'testing': parse_requirements('test')}, vcversioner={}, include_package_data=True, + data_files=[('share/swh-graph', JAR_PATHS)], entry_points=''' [console_scripts] swh-graph=swh.graph.cli:main [swh.cli.subcommands] graph=swh.graph.cli:cli ''', classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Development Status :: 3 - Alpha", ], project_urls={ 'Bug Reports': 'https://forge.softwareheritage.org/maniphest', 'Funding': 'https://www.softwareheritage.org/donate', 'Source': 
'https://forge.softwareheritage.org/source/swh-graph', }, ) diff --git a/swh/graph/cli.py b/swh/graph/cli.py index 329cea8..c0f5626 100644 --- a/swh/graph/cli.py +++ b/swh/graph/cli.py @@ -1,122 +1,150 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import aiohttp import click import sys from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup from swh.graph import client from swh.graph.pid import PidToIntMap, IntToPidMap +from swh.graph.server.app import make_app +from swh.graph.server.backend import Backend @click.group(name='graph', context_settings=CONTEXT_SETTINGS, cls=AliasedGroup) @click.pass_context def cli(ctx): """Software Heritage graph tools.""" ctx.ensure_object(dict) @cli.command('api-client') @click.option('--host', default='localhost', help='Graph server host') @click.option('--port', default='5009', help='Graph server port') @click.pass_context def api_client(ctx, host, port): """Client for the Software Heritage Graph REST service """ url = 'http://{}:{}'.format(host, port) app = client.RemoteGraphClient(url) # TODO: run web app print(app.stats()) @cli.group('map') @click.pass_context def map(ctx): """Manage swh-graph on-disk maps""" pass def dump_pid2int(filename): for (pid, int) in PidToIntMap(filename): print('{}\t{}'.format(pid, int)) def dump_int2pid(filename): for (int, pid) in IntToPidMap(filename): print('{}\t{}'.format(int, pid)) def restore_pid2int(filename): """read a textual PID->int map from stdin and write its binary version to filename """ with open(filename, 'wb') as dst: for line in sys.stdin: (str_pid, str_int) = line.split() PidToIntMap.write_record(dst, str_pid, int(str_int)) def restore_int2pid(filename, length): """read a textual int->PID map from stdin and write its binary version to filename """ int2pid = IntToPidMap(filename, mode='wb', length=length) for line in sys.stdin: (str_int, str_pid) = line.split() int2pid[int(str_int)] = str_pid int2pid.close() @map.command('dump') @click.option('--type', '-t', 'map_type', required=True, type=click.Choice(['pid2int', 'int2pid']), help='type of map to dump') @click.argument('filename', required=True, type=click.Path(exists=True)) @click.pass_context def dump_map(ctx, map_type, filename): """dump a binary PID<->int map to textual format""" if map_type == 'pid2int': dump_pid2int(filename) elif map_type == 'int2pid': dump_int2pid(filename) else: raise ValueError('invalid map type: ' + map_type) pass @map.command('restore') @click.option('--type', '-t', 'map_type', required=True, type=click.Choice(['pid2int', 'int2pid']), help='type of map to dump') @click.option('--length', '-l', type=int, help='''map size in number of logical records (required for int2pid maps)''') @click.argument('filename', required=True, type=click.Path()) @click.pass_context def restore_map(ctx, map_type, length, filename): """restore a binary PID<->int map from textual format""" if map_type == 'pid2int': - restore_pid2int(filename, length) + restore_pid2int(filename) elif map_type == 'int2pid': if length is None: raise click.UsageError( 'map length is required when restoring {} maps'.format( map_type), ctx) restore_int2pid(filename, length) else: raise ValueError('invalid map type: ' + map_type) +@cli.group('graph') +@click.pass_context +def graph(ctx): + """Manage swh-graph on-disk maps""" + pass + + +@graph.command(name='rpc-serve') 
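+# Hypothetical invocation sketch (paths and port are illustrative, taken from the
+# test fixture): swh graph graph rpc-serve --graph tests/dataset/output/example --port 5009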
+@click.option('--host', default='0.0.0.0', + metavar='IP', show_default=True, + help="Host ip address to bind the server on") +@click.option('--port', default=5009, type=click.INT, + metavar='PORT', show_default=True, + help="Binding port of the server") +@click.option('--graph', required=True, metavar='GRAPH', + help="Path prefix of the graph to load") +@click.pass_context +def serve(ctx, host, port, graph): + backend = Backend(graph_path=graph) + app = make_app(backend=backend) + + with backend: + aiohttp.web.run_app(app, host=host, port=port) + + def main(): return cli(auto_envvar_prefix='SWH_GRAPH') if __name__ == '__main__': main() diff --git a/swh/graph/client.py b/swh/graph/client.py index ac66da2..a2a8095 100644 --- a/swh/graph/client.py +++ b/swh/graph/client.py @@ -1,62 +1,82 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import json + from swh.core.api import RPCClient class GraphAPIError(Exception): """Graph API Error""" def __str__(self): return ('An unexpected error occurred in the Graph backend: {}' .format(self.args)) class RemoteGraphClient(RPCClient): """Client to the Software Heritage Graph.""" def __init__(self, url, timeout=None): super().__init__( api_exception=GraphAPIError, url=url, timeout=timeout) - # Web API endpoints + def raw_verb_lines(self, verb, endpoint, **kwargs): + response = self.raw_verb(verb, endpoint, stream=True, **kwargs) + for line in response.iter_lines(): + yield line.decode().lstrip('\n') - def leaves(self, src, edges="*", direction="forward"): - return self.get('leaves/{}'.format(src), - params={ - 'edges': edges, - 'direction': direction - }) + def get_lines(self, endpoint, **kwargs): + yield from self.raw_verb_lines('get', endpoint, **kwargs) - def neighbors(self, src, edges="*", direction="forward"): - return self.get('neighbors/{}'.format(src), - params={ - 'edges': edges, - 'direction': direction - }) + # Web API endpoints def stats(self): return self.get('stats') + def leaves(self, src, edges="*", direction="forward"): + return self.get_lines( + 'leaves/{}'.format(src), + params={ + 'edges': edges, + 'direction': direction + }) + + def neighbors(self, src, edges="*", direction="forward"): + return self.get_lines( + 'neighbors/{}'.format(src), + params={ + 'edges': edges, + 'direction': direction + }) + def visit_nodes(self, src, edges="*", direction="forward"): - return self.get('visit/nodes/{}'.format(src), - params={ - 'edges': edges, - 'direction': direction - }) + return self.get_lines( + 'visit/nodes/{}'.format(src), + params={ + 'edges': edges, + 'direction': direction + }) def visit_paths(self, src, edges="*", direction="forward"): - return self.get('visit/paths/{}'.format(src), - params={ - 'edges': edges, - 'direction': direction - }) + def decode_path_wrapper(it): + for e in it: + yield json.loads(e) + + return decode_path_wrapper( + self.get_lines( + 'visit/paths/{}'.format(src), + params={ + 'edges': edges, + 'direction': direction + })) def walk(self, src, dst, edges="*", traversal="dfs", direction="forward"): - return self.get('walk/{}/{}'.format(src, dst), - params={ - 'edges': edges, - 'traversal': traversal, - 'direction': direction - }) + return self.get_lines( + 'walk/{}/{}'.format(src, dst), + params={ + 'edges': edges, + 'traversal': traversal, + 'direction': direction + }) diff --git a/swh/graph/server/__init__.py 
b/swh/graph/server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/graph/server/app.py b/swh/graph/server/app.py new file mode 100644 index 0000000..21b9dbc --- /dev/null +++ b/swh/graph/server/app.py @@ -0,0 +1,102 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +""" +A proxy HTTP server for swh-graph, talking to the Java code via py4j, and using +FIFO as a transport to stream integers between the two languages. +""" + +import contextlib +import aiohttp.web + +from swh.core.api.asynchronous import RPCServerApp + + +@contextlib.asynccontextmanager +async def stream_response(request, *args, **kwargs): + response = aiohttp.web.StreamResponse(*args, **kwargs) + await response.prepare(request) + yield response + await response.write_eof() + + +async def index(request): + return aiohttp.web.Response( + content_type='text/html', + body=""" +Software Heritage storage server + +

+You have reached the Software Heritage graph API server.
+
+See its API documentation for more information.
+ +""") + + +async def stats(request): + stats = request.app['backend'].stats() + return aiohttp.web.Response(body=stats, content_type='application/json') + + +def get_simple_traversal_handler(ttype): + async def simple_traversal(request): + src = request.match_info['src'] + edges = request.query.get('edges', '*') + direction = request.query.get('direction', 'forward') + + async with stream_response(request) as response: + async for res_pid in request.app['backend'].simple_traversal( + ttype, direction, edges, src + ): + await response.write('{}\n'.format(res_pid).encode()) + return response + + return simple_traversal + + +async def walk(request): + src = request.match_info['src'] + dst = request.match_info['dst'] + edges = request.query.get('edges', '*') + direction = request.query.get('direction', 'forward') + algo = request.query.get('traversal', 'dfs') + + it = request.app['backend'].walk(direction, edges, algo, src, dst) + async with stream_response(request) as response: + async for res_pid in it: + await response.write('{}\n'.format(res_pid).encode()) + return response + + +async def visit_paths(request): + src = request.match_info['src'] + edges = request.query.get('edges', '*') + direction = request.query.get('direction', 'forward') + + it = request.app['backend'].visit_paths(direction, edges, src) + async with stream_response(request) as response: + async for res_pid in it: + await response.write('{}\n'.format(res_pid).encode()) + return response + + +def make_app(backend, **kwargs): + app = RPCServerApp(**kwargs) + app.router.add_route('GET', '/', index) + app.router.add_route('GET', '/graph/stats', stats) + app.router.add_route('GET', '/graph/leaves/{src}', + get_simple_traversal_handler('leaves')) + app.router.add_route('GET', '/graph/neighbors/{src}', + get_simple_traversal_handler('neighbors')) + app.router.add_route('GET', '/graph/visit/nodes/{src}', + get_simple_traversal_handler('visit_nodes')) + app.router.add_route('GET', '/graph/visit/paths/{src}', visit_paths) + app.router.add_route('GET', '/graph/walk/{src}/{dst}', walk) + + app['backend'] = backend + return app diff --git a/swh/graph/server/backend.py b/swh/graph/server/backend.py new file mode 100644 index 0000000..0aecc14 --- /dev/null +++ b/swh/graph/server/backend.py @@ -0,0 +1,161 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import asyncio +import contextlib +import json +import os +import pathlib +import struct +import sys +import tempfile + +from py4j.java_gateway import JavaGateway + +from swh.graph.pid import IntToPidMap, PidToIntMap +from swh.model.identifiers import PID_TYPES + +BUF_SIZE = 64*1024 +BIN_FMT = '>q' # 64 bit integer, big endian +PATH_SEPARATOR_ID = -1 +NODE2PID_EXT = 'node2pid.bin' +PID2NODE_EXT = 'pid2node.bin' + + +def find_graph_jar(): + swh_graph_root = pathlib.Path(__file__).parents[3] + try_paths = [ + swh_graph_root / 'java/server/target/', + pathlib.Path(sys.prefix) / 'share/swh-graph/', + ] + for path in try_paths: + glob = list(path.glob('swh-graph-*.jar')) + if glob: + return str(glob[0]) + raise RuntimeError("swh-graph-*.jar not found. 
Have you run `make java`?") + + +class Backend: + def __init__(self, graph_path): + self.gateway = None + self.entry = None + self.graph_path = graph_path + + def __enter__(self): + self.gateway = JavaGateway.launch_gateway( + java_path=None, + classpath=find_graph_jar(), + die_on_exit=True, + redirect_stdout=sys.stdout, + redirect_stderr=sys.stderr, + ) + self.entry = self.gateway.jvm.org.softwareheritage.graph.Entry() + self.entry.load_graph(self.graph_path) + self.node2pid = IntToPidMap(self.graph_path + '.' + NODE2PID_EXT) + self.pid2node = PidToIntMap(self.graph_path + '.' + PID2NODE_EXT) + self.stream_proxy = JavaStreamProxy(self.entry) + + def __exit__(self, exc_type, exc_value, tb): + self.gateway.shutdown() + + def stats(self): + return self.entry.stats() + + async def simple_traversal(self, ttype, direction, edges_fmt, src): + assert ttype in ('leaves', 'neighbors', 'visit_nodes', 'visit_paths') + src_id = self.pid2node[src] + method = getattr(self.stream_proxy, ttype) + async for node_id in method(direction, edges_fmt, src_id): + if node_id == PATH_SEPARATOR_ID: + yield None + else: + yield self.node2pid[node_id] + + async def walk(self, direction, edges_fmt, algo, src, dst): + src_id = self.pid2node[src] + if dst in PID_TYPES: + it = self.stream_proxy.walk_type(direction, edges_fmt, algo, + src_id, dst) + else: + dst_id = self.pid2node[dst] + it = self.stream_proxy.walk(direction, edges_fmt, algo, + src_id, dst_id) + + async for node_id in it: + yield self.node2pid[node_id] + + async def visit_paths(self, *args): + buffer = [] + async for res_pid in self.simple_traversal('visit_paths', *args): + if res_pid is None: # Path separator, flush + yield json.dumps(buffer) + buffer = [] + else: + buffer.append(res_pid) + + +class JavaStreamProxy: + """A proxy class for the org.softwareheritage.graph.Entry Java class that + takes care of the setup and teardown of the named-pipe FIFO communication + between Python and Java. 
+ + Initialize JavaStreamProxy using: + + proxy = JavaStreamProxy(swh_entry_class_instance) + + Then you can call an Entry method and iterate on the FIFO results like + this: + + async for value in proxy.java_method(arg1, arg2): + print(value) + """ + + def __init__(self, entry): + self.entry = entry + + async def read_node_ids(self, fname): + loop = asyncio.get_event_loop() + with (await loop.run_in_executor(None, open, fname, 'rb')) as f: + while True: + data = await loop.run_in_executor(None, f.read, BUF_SIZE) + if not data: + break + for data in struct.iter_unpack(BIN_FMT, data): + yield data[0] + + class _HandlerWrapper: + def __init__(self, handler): + self._handler = handler + + def __getattr__(self, name): + func = getattr(self._handler, name) + + async def java_call(*args, **kwargs): + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, lambda: func(*args, **kwargs)) + + def java_task(*args, **kwargs): + return asyncio.create_task(java_call(*args, **kwargs)) + + return java_task + + @contextlib.contextmanager + def get_handler(self): + with tempfile.TemporaryDirectory(prefix='swh-graph-') as tmpdirname: + cli_fifo = os.path.join(tmpdirname, 'swh-graph.fifo') + os.mkfifo(cli_fifo) + reader = self.read_node_ids(cli_fifo) + query_handler = self.entry.get_handler(cli_fifo) + handler = self._HandlerWrapper(query_handler) + yield (handler, reader) + + def __getattr__(self, name): + async def java_call_iterator(*args, **kwargs): + with self.get_handler() as (handler, reader): + java_task = getattr(handler, name)(*args, **kwargs) + async for value in reader: + yield value + await java_task + return java_call_iterator diff --git a/swh/graph/tests/conftest.py b/swh/graph/tests/conftest.py new file mode 100644 index 0000000..92ace00 --- /dev/null +++ b/swh/graph/tests/conftest.py @@ -0,0 +1,40 @@ +import multiprocessing +import pytest +from pathlib import Path + +from aiohttp.test_utils import TestServer, TestClient, loop_context + +from swh.graph.client import RemoteGraphClient +from swh.graph.server.backend import Backend +from swh.graph.server.app import make_app + +SWH_GRAPH_ROOT = Path(__file__).parents[3] +TEST_GRAPH_PATH = SWH_GRAPH_ROOT / 'tests/dataset/output/example' + + +class GraphServerProcess(multiprocessing.Process): + def __init__(self, q, *args, **kwargs): + self.q = q + super().__init__(*args, **kwargs) + + def run(self): + backend = Backend(graph_path=str(TEST_GRAPH_PATH)) + with backend: + with loop_context() as loop: + self.loop = loop + app = make_app(backend=backend) + client = TestClient(TestServer(app), loop=loop) + loop.run_until_complete(client.start_server()) + url = client.make_url('/graph/') + self.q.put(url) + loop.run_forever() + + +@pytest.fixture(scope="module") +def graph_client(): + queue = multiprocessing.Queue() + server = GraphServerProcess(queue) + server.start() + url = queue.get() + yield RemoteGraphClient(str(url)) + server.terminate() diff --git a/swh/graph/tests/test_api_client.py b/swh/graph/tests/test_api_client.py index b49ee81..bbf55a2 100644 --- a/swh/graph/tests/test_api_client.py +++ b/swh/graph/tests/test_api_client.py @@ -1,190 +1,104 @@ -from pathlib import Path -from urllib.request import urlopen -import subprocess -import time - -import aiohttp.test_utils -import pytest - -from swh.graph.client import RemoteGraphClient -from swh.graph.tests import SWH_GRAPH_VERSION - - -@pytest.fixture(scope='module') -def graph_client(): - swh_graph_root = Path(__file__).parents[3] - java_dir = swh_graph_root / 'java/server' - - # 
Compile Java server using maven - pom_path = java_dir / 'pom.xml' - subprocess.run( - ['mvn', '-f', str(pom_path), 'compile', 'assembly:single'], check=True) - - port = aiohttp.test_utils.unused_port() - - # Start Java server - jar_file = 'swh-graph-{}-jar-with-dependencies.jar'.format( - SWH_GRAPH_VERSION) - jar_path = java_dir / 'target' / jar_file - graph_path = java_dir / 'src/test/dataset/output/example' - server = subprocess.Popen([ - 'java', '-cp', str(jar_path), - 'org.softwareheritage.graph.App', str(graph_path), '-p', str(port) - ]) - - # Wait max 5 seconds for server to spawn - localhost = 'http://0.0.0.0:{}'.format(port) - i = 0 - while i < 20: - try: - urlopen(localhost) - except Exception: - i += 1 - time.sleep(0.25) - else: - break - - # Start Python client - client = RemoteGraphClient(localhost) - - yield client - - print('Service teardown') - server.kill() - - -class TestEndpoints: - @pytest.fixture(autouse=True) - def init_graph_client(self, graph_client): - self.client = graph_client - - @staticmethod - def assert_endpoint_output(actual, expected): - assert set(actual.keys()) == {'result', 'meta'} - assert set(actual['result']) == set(expected['result']) - assert actual['meta'] == expected['meta'] - - def test_leaves(self): - actual = self.client.leaves( - 'swh:1:ori:0000000000000000000000000000000000000021' - ) - expected = { - 'result': [ - 'swh:1:cnt:0000000000000000000000000000000000000001', - 'swh:1:cnt:0000000000000000000000000000000000000004', - 'swh:1:cnt:0000000000000000000000000000000000000005', - 'swh:1:cnt:0000000000000000000000000000000000000007' - ], - 'meta': { - 'nb_edges_accessed': 13 - } - } - TestEndpoints.assert_endpoint_output(actual, expected) - - def test_neighbors(self): - actual = self.client.neighbors( +def test_stats(graph_client): + stats = graph_client.stats() + + assert set(stats.keys()) == {'counts', 'ratios', 'indegree', + 'outdegree'} + + assert set(stats['counts'].keys()) == {'nodes', 'edges'} + assert set(stats['ratios'].keys()) == {'compression', 'bits_per_node', + 'bits_per_edge', 'avg_locality'} + assert set(stats['indegree'].keys()) == {'min', 'max', 'avg'} + assert set(stats['outdegree'].keys()) == {'min', 'max', 'avg'} + + assert stats['counts']['nodes'] == 21 + assert stats['counts']['edges'] == 23 + assert isinstance(stats['ratios']['compression'], float) + assert isinstance(stats['ratios']['bits_per_node'], float) + assert isinstance(stats['ratios']['bits_per_edge'], float) + assert isinstance(stats['ratios']['avg_locality'], float) + assert stats['indegree']['min'] == 0 + assert stats['indegree']['max'] == 3 + assert isinstance(stats['indegree']['avg'], float) + assert stats['outdegree']['min'] == 0 + assert stats['outdegree']['max'] == 3 + assert isinstance(stats['outdegree']['avg'], float) + + +def test_leaves(graph_client): + actual = list(graph_client.leaves( + 'swh:1:ori:0000000000000000000000000000000000000021' + )) + expected = [ + 'swh:1:cnt:0000000000000000000000000000000000000001', + 'swh:1:cnt:0000000000000000000000000000000000000004', + 'swh:1:cnt:0000000000000000000000000000000000000005', + 'swh:1:cnt:0000000000000000000000000000000000000007' + ] + assert set(actual) == set(expected) + + +def test_neighbors(graph_client): + actual = list(graph_client.neighbors( + 'swh:1:rev:0000000000000000000000000000000000000009', + direction='backward' + )) + expected = [ + 'swh:1:snp:0000000000000000000000000000000000000020', + 'swh:1:rel:0000000000000000000000000000000000000010', + 
'swh:1:rev:0000000000000000000000000000000000000013' + ] + assert set(actual) == set(expected) + + +def test_visit_nodes(graph_client): + actual = list(graph_client.visit_nodes( + 'swh:1:rel:0000000000000000000000000000000000000010', + edges='rel:rev,rev:rev' + )) + expected = [ + 'swh:1:rel:0000000000000000000000000000000000000010', + 'swh:1:rev:0000000000000000000000000000000000000009', + 'swh:1:rev:0000000000000000000000000000000000000003' + ] + assert set(actual) == set(expected) + + +def test_visit_paths(graph_client): + actual = list(graph_client.visit_paths( + 'swh:1:snp:0000000000000000000000000000000000000020', + edges='snp:*,rev:*')) + actual = [tuple(path) for path in actual] + expected = [ + ( + 'swh:1:snp:0000000000000000000000000000000000000020', 'swh:1:rev:0000000000000000000000000000000000000009', - direction='backward' - ) - expected = { - 'result': [ - 'swh:1:snp:0000000000000000000000000000000000000020', - 'swh:1:rel:0000000000000000000000000000000000000010', - 'swh:1:rev:0000000000000000000000000000000000000013' - ], - 'meta': { - 'nb_edges_accessed': 3 - } - } - TestEndpoints.assert_endpoint_output(actual, expected) - - def test_stats(self): - stats = self.client.stats() - - assert set(stats.keys()) == {'counts', 'ratios', 'indegree', - 'outdegree'} - - assert set(stats['counts'].keys()) == {'nodes', 'edges'} - assert set(stats['ratios'].keys()) == {'compression', 'bits_per_node', - 'bits_per_edge', 'avg_locality'} - assert set(stats['indegree'].keys()) == {'min', 'max', 'avg'} - assert set(stats['outdegree'].keys()) == {'min', 'max', 'avg'} - - assert stats['counts']['nodes'] == 21 - assert stats['counts']['edges'] == 23 - assert isinstance(stats['ratios']['compression'], float) - assert isinstance(stats['ratios']['bits_per_node'], float) - assert isinstance(stats['ratios']['bits_per_edge'], float) - assert isinstance(stats['ratios']['avg_locality'], float) - assert stats['indegree']['min'] == 0 - assert stats['indegree']['max'] == 3 - assert isinstance(stats['indegree']['avg'], float) - assert stats['outdegree']['min'] == 0 - assert stats['outdegree']['max'] == 3 - assert isinstance(stats['outdegree']['avg'], float) - - def test_visit_nodes(self): - actual = self.client.visit_nodes( - 'swh:1:rel:0000000000000000000000000000000000000010', - edges='rel:rev,rev:rev' - ) - expected = { - 'result': [ - 'swh:1:rel:0000000000000000000000000000000000000010', - 'swh:1:rev:0000000000000000000000000000000000000009', - 'swh:1:rev:0000000000000000000000000000000000000003' - ], - 'meta': { - 'nb_edges_accessed': 4 - } - } - TestEndpoints.assert_endpoint_output(actual, expected) - - def test_visit_paths(self): - actual = self.client.visit_paths( - 'swh:1:snp:0000000000000000000000000000000000000020', - edges='snp:*,rev:*') - actual['result'] = [tuple(path) for path in actual['result']] - expected = { - 'result': [ - ( - 'swh:1:snp:0000000000000000000000000000000000000020', - 'swh:1:rev:0000000000000000000000000000000000000009', - 'swh:1:rev:0000000000000000000000000000000000000003', - 'swh:1:dir:0000000000000000000000000000000000000002' - ), - ( - 'swh:1:snp:0000000000000000000000000000000000000020', - 'swh:1:rev:0000000000000000000000000000000000000009', - 'swh:1:dir:0000000000000000000000000000000000000008' - ), - ( - 'swh:1:snp:0000000000000000000000000000000000000020', - 'swh:1:rel:0000000000000000000000000000000000000010' - ) - ], - 'meta': { - 'nb_edges_accessed': 10 - } - } - TestEndpoints.assert_endpoint_output(actual, expected) - - def test_walk(self): - actual = 
self.client.walk( - 'swh:1:dir:0000000000000000000000000000000000000016', 'rel', - edges='dir:dir,dir:rev,rev:*', - direction='backward', - traversal='bfs' + 'swh:1:rev:0000000000000000000000000000000000000003', + 'swh:1:dir:0000000000000000000000000000000000000002' + ), + ( + 'swh:1:snp:0000000000000000000000000000000000000020', + 'swh:1:rev:0000000000000000000000000000000000000009', + 'swh:1:dir:0000000000000000000000000000000000000008' + ), + ( + 'swh:1:snp:0000000000000000000000000000000000000020', + 'swh:1:rel:0000000000000000000000000000000000000010' ) - expected = { - 'result': [ - 'swh:1:dir:0000000000000000000000000000000000000016', - 'swh:1:dir:0000000000000000000000000000000000000017', - 'swh:1:rev:0000000000000000000000000000000000000018', - 'swh:1:rel:0000000000000000000000000000000000000019' - ], - 'meta': { - 'nb_edges_accessed': 3 - } - } - TestEndpoints.assert_endpoint_output(actual, expected) + ] + assert set(actual) == set(expected) + + +def test_walk(graph_client): + actual = list(graph_client.walk( + 'swh:1:dir:0000000000000000000000000000000000000016', 'rel', + edges='dir:dir,dir:rev,rev:*', + direction='backward', + traversal='bfs' + )) + expected = [ + 'swh:1:dir:0000000000000000000000000000000000000016', + 'swh:1:dir:0000000000000000000000000000000000000017', + 'swh:1:rev:0000000000000000000000000000000000000018', + 'swh:1:rel:0000000000000000000000000000000000000019' + ] + assert set(actual) == set(expected) diff --git a/java/server/src/test/dataset/.gitignore b/tests/dataset/.gitignore similarity index 100% rename from java/server/src/test/dataset/.gitignore rename to tests/dataset/.gitignore diff --git a/java/server/src/test/dataset/example.edges.csv b/tests/dataset/example.edges.csv similarity index 100% rename from java/server/src/test/dataset/example.edges.csv rename to tests/dataset/example.edges.csv diff --git a/java/server/src/test/dataset/example.edges.csv.gz b/tests/dataset/example.edges.csv.gz similarity index 100% rename from java/server/src/test/dataset/example.edges.csv.gz rename to tests/dataset/example.edges.csv.gz diff --git a/java/server/src/test/dataset/example.nodes.csv b/tests/dataset/example.nodes.csv similarity index 100% rename from java/server/src/test/dataset/example.nodes.csv rename to tests/dataset/example.nodes.csv diff --git a/java/server/src/test/dataset/example.nodes.csv.gz b/tests/dataset/example.nodes.csv.gz similarity index 100% rename from java/server/src/test/dataset/example.nodes.csv.gz rename to tests/dataset/example.nodes.csv.gz diff --git a/java/server/src/test/dataset/generate_graph.sh b/tests/dataset/generate_graph.sh similarity index 100% rename from java/server/src/test/dataset/generate_graph.sh rename to tests/dataset/generate_graph.sh diff --git a/java/server/src/test/dataset/img/.gitignore b/tests/dataset/img/.gitignore similarity index 100% rename from java/server/src/test/dataset/img/.gitignore rename to tests/dataset/img/.gitignore diff --git a/java/server/src/test/dataset/img/Makefile b/tests/dataset/img/Makefile similarity index 100% rename from java/server/src/test/dataset/img/Makefile rename to tests/dataset/img/Makefile diff --git a/java/server/src/test/dataset/img/example.dot b/tests/dataset/img/example.dot similarity index 100% rename from java/server/src/test/dataset/img/example.dot rename to tests/dataset/img/example.dot diff --git a/java/server/src/test/dataset/output/example-transposed.graph b/tests/dataset/output/example-transposed.graph similarity index 100% rename from 
java/server/src/test/dataset/output/example-transposed.graph rename to tests/dataset/output/example-transposed.graph diff --git a/java/server/src/test/dataset/output/example-transposed.obl b/tests/dataset/output/example-transposed.obl similarity index 100% rename from java/server/src/test/dataset/output/example-transposed.obl rename to tests/dataset/output/example-transposed.obl diff --git a/java/server/src/test/dataset/output/example-transposed.offsets b/tests/dataset/output/example-transposed.offsets similarity index 100% rename from java/server/src/test/dataset/output/example-transposed.offsets rename to tests/dataset/output/example-transposed.offsets diff --git a/java/server/src/test/dataset/output/example-transposed.properties b/tests/dataset/output/example-transposed.properties similarity index 100% rename from java/server/src/test/dataset/output/example-transposed.properties rename to tests/dataset/output/example-transposed.properties diff --git a/java/server/src/test/dataset/output/example.graph b/tests/dataset/output/example.graph similarity index 100% rename from java/server/src/test/dataset/output/example.graph rename to tests/dataset/output/example.graph diff --git a/java/server/src/test/dataset/output/example.indegree b/tests/dataset/output/example.indegree similarity index 100% rename from java/server/src/test/dataset/output/example.indegree rename to tests/dataset/output/example.indegree diff --git a/java/server/src/test/dataset/output/example.mph b/tests/dataset/output/example.mph similarity index 100% rename from java/server/src/test/dataset/output/example.mph rename to tests/dataset/output/example.mph diff --git a/tests/dataset/output/example.node2pid.bin b/tests/dataset/output/example.node2pid.bin new file mode 100644 index 0000000..7755043 Binary files /dev/null and b/tests/dataset/output/example.node2pid.bin differ diff --git a/java/server/src/test/dataset/output/example.node2pid.csv b/tests/dataset/output/example.node2pid.csv similarity index 100% rename from java/server/src/test/dataset/output/example.node2pid.csv rename to tests/dataset/output/example.node2pid.csv diff --git a/java/server/src/test/dataset/output/example.node2type.map b/tests/dataset/output/example.node2type.map similarity index 100% rename from java/server/src/test/dataset/output/example.node2type.map rename to tests/dataset/output/example.node2type.map diff --git a/java/server/src/test/dataset/output/example.obl b/tests/dataset/output/example.obl similarity index 100% rename from java/server/src/test/dataset/output/example.obl rename to tests/dataset/output/example.obl diff --git a/java/server/src/test/dataset/output/example.offsets b/tests/dataset/output/example.offsets similarity index 100% rename from java/server/src/test/dataset/output/example.offsets rename to tests/dataset/output/example.offsets diff --git a/java/server/src/test/dataset/output/example.order b/tests/dataset/output/example.order similarity index 100% rename from java/server/src/test/dataset/output/example.order rename to tests/dataset/output/example.order diff --git a/java/server/src/test/dataset/output/example.outdegree b/tests/dataset/output/example.outdegree similarity index 100% rename from java/server/src/test/dataset/output/example.outdegree rename to tests/dataset/output/example.outdegree diff --git a/tests/dataset/output/example.pid2node.bin b/tests/dataset/output/example.pid2node.bin new file mode 100644 index 0000000..753264c Binary files /dev/null and b/tests/dataset/output/example.pid2node.bin differ diff --git 
a/java/server/src/test/dataset/output/example.pid2node.csv b/tests/dataset/output/example.pid2node.csv similarity index 100% rename from java/server/src/test/dataset/output/example.pid2node.csv rename to tests/dataset/output/example.pid2node.csv diff --git a/java/server/src/test/dataset/output/example.properties b/tests/dataset/output/example.properties similarity index 100% rename from java/server/src/test/dataset/output/example.properties rename to tests/dataset/output/example.properties diff --git a/java/server/src/test/dataset/output/example.stats b/tests/dataset/output/example.stats similarity index 100% rename from java/server/src/test/dataset/output/example.stats rename to tests/dataset/output/example.stats diff --git a/tox.ini b/tox.ini index 1eb5527..c52988e 100644 --- a/tox.ini +++ b/tox.ini @@ -1,24 +1,27 @@ [tox] envlist=flake8,mypy,py3 [testenv:py3] deps = .[testing] pytest-cov +whitelist_externals = mvn commands = + mvn -f java/server/pom.xml compile assembly:single pytest --cov=swh --cov-branch {posargs} [testenv:flake8] skip_install = true deps = flake8 commands = {envpython} -m flake8 [testenv:mypy] skip_install = true +ignore_missing_imports = true deps = .[testing] mypy commands = mypy swh
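For reference, a minimal usage sketch of the streaming client introduced by this patch (not part of the diff itself): it assumes an `rpc-serve` instance is reachable locally under the `/graph/` prefix, as the fixture in swh/graph/tests/conftest.py sets up, and reuses PIDs from the bundled test dataset; the host and port below are illustrative.

from swh.graph.client import RemoteGraphClient

client = RemoteGraphClient('http://127.0.0.1:5009/graph/')

# stats() is still a single JSON document, decoded into a dict
print(client.stats())

# traversal endpoints now stream results: each iteration yields one PID line
for pid in client.leaves(
        'swh:1:ori:0000000000000000000000000000000000000021'):
    print(pid)

# visit_paths() additionally decodes every streamed line as a JSON path
for path in client.visit_paths(
        'swh:1:snp:0000000000000000000000000000000000000020',
        edges='snp:*,rev:*'):
    print(' -> '.join(path))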