diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5bf56ae..86214cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,52 +1,52 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.1.0 hooks: - id: trailing-whitespace - id: check-json - id: check-yaml - repo: https://gitlab.com/pycqa/flake8 rev: 4.0.1 hooks: - id: flake8 additional_dependencies: [flake8-bugbear==22.3.23] - repo: https://github.com/codespell-project/codespell rev: v2.1.0 hooks: - id: codespell name: Check source code spelling args: ["-L te,wth,alledges,afterall"] stages: [commit] - repo: local hooks: - id: mypy name: mypy entry: mypy args: [swh] pass_filenames: false language: system types: [python] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort - repo: https://github.com/python/black rev: 22.3.0 hooks: - id: black - repo: local hooks: - id: java-coding-style name: java style entry: mvn args: ["-f", "java/pom.xml", "spotless:apply"] pass_filenames: false language: system -exclude: ^swh/graph/rpc/ +exclude: ^swh/graph/grpc/ diff --git a/PKG-INFO b/PKG-INFO index 0eb9464..3551549 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,52 +1,52 @@ Metadata-Version: 2.1 Name: swh.graph -Version: 2.0.0 +Version: 2.1.0 Summary: Software Heritage graph service Home-page: https://forge.softwareheritage.org/diffusion/DGRPH Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-graph Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-graph/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - graph service ================================= Tooling and services, collectively known as ``swh-graph``, providing fast access to the graph representation of the `Software Heritage `_ `archive `_. The service is in-memory, based on a compressed representation of the Software Heritage Merkle DAG. Bibliography ------------ In addition to accompanying technical documentation, ``swh-graph`` is also described in the following scientific paper. If you publish results based on ``swh-graph``, please acknowledge it by citing the paper as follows: .. note:: Paolo Boldi, Antoine Pietri, Sebastiano Vigna, Stefano Zacchiroli. `Ultra-Large-Scale Repository Analysis via Graph Compression `_. In proceedings of `SANER 2020 `_: The 27th IEEE International Conference on Software Analysis, Evolution and Reengineering, pages 184-194. IEEE 2020. Links: `preprint `_, `bibtex `_. 
diff --git a/java/pom.xml b/java/pom.xml index 6f689f7..f597b66 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,403 +1,409 @@ 4.0.0 org.softwareheritage.graph swh-graph ${git.closest.tag.name} swh-graph https://forge.softwareheritage.org/source/swh-graph/ UTF-8 11 3.21.1 1.47.0 ch.qos.logback logback-classic 1.2.3 org.junit.jupiter junit-jupiter-api 5.7.0 test org.junit.jupiter junit-jupiter-engine 5.7.0 test + + org.junit.jupiter + junit-jupiter-params + 5.7.0 + test + org.slf4j slf4j-simple 1.7.26 it.unimi.dsi webgraph-big 3.7.0 it.unimi.dsi fastutil 8.5.8 it.unimi.dsi dsiutils 2.7.2 it.unimi.dsi sux4j 5.4.0 it.unimi.dsi law 2.7.2 org.apache.hadoop hadoop-common org.umlgraph umlgraph org.eclipse.jetty.aggregate jetty-all it.unimi.di mg4j it.unimi.di mg4j-big com.martiansoftware jsap 2.1 commons-codec commons-codec 1.15 com.github.luben zstd-jni 1.5.1-1 org.apache.orc orc-core 1.7.1 org.apache.hadoop hadoop-common 3.3.1 org.apache.hadoop hadoop-client-runtime 3.3.1 com.google.protobuf protobuf-java ${protobuf.version} io.grpc grpc-netty-shaded ${grpc.version} io.grpc grpc-protobuf ${grpc.version} io.grpc grpc-stub ${grpc.version} io.grpc grpc-services ${grpc.version} io.grpc grpc-testing ${grpc.version} javax.annotation javax.annotation-api 1.3.2 com.google.protobuf protobuf-java-util ${protobuf.version} maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 11 11 -verbose -Xlint:all maven-surefire-plugin 2.22.2 maven-failsafe-plugin 2.22.2 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-dependency-plugin 3.1.2 maven-assembly-plugin 3.3.0 org.softwareheritage.graph.rpc.GraphServer jar-with-dependencies false make-assembly package single com.diffplug.spotless spotless-maven-plugin 2.22.1 *.md .gitignore true 4 4.16.0 .coding-style.xml pl.project13.maven git-commit-id-plugin 3.0.1 get-the-git-infos revision initialize true true true true v* git.closest.tag.name ^v true maven-source-plugin 2.1.1 bundle-sources package jar-no-fork test-jar-no-fork org.apache.maven.plugins maven-javadoc-plugin 3.3.1 resource-bundles package resource-bundle test-resource-bundle false javadoc-jar package jar true it.unimi.dsi:webgraph-big:* https://webgraph.di.unimi.it/docs-big/ https://dsiutils.di.unimi.it/docs/ https://fastutil.di.unimi.it/docs/ https://law.di.unimi.it/software/law-docs/ implSpec a Implementation Requirements: implNote a Implementation Note: org.xolstice.maven.plugins protobuf-maven-plugin 0.6.1 com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} grpc-java io.grpc:protoc-gen-grpc-java:${grpc.version}:exe:${os.detected.classifier} compile compile-custom test-compile test-compile-custom kr.motd.maven os-maven-plugin 1.6.2 diff --git a/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java b/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java index bbdf4fa..394caf9 100644 --- a/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java +++ b/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java @@ -1,533 +1,553 @@ /* * Copyright (c) 2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU General Public License version 3, or any later version * See top-level LICENSE file for more information */ package org.softwareheritage.graph.rpc; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator; import 
it.unimi.dsi.big.webgraph.labelling.Label; import org.softwareheritage.graph.*; import java.util.*; /** Traversal contains all the algorithms used for graph traversals */ public class Traversal { /** * Wrapper around g.successors(), only follows edges that are allowed by the given * {@link AllowedEdges} object. */ private static ArcLabelledNodeIterator.LabelledArcIterator filterLabelledSuccessors(SwhUnidirectionalGraph g, long nodeId, AllowedEdges allowedEdges) { if (allowedEdges.restrictedTo == null) { // All edges are allowed, bypass edge check return g.labelledSuccessors(nodeId); } else { ArcLabelledNodeIterator.LabelledArcIterator allSuccessors = g.labelledSuccessors(nodeId); return new ArcLabelledNodeIterator.LabelledArcIterator() { @Override public Label label() { return allSuccessors.label(); } @Override public long nextLong() { long neighbor; while ((neighbor = allSuccessors.nextLong()) != -1) { if (allowedEdges.isAllowed(g.getNodeType(nodeId), g.getNodeType(neighbor))) { return neighbor; } } return -1; } @Override public long skip(final long n) { long i = 0; while (i < n && nextLong() != -1) i++; return i; } }; } } /** Helper class to check that a given node is "valid" for some given {@link NodeFilter} */ private static class NodeFilterChecker { private final SwhUnidirectionalGraph g; private final NodeFilter filter; private final AllowedNodes allowedNodes; private NodeFilterChecker(SwhUnidirectionalGraph graph, NodeFilter filter) { this.g = graph; this.filter = filter; this.allowedNodes = new AllowedNodes(filter.hasTypes() ? filter.getTypes() : "*"); } public boolean allowed(long nodeId) { if (filter == null) { return true; } if (!this.allowedNodes.isAllowed(g.getNodeType(nodeId))) { return false; } return true; } } /** Returns the unidirectional graph from a bidirectional graph and a {@link GraphDirection}. */ public static SwhUnidirectionalGraph getDirectedGraph(SwhBidirectionalGraph g, GraphDirection direction) { switch (direction) { case FORWARD: return g.getForwardGraph(); case BACKWARD: return g.getBackwardGraph(); /* * TODO: add support for BOTH case BOTH: return new SwhUnidirectionalGraph(g.symmetrize(), * g.getProperties()); */ default : throw new IllegalArgumentException("Unknown direction: " + direction); } } /** Returns the opposite of a given {@link GraphDirection} (equivalent to a graph transposition). */ public static GraphDirection reverseDirection(GraphDirection direction) { switch (direction) { case FORWARD: return GraphDirection.BACKWARD; case BACKWARD: return GraphDirection.FORWARD; /* * TODO: add support for BOTH case BOTH: return GraphDirection.BOTH; */ default : throw new IllegalArgumentException("Unknown direction: " + direction); } } /** Dummy exception to short-circuit and interrupt a graph traversal. */ static class StopTraversalException extends RuntimeException { } /** Generic BFS traversal algorithm. */ static class BFSVisitor { /** The graph to traverse. */ protected final SwhUnidirectionalGraph g; /** Depth of the node currently being visited */ protected long depth = 0; /** * Number of traversal successors (i.e., successors that will be considered by the traversal) of the * node currently being visited */ protected long traversalSuccessors = 0; /** Number of edges accessed since the beginning of the traversal */ protected long edgesAccessed = 0; /** * Map from a node ID to its parent node ID. The key set can be used as the set of all visited * nodes. 
*/ protected HashMap parents = new HashMap<>(); /** Queue of nodes to visit (also called "frontier", "open set", "wavefront" etc.) */ protected ArrayDeque queue = new ArrayDeque<>(); /** If > 0, the maximum depth of the traversal. */ private long maxDepth = -1; /** If > 0, the maximum number of edges to traverse. */ private long maxEdges = -1; BFSVisitor(SwhUnidirectionalGraph g) { this.g = g; } /** Add a new source node to the initial queue. */ public void addSource(long nodeId) { queue.add(nodeId); parents.put(nodeId, -1L); } /** Set the maximum depth of the traversal. */ public void setMaxDepth(long depth) { maxDepth = depth; } /** Set the maximum number of edges to traverse. */ public void setMaxEdges(long edges) { maxEdges = edges; } /** Setup the visit counters and depth sentinel. */ public void visitSetup() { edgesAccessed = 0; depth = 0; queue.add(-1L); // depth sentinel } /** Perform the visit */ public void visit() { visitSetup(); while (!queue.isEmpty()) { visitStep(); } } /** Single "step" of a visit. Advance the frontier of exactly one node. */ public void visitStep() { try { assert !queue.isEmpty(); long curr = queue.poll(); if (curr == -1L) { ++depth; if (!queue.isEmpty()) { queue.add(-1L); visitStep(); } return; } if (maxDepth >= 0 && depth > maxDepth) { throw new StopTraversalException(); } edgesAccessed += g.outdegree(curr); if (maxEdges >= 0 && edgesAccessed > maxEdges) { throw new StopTraversalException(); } visitNode(curr); } catch (StopTraversalException e) { // Traversal is over, clear the to-do queue. queue.clear(); } } /** * Get the successors of a node. Override this function if you want to filter which successors are * considered during the traversal. */ protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return g.labelledSuccessors(nodeId); } /** Visit a node. Override to do additional processing on the node. */ protected void visitNode(long node) { ArcLabelledNodeIterator.LabelledArcIterator it = getSuccessors(node); traversalSuccessors = 0; for (long succ; (succ = it.nextLong()) != -1;) { traversalSuccessors++; visitEdge(node, succ, it.label()); } } /** Visit an edge. Override to do additional processing on the edge. */ protected void visitEdge(long src, long dst, Label label) { if (!parents.containsKey(dst)) { queue.add(dst); parents.put(dst, src); } } } /** * SimpleTraversal is used by the Traverse endpoint. It extends BFSVisitor with additional * processing, notably related to graph properties and filters. */ static class SimpleTraversal extends BFSVisitor { private final NodeFilterChecker nodeReturnChecker; private final AllowedEdges allowedEdges; private final TraversalRequest request; private final NodePropertyBuilder.NodeDataMask nodeDataMask; private final NodeObserver nodeObserver; + private long remainingMatches; private Node.Builder nodeBuilder; SimpleTraversal(SwhBidirectionalGraph bidirectionalGraph, TraversalRequest request, NodeObserver nodeObserver) { super(getDirectedGraph(bidirectionalGraph, request.getDirection())); this.request = request; this.nodeObserver = nodeObserver; this.nodeReturnChecker = new NodeFilterChecker(g, request.getReturnNodes()); this.nodeDataMask = new NodePropertyBuilder.NodeDataMask(request.hasMask() ? request.getMask() : null); this.allowedEdges = new AllowedEdges(request.hasEdges() ? 
request.getEdges() : "*"); request.getSrcList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); addSource(srcNodeId); }); if (request.hasMaxDepth()) { setMaxDepth(request.getMaxDepth()); } if (request.hasMaxEdges()) { setMaxEdges(request.getMaxEdges()); } + if (request.hasMaxMatchingNodes() && request.getMaxMatchingNodes() > 0) { + this.remainingMatches = request.getMaxMatchingNodes(); + } else { + this.remainingMatches = -1; + } } @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdges); } @Override public void visitNode(long node) { nodeBuilder = null; if (nodeReturnChecker.allowed(node) && (!request.hasMinDepth() || depth >= request.getMinDepth())) { nodeBuilder = Node.newBuilder(); NodePropertyBuilder.buildNodeProperties(g, nodeDataMask, nodeBuilder, node); } super.visitNode(node); - if (request.getReturnNodes().hasMinTraversalSuccessors() - && traversalSuccessors < request.getReturnNodes().getMinTraversalSuccessors() - || request.getReturnNodes().hasMaxTraversalSuccessors() - && traversalSuccessors > request.getReturnNodes().getMaxTraversalSuccessors()) { - nodeBuilder = null; + + boolean nodeMatchesConstraints = true; + + if (request.getReturnNodes().hasMinTraversalSuccessors()) { + nodeMatchesConstraints &= traversalSuccessors >= request.getReturnNodes().getMinTraversalSuccessors(); } - if (nodeBuilder != null) { - nodeObserver.onNext(nodeBuilder.build()); + if (request.getReturnNodes().hasMaxTraversalSuccessors()) { + nodeMatchesConstraints &= traversalSuccessors <= request.getReturnNodes().getMaxTraversalSuccessors(); + } + + if (nodeMatchesConstraints) { + if (nodeBuilder != null) { + nodeObserver.onNext(nodeBuilder.build()); + } + + if (remainingMatches >= 0) { + remainingMatches--; + if (remainingMatches == 0) { + // We matched as many nodes as allowed + throw new StopTraversalException(); + } + } } } @Override protected void visitEdge(long src, long dst, Label label) { super.visitEdge(src, dst, label); NodePropertyBuilder.buildSuccessorProperties(g, nodeDataMask, nodeBuilder, src, dst, label); } } /** * FindPathTo searches for a path from a source node to a node matching a given criteria It extends * BFSVisitor with additional processing, and makes the traversal stop as soon as a node matching * the given criteria is found. */ static class FindPathTo extends BFSVisitor { private final AllowedEdges allowedEdges; private final FindPathToRequest request; private final NodePropertyBuilder.NodeDataMask nodeDataMask; private final NodeFilterChecker targetChecker; private Long targetNode = null; FindPathTo(SwhBidirectionalGraph bidirectionalGraph, FindPathToRequest request) { super(getDirectedGraph(bidirectionalGraph, request.getDirection())); this.request = request; this.targetChecker = new NodeFilterChecker(g, request.getTarget()); this.nodeDataMask = new NodePropertyBuilder.NodeDataMask(request.hasMask() ? request.getMask() : null); this.allowedEdges = new AllowedEdges(request.hasEdges() ? 
request.getEdges() : "*"); if (request.hasMaxDepth()) { setMaxDepth(request.getMaxDepth()); } if (request.hasMaxEdges()) { setMaxEdges(request.getMaxEdges()); } request.getSrcList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); addSource(srcNodeId); }); } @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdges); } @Override public void visitNode(long node) { if (targetChecker.allowed(node)) { targetNode = node; throw new StopTraversalException(); } super.visitNode(node); } /** * Once the visit has been performed and a matching node has been found, return the shortest path * from the source set to that node. To do so, we need to backtrack the parents of the node until we * find one of the source nodes (whose parent is -1). */ public Path getPath() { if (targetNode == null) { return null; // No path found. } /* Backtrack from targetNode to a source node */ long curNode = targetNode; ArrayList path = new ArrayList<>(); while (curNode != -1) { path.add(curNode); curNode = parents.get(curNode); } Collections.reverse(path); /* Enrich path with node properties */ Path.Builder pathBuilder = Path.newBuilder(); for (long nodeId : path) { Node.Builder nodeBuilder = Node.newBuilder(); NodePropertyBuilder.buildNodeProperties(g, nodeDataMask, nodeBuilder, nodeId); pathBuilder.addNode(nodeBuilder.build()); } return pathBuilder.build(); } } /** * FindPathBetween searches for a shortest path between a set of source nodes and a set of * destination nodes. * * It does so by performing a *bidirectional breadth-first search*, i.e., two parallel breadth-first * searches, one from the source set ("src-BFS") and one from the destination set ("dst-BFS"), until * both searches find a common node that joins their visited sets. This node is called the "midpoint * node". The path returned is the path src -> ... -> midpoint -> ... -> dst, which is always a * shortest path between src and dst. * * The graph direction of both BFS can be configured separately. By default, the dst-BFS will use * the graph in the opposite direction than the src-BFS (if direction = FORWARD, by default * direction_reverse = BACKWARD, and vice-versa). The default behavior is thus to search for a * shortest path between two nodes in a given direction. However, one can also specify FORWARD or * BACKWARD for *both* the src-BFS and the dst-BFS. This will search for a common descendant or a * common ancestor between the two sets, respectively. These will be the midpoints of the returned * path. */ static class FindPathBetween extends BFSVisitor { private final FindPathBetweenRequest request; private final NodePropertyBuilder.NodeDataMask nodeDataMask; private final AllowedEdges allowedEdgesSrc; private final AllowedEdges allowedEdgesDst; private final BFSVisitor srcVisitor; private final BFSVisitor dstVisitor; private Long middleNode = null; FindPathBetween(SwhBidirectionalGraph bidirectionalGraph, FindPathBetweenRequest request) { super(getDirectedGraph(bidirectionalGraph, request.getDirection())); this.request = request; this.nodeDataMask = new NodePropertyBuilder.NodeDataMask(request.hasMask() ? request.getMask() : null); GraphDirection direction = request.getDirection(); // if direction_reverse is not specified, use the opposite direction of direction GraphDirection directionReverse = request.hasDirectionReverse() ? 
request.getDirectionReverse() : reverseDirection(request.getDirection()); SwhUnidirectionalGraph srcGraph = getDirectedGraph(bidirectionalGraph, direction); SwhUnidirectionalGraph dstGraph = getDirectedGraph(bidirectionalGraph, directionReverse); this.allowedEdgesSrc = new AllowedEdges(request.hasEdges() ? request.getEdges() : "*"); /* * If edges_reverse is not specified: - If `edges` is not specified either, defaults to "*" - If * direction == direction_reverse, defaults to `edges` - If direction != direction_reverse, defaults * to the reverse of `edges` (e.g. "rev:dir" becomes "dir:rev"). */ this.allowedEdgesDst = request.hasEdgesReverse() ? new AllowedEdges(request.getEdgesReverse()) : (request.hasEdges() ? (direction == directionReverse ? new AllowedEdges(request.getEdges()) : new AllowedEdges(request.getEdges()).reverse()) : new AllowedEdges("*")); /* * Source sub-visitor. Aborts as soon as it finds a node already visited by the destination * sub-visitor. */ this.srcVisitor = new BFSVisitor(srcGraph) { @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdgesSrc); } @Override public void visitNode(long node) { if (dstVisitor.parents.containsKey(node)) { middleNode = node; throw new StopTraversalException(); } super.visitNode(node); } }; /* * Destination sub-visitor. Aborts as soon as it finds a node already visited by the source * sub-visitor. */ this.dstVisitor = new BFSVisitor(dstGraph) { @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdgesDst); } @Override public void visitNode(long node) { if (srcVisitor.parents.containsKey(node)) { middleNode = node; throw new StopTraversalException(); } super.visitNode(node); } }; if (request.hasMaxDepth()) { this.srcVisitor.setMaxDepth(request.getMaxDepth()); this.dstVisitor.setMaxDepth(request.getMaxDepth()); } if (request.hasMaxEdges()) { this.srcVisitor.setMaxEdges(request.getMaxEdges()); this.dstVisitor.setMaxEdges(request.getMaxEdges()); } request.getSrcList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); srcVisitor.addSource(srcNodeId); }); request.getDstList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); dstVisitor.addSource(srcNodeId); }); } @Override public void visit() { /* * Bidirectional BFS: maintain two sub-visitors, and alternately run a visit step in each of them. */ srcVisitor.visitSetup(); dstVisitor.visitSetup(); while (!srcVisitor.queue.isEmpty() || !dstVisitor.queue.isEmpty()) { if (!srcVisitor.queue.isEmpty()) { srcVisitor.visitStep(); } if (!dstVisitor.queue.isEmpty()) { dstVisitor.visitStep(); } } } public Path getPath() { if (middleNode == null) { return null; // No path found. 
} Path.Builder pathBuilder = Path.newBuilder(); ArrayList path = new ArrayList<>(); /* First section of the path: src -> midpoint */ long curNode = middleNode; while (curNode != -1) { path.add(curNode); curNode = srcVisitor.parents.get(curNode); } pathBuilder.setMidpointIndex(path.size() - 1); Collections.reverse(path); /* Second section of the path: midpoint -> dst */ curNode = dstVisitor.parents.get(middleNode); while (curNode != -1) { path.add(curNode); curNode = dstVisitor.parents.get(curNode); } /* Enrich path with node properties */ for (long nodeId : path) { Node.Builder nodeBuilder = Node.newBuilder(); NodePropertyBuilder.buildNodeProperties(g, nodeDataMask, nodeBuilder, nodeId); pathBuilder.addNode(nodeBuilder.build()); } return pathBuilder.build(); } } public interface NodeObserver { void onNext(Node nodeId); } } diff --git a/java/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/src/test/java/org/softwareheritage/graph/GraphTest.java index 872784f..63defa5 100644 --- a/java/src/test/java/org/softwareheritage/graph/GraphTest.java +++ b/java/src/test/java/org/softwareheritage/graph/GraphTest.java @@ -1,67 +1,96 @@ /* * Copyright (c) 2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU General Public License version 3, or any later version * See top-level LICENSE file for more information */ package org.softwareheritage.graph; import java.io.FileInputStream; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.Iterator; import com.github.luben.zstd.ZstdInputStream; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.LazyLongIterators; import org.junit.jupiter.api.BeforeAll; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; public class GraphTest { static SwhBidirectionalGraph graph; final protected String TEST_ORIGIN_ID = "swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054"; @BeforeAll public static void setUp() throws IOException { graph = SwhBidirectionalGraph.loadLabelled(getGraphPath().toString()); } public static Path getGraphPath() { return Paths.get("..", "swh", "graph", "tests", "dataset", "compressed", "example"); } public static SwhBidirectionalGraph getGraph() { return graph; } public static SWHID fakeSWHID(String type, int num) { return new SWHID(String.format("swh:1:%s:%040d", type, num)); } public static void assertEqualsAnyOrder(Collection expected, Collection actual) { ArrayList expectedList = new ArrayList<>(expected); ArrayList actualList = new ArrayList<>(actual); expectedList.sort(Comparator.comparing(Object::toString)); actualList.sort(Comparator.comparing(Object::toString)); assertEquals(expectedList, actualList); } + public static void assertContainsAll(Collection expected, Collection actual) { + ArrayList expectedList = new ArrayList<>(expected); + ArrayList actualList = new ArrayList<>(actual); + expectedList.sort(Comparator.comparing(Object::toString)); + Iterator expectedIterator = expectedList.iterator(); + + actualList.sort(Comparator.comparing(Object::toString)); + + for (T actualItem : actualList) { + boolean found = false; + while (expectedIterator.hasNext()) { + if (expectedIterator.next().equals(actualItem)) { + found = true; + break; + } + } + if (!found) { + // TODO: better message when actualItem is present twice in actualList, + // but only once in expectedList + 
fail(String.format("%s not found in %s", actualItem, expectedList)); + } + } + } + + public static void assertLength(int expected, Collection actual) { + assertEquals(String.format("Size of collection %s:", actual), expected, actual.size()); + } + public static ArrayList lazyLongIteratorToList(LazyLongIterator input) { ArrayList inputList = new ArrayList<>(); Iterator inputIt = LazyLongIterators.eager(input); inputIt.forEachRemaining(inputList::add); return inputList; } public static String[] readZstFile(Path zstFile) throws IOException { ZstdInputStream zis = new ZstdInputStream(new FileInputStream(zstFile.toFile())); return (new String(zis.readAllBytes())).split("\n"); } } diff --git a/java/src/test/java/org/softwareheritage/graph/rpc/CountEdgesTest.java b/java/src/test/java/org/softwareheritage/graph/rpc/CountEdgesTest.java index 7445671..bc0afc6 100644 --- a/java/src/test/java/org/softwareheritage/graph/rpc/CountEdgesTest.java +++ b/java/src/test/java/org/softwareheritage/graph/rpc/CountEdgesTest.java @@ -1,84 +1,109 @@ /* * Copyright (c) 2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU General Public License version 3, or any later version * See top-level LICENSE file for more information */ package org.softwareheritage.graph.rpc; import com.google.protobuf.FieldMask; import io.grpc.Status; import io.grpc.StatusRuntimeException; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.SWHID; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; public class CountEdgesTest extends TraversalServiceTest { private TraversalRequest.Builder getTraversalRequestBuilder(SWHID src) { return TraversalRequest.newBuilder().addSrc(src.toString()); } @Test public void testSwhidErrors() { StatusRuntimeException thrown; thrown = assertThrows(StatusRuntimeException.class, () -> client .countEdges(TraversalRequest.newBuilder().addSrc(fakeSWHID("cnt", 404).toString()).build())); assertEquals(Status.INVALID_ARGUMENT.getCode(), thrown.getStatus().getCode()); thrown = assertThrows(StatusRuntimeException.class, () -> client.countEdges( TraversalRequest.newBuilder().addSrc("swh:1:lol:0000000000000000000000000000000000000001").build())); assertEquals(Status.INVALID_ARGUMENT.getCode(), thrown.getStatus().getCode()); thrown = assertThrows(StatusRuntimeException.class, () -> client.countEdges( TraversalRequest.newBuilder().addSrc("swh:1:cnt:000000000000000000000000000000000000000z").build())); assertEquals(Status.INVALID_ARGUMENT.getCode(), thrown.getStatus().getCode()); } @Test public void forwardFromRoot() { CountResponse actual = client.countEdges(getTraversalRequestBuilder(new SWHID(TEST_ORIGIN_ID)).build()); assertEquals(13, actual.getCount()); } + @ParameterizedTest + @ValueSource(ints = {0, 1, 2, 13, 14, 15, Integer.MAX_VALUE}) + public void forwardFromRootWithLimit(int limit) { + CountResponse actual = client + .countEdges(getTraversalRequestBuilder(new SWHID(TEST_ORIGIN_ID)).setMaxMatchingNodes(limit).build()); + + switch (limit) { + case 1: + // 1. origin -> snp:20 + assertEquals(1, actual.getCount()); + break; + case 2: + // 1. origin -> snp:20 + // 2. 
either snp:20 -> rev:9 or snp:20 -> rel:10 + assertEquals(3, actual.getCount()); + break; + default : + // Counts all edges + assertEquals(13, actual.getCount()); + break; + } + } + @Test public void forwardFromMiddle() { CountResponse actual = client.countEdges(getTraversalRequestBuilder(fakeSWHID("dir", 12)).build()); assertEquals(7, actual.getCount()); } @Test public void forwardRelRev() { CountResponse actual = client .countEdges(getTraversalRequestBuilder(fakeSWHID("rel", 10)).setEdges("rel:rev,rev:rev").build()); assertEquals(2, actual.getCount()); } @Test public void backwardFromMiddle() { CountResponse actual = client.countEdges( getTraversalRequestBuilder(fakeSWHID("dir", 12)).setDirection(GraphDirection.BACKWARD).build()); assertEquals(3, actual.getCount()); } @Test public void backwardFromLeaf() { CountResponse actual = client.countEdges( getTraversalRequestBuilder(fakeSWHID("cnt", 4)).setDirection(GraphDirection.BACKWARD).build()); assertEquals(12, actual.getCount()); } @Test public void backwardRevToRevRevToRel() { CountResponse actual = client.countEdges(getTraversalRequestBuilder(fakeSWHID("rev", 3)) .setEdges("rev:rev,rev:rel").setDirection(GraphDirection.BACKWARD).build()); assertEquals(5, actual.getCount()); } @Test public void testWithEmptyMask() { CountResponse actual = client.countEdges( getTraversalRequestBuilder(fakeSWHID("dir", 12)).setMask(FieldMask.getDefaultInstance()).build()); assertEquals(7, actual.getCount()); } } diff --git a/java/src/test/java/org/softwareheritage/graph/rpc/CountNodesTest.java b/java/src/test/java/org/softwareheritage/graph/rpc/CountNodesTest.java index a0bebc1..97792c5 100644 --- a/java/src/test/java/org/softwareheritage/graph/rpc/CountNodesTest.java +++ b/java/src/test/java/org/softwareheritage/graph/rpc/CountNodesTest.java @@ -1,84 +1,107 @@ /* * Copyright (c) 2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU General Public License version 3, or any later version * See top-level LICENSE file for more information */ package org.softwareheritage.graph.rpc; import com.google.protobuf.FieldMask; import io.grpc.Status; import io.grpc.StatusRuntimeException; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.SWHID; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; public class CountNodesTest extends TraversalServiceTest { private TraversalRequest.Builder getTraversalRequestBuilder(SWHID src) { return TraversalRequest.newBuilder().addSrc(src.toString()); } @Test public void testSwhidErrors() { StatusRuntimeException thrown; thrown = assertThrows(StatusRuntimeException.class, () -> client .countNodes(TraversalRequest.newBuilder().addSrc(fakeSWHID("cnt", 404).toString()).build())); assertEquals(Status.INVALID_ARGUMENT.getCode(), thrown.getStatus().getCode()); thrown = assertThrows(StatusRuntimeException.class, () -> client.countNodes( TraversalRequest.newBuilder().addSrc("swh:1:lol:0000000000000000000000000000000000000001").build())); assertEquals(Status.INVALID_ARGUMENT.getCode(), thrown.getStatus().getCode()); thrown = assertThrows(StatusRuntimeException.class, () -> client.countNodes( TraversalRequest.newBuilder().addSrc("swh:1:cnt:000000000000000000000000000000000000000z").build())); assertEquals(Status.INVALID_ARGUMENT.getCode(), thrown.getStatus().getCode()); } @Test public 
void forwardFromRoot() { CountResponse actual = client.countNodes(getTraversalRequestBuilder(new SWHID(TEST_ORIGIN_ID)).build()); assertEquals(12, actual.getCount()); } + @ParameterizedTest + @ValueSource(ints = {0, 1, 2, 5, 11, 12, 13, 14, 15, Integer.MAX_VALUE}) + public void forwardFromRootWithLimit(int limit) { + CountResponse actual = client + .countNodes(getTraversalRequestBuilder(new SWHID(TEST_ORIGIN_ID)).setMaxMatchingNodes(limit).build()); + + if (limit == 0) { + assertEquals(12, actual.getCount()); + } else { + assertEquals(Math.min(limit, 12), actual.getCount()); + } + } + @Test public void forwardFromMiddle() { CountResponse actual = client.countNodes(getTraversalRequestBuilder(fakeSWHID("dir", 12)).build()); assertEquals(8, actual.getCount()); } @Test public void forwardRelRev() { CountResponse actual = client .countNodes(getTraversalRequestBuilder(fakeSWHID("rel", 10)).setEdges("rel:rev,rev:rev").build()); assertEquals(3, actual.getCount()); } @Test public void backwardFromMiddle() { CountResponse actual = client.countNodes( getTraversalRequestBuilder(fakeSWHID("dir", 12)).setDirection(GraphDirection.BACKWARD).build()); assertEquals(4, actual.getCount()); } @Test public void backwardFromLeaf() { CountResponse actual = client.countNodes( getTraversalRequestBuilder(fakeSWHID("cnt", 4)).setDirection(GraphDirection.BACKWARD).build()); assertEquals(11, actual.getCount()); } @Test public void backwardRevToRevRevToRel() { CountResponse actual = client.countNodes(getTraversalRequestBuilder(fakeSWHID("rev", 3)) .setEdges("rev:rev,rev:rel").setDirection(GraphDirection.BACKWARD).build()); assertEquals(6, actual.getCount()); } + @ParameterizedTest + @ValueSource(ints = {1, 2, 3, 4, 5, 6, 7}) + public void backwardRevToRevRevToRelWithLimit(int limit) { + CountResponse actual = client.countNodes(getTraversalRequestBuilder(fakeSWHID("rev", 3)) + .setEdges("rev:rev,rev:rel").setDirection(GraphDirection.BACKWARD).setMaxMatchingNodes(limit).build()); + assertEquals(Math.min(limit, 6), actual.getCount()); + } + @Test public void testWithEmptyMask() { CountResponse actual = client.countNodes( getTraversalRequestBuilder(fakeSWHID("dir", 12)).setMask(FieldMask.getDefaultInstance()).build()); assertEquals(8, actual.getCount()); } } diff --git a/java/src/test/java/org/softwareheritage/graph/rpc/TraverseLeavesTest.java b/java/src/test/java/org/softwareheritage/graph/rpc/TraverseLeavesTest.java index 6e8a7ee..949fa00 100644 --- a/java/src/test/java/org/softwareheritage/graph/rpc/TraverseLeavesTest.java +++ b/java/src/test/java/org/softwareheritage/graph/rpc/TraverseLeavesTest.java @@ -1,100 +1,133 @@ /* * Copyright (c) 2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU General Public License version 3, or any later version * See top-level LICENSE file for more information */ package org.softwareheritage.graph.rpc; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.softwareheritage.graph.GraphTest; import org.softwareheritage.graph.SWHID; import java.util.ArrayList; public class TraverseLeavesTest extends TraversalServiceTest { private TraversalRequest.Builder getLeavesRequestBuilder(SWHID src) { return TraversalRequest.newBuilder().addSrc(src.toString()) .setReturnNodes(NodeFilter.newBuilder().setMaxTraversalSuccessors(0).build()); } - @Test - public void forwardFromSnp() { - TraversalRequest request = 
getLeavesRequestBuilder(fakeSWHID("snp", 20)).build(); - + private void _checkForwardFromSnp(int limit, ArrayList actualLeaves) { ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); + if (limit == 0) { + GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); + } else { + GraphTest.assertContainsAll(expectedLeaves, actualLeaves); + GraphTest.assertLength(Math.max(0, Math.min(limit, 4)), actualLeaves); + } + } + + @Test + public void forwardFromSnp() { + TraversalRequest request = getLeavesRequestBuilder(fakeSWHID("snp", 20)).build(); + ArrayList actualLeaves = getSWHIDs(client.traverse(request)); - GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); + + _checkForwardFromSnp(0, actualLeaves); + } + + @ParameterizedTest + @ValueSource(ints = {0, 1, 2, 3, 4, 5, Integer.MAX_VALUE}) + public void forwardFromSnpWithLimit(int limit) { + TraversalRequest request = getLeavesRequestBuilder(fakeSWHID("snp", 20)).setMaxMatchingNodes(limit).build(); + + ArrayList actualLeaves = getSWHIDs(client.traverse(request)); + + _checkForwardFromSnp(limit, actualLeaves); } @Test public void forwardFromRel() { TraversalRequest request = getLeavesRequestBuilder(fakeSWHID("rel", 19)).build(); ArrayList actualLeaves = getSWHIDs(client.traverse(request)); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000015")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000014")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000011")); GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void backwardFromLeaf() { TraversalRequest request1 = getLeavesRequestBuilder(fakeSWHID("cnt", 15)).setDirection(GraphDirection.BACKWARD) .build(); ArrayList actualLeaves1 = getSWHIDs(client.traverse(request1)); ArrayList expectedLeaves1 = new ArrayList<>(); expectedLeaves1.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000019")); GraphTest.assertEqualsAnyOrder(expectedLeaves1, actualLeaves1); TraversalRequest request2 = getLeavesRequestBuilder(fakeSWHID("cnt", 4)).setDirection(GraphDirection.BACKWARD) .build(); ArrayList actualLeaves2 = getSWHIDs(client.traverse(request2)); ArrayList expectedLeaves2 = new ArrayList<>(); expectedLeaves2.add(new SWHID(TEST_ORIGIN_ID)); expectedLeaves2.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000019")); GraphTest.assertEqualsAnyOrder(expectedLeaves2, actualLeaves2); } @Test public void forwardRevToRevOnly() { TraversalRequest request = getLeavesRequestBuilder(fakeSWHID("rev", 18)).setEdges("rev:rev").build(); ArrayList actualLeaves = getSWHIDs(client.traverse(request)); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000003")); GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); 
} @Test public void forwardDirToAll() { TraversalRequest request = getLeavesRequestBuilder(fakeSWHID("dir", 8)).setEdges("dir:*").build(); ArrayList actualLeaves = getSWHIDs(client.traverse(request)); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void backwardCntToDirDirToDir() { TraversalRequest request = getLeavesRequestBuilder(fakeSWHID("cnt", 5)).setEdges("cnt:dir,dir:dir") .setDirection(GraphDirection.BACKWARD).build(); ArrayList actualLeaves = getSWHIDs(client.traverse(request)); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000012")); GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } + + @ParameterizedTest + @ValueSource(ints = {0, 1, 2, Integer.MAX_VALUE}) + public void backwardCntToDirDirToDirWithLimit(int limit) { + TraversalRequest request = getLeavesRequestBuilder(fakeSWHID("cnt", 5)).setEdges("cnt:dir,dir:dir") + .setDirection(GraphDirection.BACKWARD).setMaxMatchingNodes(limit).build(); + ArrayList actualLeaves = getSWHIDs(client.traverse(request)); + ArrayList expectedLeaves = new ArrayList<>(); + expectedLeaves.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000012")); + GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); + } } diff --git a/proto/swhgraph.proto b/proto/swhgraph.proto index 7c40a6e..eb30969 100644 --- a/proto/swhgraph.proto +++ b/proto/swhgraph.proto @@ -1,316 +1,319 @@ syntax = "proto3"; import "google/protobuf/field_mask.proto"; option java_multiple_files = true; option java_package = "org.softwareheritage.graph.rpc"; option java_outer_classname = "GraphService"; package swh.graph; /* Graph traversal service */ service TraversalService { /* GetNode returns a single Node and its properties. */ rpc GetNode (GetNodeRequest) returns (Node); /* Traverse performs a breadth-first graph traversal from a set of source * nodes, then streams the nodes it encounters (if they match a given * return filter), along with their properties. */ rpc Traverse (TraversalRequest) returns (stream Node); /* FindPathTo searches for a shortest path between a set of source nodes * and a node that matches a specific *criteria*. * * It does so by performing a breadth-first search from the source node, * until any node that matches the given criteria is found, then follows * back its parents to return a shortest path from the source set to that * node. */ rpc FindPathTo (FindPathToRequest) returns (Path); /* FindPathBetween searches for a shortest path between a set of source * nodes and a set of destination nodes. * * It does so by performing a *bidirectional breadth-first search*, i.e., * two parallel breadth-first searches, one from the source set ("src-BFS") * and one from the destination set ("dst-BFS"), until both searches find a * common node that joins their visited sets. This node is called the * "midpoint node". * The path returned is the path src -> ... -> midpoint -> ... -> dst, * which is always a shortest path between src and dst. * * The graph direction of both BFS can be configured separately. 
By * default, the dst-BFS will use the graph in the opposite direction than * the src-BFS (if direction = FORWARD, by default direction_reverse = * BACKWARD, and vice-versa). The default behavior is thus to search for * a shortest path between two nodes in a given direction. However, one * can also specify FORWARD or BACKWARD for *both* the src-BFS and the * dst-BFS. This will search for a common descendant or a common ancestor * between the two sets, respectively. These will be the midpoints of the * returned path. */ rpc FindPathBetween (FindPathBetweenRequest) returns (Path); /* CountNodes does the same as Traverse, but only returns the number of * nodes accessed during the traversal. */ rpc CountNodes (TraversalRequest) returns (CountResponse); /* CountEdges does the same as Traverse, but only returns the number of * edges accessed during the traversal. */ rpc CountEdges (TraversalRequest) returns (CountResponse); /* Stats returns various statistics on the overall graph. */ rpc Stats (StatsRequest) returns (StatsResponse); } /* Direction of the graph */ enum GraphDirection { /* Forward DAG: ori -> snp -> rel -> rev -> dir -> cnt */ FORWARD = 0; /* Transposed DAG: cnt -> dir -> rev -> rel -> snp -> ori */ BACKWARD = 1; } /* Describe a node to return */ message GetNodeRequest { /* SWHID of the node to return */ string swhid = 1; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 8; } /* TraversalRequest describes how a breadth-first traversal should be * performed, and what should be returned to the client. */ message TraversalRequest { /* Set of source nodes (SWHIDs) */ repeated string src = 1; /* Direction of the graph to traverse. Defaults to FORWARD. */ GraphDirection direction = 2; /* Edge restriction string (e.g. "rev:dir,dir:cnt"). * Defaults to "*" (all). */ optional string edges = 3; /* Maximum number of edges accessed in the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_edges = 4; /* Do not return nodes with a depth lower than this number. * By default, all depths are returned. */ optional int64 min_depth = 5; /* Maximum depth of the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_depth = 6; /* Filter which nodes will be sent to the stream. By default, all nodes are * returned. */ optional NodeFilter return_nodes = 7; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 8; + /* Maximum number of matching results before stopping. For Traverse(), this is + * the total number of results. Defaults to infinite. */ + optional int64 max_matching_nodes = 9; } /* FindPathToRequest describes a request to find a shortest path between a * set of nodes and a given target criteria, as well as what should be returned * in the path. */ message FindPathToRequest { /* Set of source nodes (SWHIDs) */ repeated string src = 1; /* Target criteria, i.e., what constitutes a valid path destination. */ NodeFilter target = 2; /* Direction of the graph to traverse. Defaults to FORWARD. */ GraphDirection direction = 3; /* Edge restriction string (e.g. "rev:dir,dir:cnt"). * Defaults to "*" (all). */ optional string edges = 4; /* Maximum number of edges accessed in the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_edges = 5; /* Maximum depth of the traversal, after which it stops. 
* Defaults to infinite. */ optional int64 max_depth = 6; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 7; } /* FindPathToRequest describes a request to find a shortest path between a * set of source nodes and a set of destination nodes. It works by performing a * bidirectional breadth-first traversal from both sets at the same time. */ message FindPathBetweenRequest { /* Set of source nodes (SWHIDs) */ repeated string src = 1; /* Set of destination nodes (SWHIDs) */ repeated string dst = 2; /* Direction of the graph to traverse from the source set. Defaults to * FORWARD. */ GraphDirection direction = 3; /* Direction of the graph to traverse from the destination set. Defaults to * the opposite of `direction`. If direction and direction_reverse are * identical, it will find the first common successor of both sets in the * given direction. */ optional GraphDirection direction_reverse = 4; /* Edge restriction string for the traversal from the source set. * (e.g. "rev:dir,dir:cnt"). Defaults to "*" (all). */ optional string edges = 5; /* Edge restriction string for the reverse traversal from the destination * set. * If not specified: * - If `edges` is not specified either, defaults to "*" * - If direction == direction_reverse, defaults to `edges` * - If direction != direction_reverse, defaults to the reverse of `edges` * (e.g. "rev:dir" becomes "dir:rev"). */ optional string edges_reverse = 6; /* Maximum number of edges accessed in the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_edges = 7; /* Maximum depth of the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_depth = 8; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 9; } /* Represents various criteria that make a given node "valid". A node is * only valid if all the subcriteria present in this message are fulfilled. */ message NodeFilter { /* Node restriction string. (e.g. "dir,cnt,rev"). Defaults to "*" (all). */ optional string types = 1; /* Minimum number of successors encountered *during the traversal*. * Default: no constraint */ optional int64 min_traversal_successors = 2; /* Maximum number of successors encountered *during the traversal*. * Default: no constraint */ optional int64 max_traversal_successors = 3; } /* Represents a node in the graph. */ message Node { /* The SWHID of the graph node. */ string swhid = 1; /* List of relevant successors of this node. */ repeated Successor successor = 2; /* Number of relevant successors. */ optional int64 num_successors = 9; /* Node properties */ oneof data { ContentData cnt = 3; RevisionData rev = 5; ReleaseData rel = 6; OriginData ori = 8; }; } /* Represents a path in the graph. */ message Path { /* List of nodes in the path, from source to destination */ repeated Node node = 1; /* Index of the "midpoint" of the path. For paths obtained with * bidirectional search queries, this is the node that joined the two * sets together. When looking for a common ancestor between two nodes by * performing a FindPathBetween search with two backward graphs, this will * be the index of the common ancestor in the path. */ optional int32 midpoint_index = 2; } /* Represents a successor of a given node. 
*/ message Successor { /* The SWHID of the successor */ optional string swhid = 1; /* A list of edge labels for the given edge */ repeated EdgeLabel label = 2; } /* Content node properties */ message ContentData { /* Length of the blob, in bytes */ optional int64 length = 1; /* Whether the content was skipped during ingestion. */ optional bool is_skipped = 2; } /* Revision node properties */ message RevisionData { /* Revision author ID (anonymized) */ optional int64 author = 1; /* UNIX timestamp of the revision date (UTC) */ optional int64 author_date = 2; /* Timezone of the revision author date as an offset from UTC */ optional int32 author_date_offset = 3; /* Revision committer ID (anonymized) */ optional int64 committer = 4; /* UNIX timestamp of the revision committer date (UTC) */ optional int64 committer_date = 5; /* Timezone of the revision committer date as an offset from UTC */ optional int32 committer_date_offset = 6; /* Revision message */ optional bytes message = 7; } /* Release node properties */ message ReleaseData { /* Release author ID (anonymized) */ optional int64 author = 1; /* UNIX timestamp of the release date (UTC) */ optional int64 author_date = 2; /* Timezone of the release author date as an offset from UTC */ optional int32 author_date_offset = 3; /* Release name */ optional bytes name = 4; /* Release message */ optional bytes message = 5; } /* Origin node properties */ message OriginData { /* URL of the origin */ optional string url = 1; } message EdgeLabel { /* Directory entry name for directories, branch name for snapshots */ bytes name = 1; /* Entry permission (only set for directories). */ int32 permission = 2; } message CountResponse { int64 count = 1; } message StatsRequest { } message StatsResponse { /* Number of nodes in the graph */ int64 num_nodes = 1; /* Number of edges in the graph */ int64 num_edges = 2; /* Ratio between the graph size and the information-theoretical lower * bound */ double compression_ratio = 3; /* Number of bits per node (overall graph size in bits divided by the * number of nodes) */ double bits_per_node = 4; /* Number of bits per edge (overall graph size in bits divided by the * number of arcs). 
*/ double bits_per_edge = 5; double avg_locality = 6; /* Smallest indegree */ int64 indegree_min = 7; /* Largest indegree */ int64 indegree_max = 8; /* Average indegree */ double indegree_avg = 9; /* Smallest outdegree */ int64 outdegree_min = 10; /* Largest outdegree */ int64 outdegree_max = 11; /* Average outdegree */ double outdegree_avg = 12; } diff --git a/requirements.txt b/requirements.txt index 3983067..2dada97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ aiohttp click py4j psutil +protobuf != 4.21.* # https://github.com/protocolbuffers/protobuf/issues/10151 grpcio-tools mypy-protobuf diff --git a/swh.graph.egg-info/PKG-INFO b/swh.graph.egg-info/PKG-INFO index 0eb9464..3551549 100644 --- a/swh.graph.egg-info/PKG-INFO +++ b/swh.graph.egg-info/PKG-INFO @@ -1,52 +1,52 @@ Metadata-Version: 2.1 Name: swh.graph -Version: 2.0.0 +Version: 2.1.0 Summary: Software Heritage graph service Home-page: https://forge.softwareheritage.org/diffusion/DGRPH Author: Software Heritage developers Author-email: swh-devel@inria.fr Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-graph Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-graph/ Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 3 - Alpha Requires-Python: >=3.7 Description-Content-Type: text/x-rst Provides-Extra: testing License-File: LICENSE License-File: AUTHORS Software Heritage - graph service ================================= Tooling and services, collectively known as ``swh-graph``, providing fast access to the graph representation of the `Software Heritage `_ `archive `_. The service is in-memory, based on a compressed representation of the Software Heritage Merkle DAG. Bibliography ------------ In addition to accompanying technical documentation, ``swh-graph`` is also described in the following scientific paper. If you publish results based on ``swh-graph``, please acknowledge it by citing the paper as follows: .. note:: Paolo Boldi, Antoine Pietri, Sebastiano Vigna, Stefano Zacchiroli. `Ultra-Large-Scale Repository Analysis via Graph Compression `_. In proceedings of `SANER 2020 `_: The 27th IEEE International Conference on Software Analysis, Evolution and Reengineering, pages 184-194. IEEE 2020. Links: `preprint `_, `bibtex `_. 
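Aside on the new `max_matching_nodes` field defined in `TraversalRequest` above: the sketch below shows how a gRPC client could exercise it, in the same style as the Java tests in this patch. It is a minimal sketch, assuming the generated stubs follow the standard protoc-gen-grpc-java naming (`TraversalServiceGrpc`, blocking stub returning an `Iterator` for streaming RPCs) and an illustrative `localhost:50091` address; the example class name is hypothetical. Only `TraversalRequest`, `NodeFilter`, `Node` and `CountResponse` come from the `swhgraph.proto` definitions shown here.

    import io.grpc.ManagedChannel;
    import io.grpc.ManagedChannelBuilder;
    import org.softwareheritage.graph.rpc.CountResponse;
    import org.softwareheritage.graph.rpc.Node;
    import org.softwareheritage.graph.rpc.NodeFilter;
    import org.softwareheritage.graph.rpc.TraversalRequest;
    import org.softwareheritage.graph.rpc.TraversalServiceGrpc;

    import java.util.Iterator;

    public class MaxMatchingNodesExample {
        public static void main(String[] args) {
            // Illustrative address; point this at a running GraphServer instance.
            ManagedChannel channel = ManagedChannelBuilder.forTarget("localhost:50091").usePlaintext().build();
            TraversalServiceGrpc.TraversalServiceBlockingStub client = TraversalServiceGrpc.newBlockingStub(channel);

            // Leaves (nodes with no traversal successors) reachable from an origin,
            // but stop the traversal after at most 3 matching nodes.
            TraversalRequest request = TraversalRequest.newBuilder()
                    .addSrc("swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054")
                    .setReturnNodes(NodeFilter.newBuilder().setMaxTraversalSuccessors(0).build())
                    .setMaxMatchingNodes(3)
                    .build();

            Iterator<Node> nodes = client.traverse(request);
            while (nodes.hasNext()) {
                System.out.println(nodes.next().getSwhid());
            }

            // CountNodes honours the same limit, so this prints at most 3.
            CountResponse count = client.countNodes(request);
            System.out.println("matching nodes: " + count.getCount());

            channel.shutdown();
        }
    }

Setting `max_matching_nodes` to 0, or leaving it unset, keeps the previous unlimited behaviour, which is what the `forwardFromRootWithLimit` tests check for the 0 case.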
diff --git a/swh.graph.egg-info/SOURCES.txt b/swh.graph.egg-info/SOURCES.txt index 48d4758..f96c89d 100644 --- a/swh.graph.egg-info/SOURCES.txt +++ b/swh.graph.egg-info/SOURCES.txt @@ -1,259 +1,259 @@ .git-blame-ignore-revs .gitignore .pre-commit-config.yaml AUTHORS CODE_OF_CONDUCT.md CONTRIBUTORS LICENSE MANIFEST.in Makefile Makefile.local README.rst conftest.py mypy.ini pyproject.toml pytest.ini requirements-swh.txt requirements-test.txt requirements.txt setup.cfg setup.py tox.ini docker/Dockerfile docker/build.sh docker/run.sh docs/.gitignore docs/Makefile docs/Makefile.local docs/README.rst docs/api.rst docs/cli.rst docs/compression.rst docs/conf.py docs/docker.rst docs/git2graph.md docs/grpc-api.rst docs/index.rst docs/java-api.rst docs/memory.rst docs/quickstart.rst docs/_static/.placeholder docs/_templates/.placeholder docs/images/.gitignore docs/images/Makefile docs/images/compression_steps.dot java/.coding-style.xml java/.gitignore java/AUTHORS java/LICENSE java/README.md java/pom.xml java/.mvn/jvm.config java/src/main/proto java/src/main/java/org/softwareheritage/graph/AllowedEdges.java java/src/main/java/org/softwareheritage/graph/AllowedNodes.java java/src/main/java/org/softwareheritage/graph/SWHID.java java/src/main/java/org/softwareheritage/graph/Subgraph.java java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java java/src/main/java/org/softwareheritage/graph/SwhGraph.java java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java java/src/main/java/org/softwareheritage/graph/SwhType.java java/src/main/java/org/softwareheritage/graph/SwhUnidirectionalGraph.java java/src/main/java/org/softwareheritage/graph/compress/CSVEdgeDataset.java java/src/main/java/org/softwareheritage/graph/compress/ComposePermutations.java java/src/main/java/org/softwareheritage/graph/compress/ExtractNodes.java java/src/main/java/org/softwareheritage/graph/compress/ExtractPersons.java java/src/main/java/org/softwareheritage/graph/compress/GraphDataset.java java/src/main/java/org/softwareheritage/graph/compress/LabelMapBuilder.java java/src/main/java/org/softwareheritage/graph/compress/NodeMapBuilder.java java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java java/src/main/java/org/softwareheritage/graph/compress/ScatteredArcsORCGraph.java java/src/main/java/org/softwareheritage/graph/compress/WriteNodeProperties.java java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java java/src/main/java/org/softwareheritage/graph/experiments/topology/AveragePaths.java java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java java/src/main/java/org/softwareheritage/graph/labels/DirEntry.java java/src/main/java/org/softwareheritage/graph/labels/SwhLabel.java java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java java/src/main/java/org/softwareheritage/graph/rpc/GraphServer.java java/src/main/java/org/softwareheritage/graph/rpc/NodePropertyBuilder.java 
java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java java/src/main/java/org/softwareheritage/graph/utils/DumpProperties.java java/src/main/java/org/softwareheritage/graph/utils/ExportSubdataset.java java/src/main/java/org/softwareheritage/graph/utils/FindEarliestRevision.java java/src/main/java/org/softwareheritage/graph/utils/ForkJoinBigQuickSort2.java java/src/main/java/org/softwareheritage/graph/utils/ForkJoinQuickSort3.java java/src/main/java/org/softwareheritage/graph/utils/MPHTranslate.java java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java java/src/main/java/org/softwareheritage/graph/utils/Sort.java java/src/test/java/org/softwareheritage/graph/AllowedEdgesTest.java java/src/test/java/org/softwareheritage/graph/AllowedNodesTest.java java/src/test/java/org/softwareheritage/graph/GraphTest.java java/src/test/java/org/softwareheritage/graph/SubgraphTest.java java/src/test/java/org/softwareheritage/graph/compress/ExtractNodesTest.java java/src/test/java/org/softwareheritage/graph/compress/ExtractPersonsTest.java java/src/test/java/org/softwareheritage/graph/rpc/CountEdgesTest.java java/src/test/java/org/softwareheritage/graph/rpc/CountNodesTest.java java/src/test/java/org/softwareheritage/graph/rpc/FindPathBetweenTest.java java/src/test/java/org/softwareheritage/graph/rpc/FindPathToTest.java java/src/test/java/org/softwareheritage/graph/rpc/GetNodeTest.java java/src/test/java/org/softwareheritage/graph/rpc/StatsTest.java java/src/test/java/org/softwareheritage/graph/rpc/TraversalServiceTest.java java/src/test/java/org/softwareheritage/graph/rpc/TraverseLeavesTest.java java/src/test/java/org/softwareheritage/graph/rpc/TraverseNeighborsTest.java java/src/test/java/org/softwareheritage/graph/rpc/TraverseNodesPropertiesTest.java java/src/test/java/org/softwareheritage/graph/rpc/TraverseNodesTest.java java/src/test/java/org/softwareheritage/graph/utils/ForkJoinBigQuickSort2Test.java java/src/test/java/org/softwareheritage/graph/utils/ForkJoinQuickSort3Test.java -java/target/swh-graph-2.0.0.jar +java/target/swh-graph-2.1.0.jar proto/swhgraph.proto reports/.gitignore reports/benchmarks/Makefile reports/benchmarks/benchmarks.tex reports/experiments/Makefile reports/experiments/experiments.tex reports/linux_log/LinuxLog.java reports/linux_log/Makefile reports/linux_log/linux_log.tex reports/node_mapping/Makefile reports/node_mapping/NodeIdMapHaloDB.java reports/node_mapping/NodeIdMapRocksDB.java reports/node_mapping/node_mapping.tex swh/__init__.py swh.graph.egg-info/PKG-INFO swh.graph.egg-info/SOURCES.txt swh.graph.egg-info/dependency_links.txt swh.graph.egg-info/entry_points.txt swh.graph.egg-info/requires.txt swh.graph.egg-info/top_level.txt swh/graph/__init__.py swh/graph/cli.py swh/graph/client.py swh/graph/config.py swh/graph/grpc_server.py swh/graph/http_client.py swh/graph/http_naive_client.py swh/graph/http_rpc_server.py swh/graph/naive_client.py swh/graph/py.typed swh/graph/pytest_plugin.py swh/graph/webgraph.py swh/graph/grpc/swhgraph.proto swh/graph/grpc/swhgraph_pb2.py swh/graph/grpc/swhgraph_pb2.pyi swh/graph/grpc/swhgraph_pb2_grpc.py swh/graph/tests/__init__.py swh/graph/tests/test_cli.py swh/graph/tests/test_grpc.py swh/graph/tests/test_http_client.py swh/graph/tests/test_http_server_down.py swh/graph/tests/dataset/generate_dataset.py swh/graph/tests/dataset/compressed/example-labelled.labeloffsets swh/graph/tests/dataset/compressed/example-labelled.labels 
swh/graph/tests/dataset/compressed/example-labelled.properties swh/graph/tests/dataset/compressed/example-transposed-labelled.labeloffsets swh/graph/tests/dataset/compressed/example-transposed-labelled.labels swh/graph/tests/dataset/compressed/example-transposed-labelled.properties swh/graph/tests/dataset/compressed/example-transposed.graph swh/graph/tests/dataset/compressed/example-transposed.obl swh/graph/tests/dataset/compressed/example-transposed.offsets swh/graph/tests/dataset/compressed/example-transposed.properties swh/graph/tests/dataset/compressed/example.edges.count.txt swh/graph/tests/dataset/compressed/example.edges.stats.txt swh/graph/tests/dataset/compressed/example.graph swh/graph/tests/dataset/compressed/example.indegree swh/graph/tests/dataset/compressed/example.labels.count.txt swh/graph/tests/dataset/compressed/example.labels.csv.zst swh/graph/tests/dataset/compressed/example.labels.fcl.bytearray swh/graph/tests/dataset/compressed/example.labels.fcl.pointers swh/graph/tests/dataset/compressed/example.labels.fcl.properties swh/graph/tests/dataset/compressed/example.labels.mph swh/graph/tests/dataset/compressed/example.mph swh/graph/tests/dataset/compressed/example.node2swhid.bin swh/graph/tests/dataset/compressed/example.node2type.map swh/graph/tests/dataset/compressed/example.nodes.count.txt swh/graph/tests/dataset/compressed/example.nodes.csv.zst swh/graph/tests/dataset/compressed/example.nodes.stats.txt swh/graph/tests/dataset/compressed/example.obl swh/graph/tests/dataset/compressed/example.offsets swh/graph/tests/dataset/compressed/example.order swh/graph/tests/dataset/compressed/example.outdegree swh/graph/tests/dataset/compressed/example.persons.count.txt swh/graph/tests/dataset/compressed/example.persons.csv.zst swh/graph/tests/dataset/compressed/example.persons.mph swh/graph/tests/dataset/compressed/example.properties swh/graph/tests/dataset/compressed/example.property.author_id.bin swh/graph/tests/dataset/compressed/example.property.author_timestamp.bin swh/graph/tests/dataset/compressed/example.property.author_timestamp_offset.bin swh/graph/tests/dataset/compressed/example.property.committer_id.bin swh/graph/tests/dataset/compressed/example.property.committer_timestamp.bin swh/graph/tests/dataset/compressed/example.property.committer_timestamp_offset.bin swh/graph/tests/dataset/compressed/example.property.content.is_skipped.bin swh/graph/tests/dataset/compressed/example.property.content.length.bin swh/graph/tests/dataset/compressed/example.property.message.bin swh/graph/tests/dataset/compressed/example.property.message.offset.bin swh/graph/tests/dataset/compressed/example.property.tag_name.bin swh/graph/tests/dataset/compressed/example.property.tag_name.offset.bin swh/graph/tests/dataset/compressed/example.stats swh/graph/tests/dataset/edges/content/graph-all.edges.csv.zst swh/graph/tests/dataset/edges/content/graph-all.nodes.csv.zst swh/graph/tests/dataset/edges/directory/graph-all.edges.csv.zst swh/graph/tests/dataset/edges/directory/graph-all.nodes.csv.zst swh/graph/tests/dataset/edges/origin/graph-all.edges.csv.zst swh/graph/tests/dataset/edges/origin/graph-all.nodes.csv.zst swh/graph/tests/dataset/edges/release/graph-all.edges.csv.zst swh/graph/tests/dataset/edges/release/graph-all.nodes.csv.zst swh/graph/tests/dataset/edges/revision/graph-all.edges.csv.zst swh/graph/tests/dataset/edges/revision/graph-all.nodes.csv.zst swh/graph/tests/dataset/edges/snapshot/graph-all.edges.csv.zst swh/graph/tests/dataset/edges/snapshot/graph-all.nodes.csv.zst 
swh/graph/tests/dataset/img/.gitignore swh/graph/tests/dataset/img/Makefile swh/graph/tests/dataset/img/example.dot swh/graph/tests/dataset/orc/content/content-all.orc swh/graph/tests/dataset/orc/directory/directory-all.orc swh/graph/tests/dataset/orc/directory_entry/directory_entry-all.orc swh/graph/tests/dataset/orc/origin/origin-all.orc swh/graph/tests/dataset/orc/origin_visit/origin_visit-all.orc swh/graph/tests/dataset/orc/origin_visit_status/origin_visit_status-all.orc swh/graph/tests/dataset/orc/release/release-all.orc swh/graph/tests/dataset/orc/revision/revision-all.orc swh/graph/tests/dataset/orc/revision_extra_headers/revision_extra_headers-all.orc swh/graph/tests/dataset/orc/revision_history/revision_history-all.orc swh/graph/tests/dataset/orc/skipped_content/skipped_content-all.orc swh/graph/tests/dataset/orc/snapshot/snapshot-all.orc swh/graph/tests/dataset/orc/snapshot_branch/snapshot_branch-all.orc tools/dir2graph tools/swhid2int2int2swhid.sh tools/git2graph/.gitignore tools/git2graph/Makefile tools/git2graph/README.md tools/git2graph/git2graph.c tools/git2graph/tests/edge-filters.bats tools/git2graph/tests/full-graph.bats tools/git2graph/tests/node-filters.bats tools/git2graph/tests/repo_helper.bash tools/git2graph/tests/data/sample-repo.tgz tools/git2graph/tests/data/graphs/dir-nodes/edges.csv tools/git2graph/tests/data/graphs/dir-nodes/nodes.csv tools/git2graph/tests/data/graphs/from-dir-edges/edges.csv tools/git2graph/tests/data/graphs/from-dir-edges/nodes.csv tools/git2graph/tests/data/graphs/from-rel-edges/edges.csv tools/git2graph/tests/data/graphs/from-rel-edges/nodes.csv tools/git2graph/tests/data/graphs/fs-nodes/edges.csv tools/git2graph/tests/data/graphs/fs-nodes/nodes.csv tools/git2graph/tests/data/graphs/full/edges.csv tools/git2graph/tests/data/graphs/full/nodes.csv tools/git2graph/tests/data/graphs/rev-edges/edges.csv tools/git2graph/tests/data/graphs/rev-edges/nodes.csv tools/git2graph/tests/data/graphs/rev-nodes/edges.csv tools/git2graph/tests/data/graphs/rev-nodes/nodes.csv tools/git2graph/tests/data/graphs/to-rev-edges/edges.csv tools/git2graph/tests/data/graphs/to-rev-edges/nodes.csv \ No newline at end of file diff --git a/swh.graph.egg-info/requires.txt b/swh.graph.egg-info/requires.txt index ad5da5d..4486523 100644 --- a/swh.graph.egg-info/requires.txt +++ b/swh.graph.egg-info/requires.txt @@ -1,18 +1,19 @@ aiohttp click py4j psutil +protobuf!=4.21.* grpcio-tools mypy-protobuf swh.core[http]>=0.3 swh.model>=0.13.0 swh.dataset [testing] pytest pytest-asyncio types-click types-pyyaml types-requests types-protobuf grpc-stubs diff --git a/swh/graph/cli.py b/swh/graph/cli.py index 67d8241..4882230 100644 --- a/swh/graph/cli.py +++ b/swh/graph/cli.py @@ -1,247 +1,247 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, Set, Tuple # WARNING: do not import unnecessary things here to keep cli startup time under # control import click from swh.core.cli import CONTEXT_SETTINGS, AliasedGroup from swh.core.cli import swh as swh_cli_group if TYPE_CHECKING: from swh.graph.webgraph import CompressionStep # noqa class StepOption(click.ParamType): """click type for specifying a compression step on the CLI parse either individual steps, specified as step names or integers, or step ranges """ name = 
"compression step" def convert(self, value, param, ctx): # type: (...) -> Set[CompressionStep] from swh.graph.webgraph import COMP_SEQ, CompressionStep # noqa steps: Set[CompressionStep] = set() specs = value.split(",") for spec in specs: if "-" in spec: # step range (raw_l, raw_r) = spec.split("-", maxsplit=1) if raw_l == "": # no left endpoint raw_l = COMP_SEQ[0].name if raw_r == "": # no right endpoint raw_r = COMP_SEQ[-1].name l_step = self.convert(raw_l, param, ctx) r_step = self.convert(raw_r, param, ctx) if len(l_step) != 1 or len(r_step) != 1: self.fail(f"invalid step specification: {value}, " f"see --help") l_idx = l_step.pop() r_idx = r_step.pop() steps = steps.union( set(CompressionStep(i) for i in range(l_idx.value, r_idx.value + 1)) ) else: # singleton step try: steps.add(CompressionStep(int(spec))) # integer step except ValueError: try: steps.add(CompressionStep[spec.upper()]) # step name except KeyError: self.fail( f"invalid step specification: {value}, " f"see --help" ) return steps class PathlibPath(click.Path): """A Click path argument that returns a pathlib Path, not a string""" def convert(self, value, param, ctx): return Path(super().convert(value, param, ctx)) DEFAULT_CONFIG: Dict[str, Tuple[str, Any]] = {"graph": ("dict", {})} @swh_cli_group.group(name="graph", context_settings=CONTEXT_SETTINGS, cls=AliasedGroup) @click.option( "--config-file", "-C", default=None, type=click.Path( exists=True, dir_okay=False, ), help="YAML configuration file", ) @click.pass_context def graph_cli_group(ctx, config_file): """Software Heritage graph tools.""" from swh.core import config ctx.ensure_object(dict) conf = config.read(config_file, DEFAULT_CONFIG) if "graph" not in conf: raise ValueError( 'no "graph" stanza found in configuration file %s' % config_file ) ctx.obj["config"] = conf @graph_cli_group.command(name="rpc-serve") @click.option( "--host", "-h", default="0.0.0.0", metavar="IP", show_default=True, help="host IP address to bind the server on", ) @click.option( "--port", "-p", default=5009, type=click.INT, metavar="PORT", show_default=True, help="port to bind the server on", ) @click.option( "--graph", "-g", required=True, metavar="GRAPH", help="compressed graph basename" ) @click.pass_context def serve(ctx, host, port, graph): """run the graph RPC service""" import aiohttp.web from swh.graph.http_rpc_server import make_app config = ctx.obj["config"] config.setdefault("graph", {}) config["graph"]["path"] = graph app = make_app(config=config) aiohttp.web.run_app(app, host=host, port=port) @graph_cli_group.command(name="grpc-serve") @click.option( "--port", "-p", default=50091, type=click.INT, metavar="PORT", show_default=True, help=( "port to bind the server on (note: host is not configurable " "for now and will be 0.0.0.0)" ), ) @click.option( "--java-home", "-j", default=None, metavar="JAVA_HOME", help="absolute path to the Java Runtime Environment (JRE)", ) @click.option( "--graph", "-g", required=True, metavar="GRAPH", help="compressed graph basename" ) @click.pass_context -def grpc_serve(ctx, host, port, java_home, graph): +def grpc_serve(ctx, port, java_home, graph): """start the graph GRPC service This command uses execve to execute the java GRPC service. 
""" import os from pathlib import Path from swh.graph.grpc_server import build_grpc_server_cmdline config = ctx.obj["config"] config.setdefault("graph", {}) config["graph"]["path"] = graph cmd, port = build_grpc_server_cmdline(**config["graph"]) java_bin = cmd[0] if java_home is not None: java_bin = str(Path(java_home) / "bin" / java_bin) print(f"Starting the GRPC server on 0.0.0.0:{port}") os.execvp(java_bin, cmd) @graph_cli_group.command() @click.option( "--input-dataset", "-i", required=True, type=PathlibPath(), help="graph dataset directory, in ORC format", ) @click.option( "--output-directory", "-o", required=True, type=PathlibPath(), help="directory where to store compressed graph", ) @click.option( "--graph-name", "-g", default="graph", metavar="NAME", help="name of the output graph (default: 'graph')", ) @click.option( "--steps", "-s", metavar="STEPS", type=StepOption(), help="run only these compression steps (default: all steps)", ) @click.pass_context def compress(ctx, input_dataset, output_directory, graph_name, steps): """Compress a graph using WebGraph Input: a directory containing a graph dataset in ORC format Output: a directory containing a WebGraph compressed graph Compression steps are: (1) extract_nodes, (2) mph, (3) bv, (4) bfs, (5) permute_bfs, (6) transpose_bfs, (7) simplify, (8) llp, (9) permute_llp, (10) obl, (11) compose_orders, (12) stats, (13) transpose, (14) transpose_obl, (15) maps, (16) extract_persons, (17) mph_persons, (18) node_properties, (19) mph_labels, (20) fcl_labels, (21) edge_labels, (22) edge_labels_obl, (23) edge_labels_transpose_obl, (24) clean_tmp. Compression steps can be selected by name or number using --steps, separating them with commas; step ranges (e.g., 3-9, 6-, etc.) are also supported. """ from swh.graph import webgraph try: conf = ctx.obj["config"]["graph"]["compress"] except KeyError: conf = {} # use defaults webgraph.compress(graph_name, input_dataset, output_directory, steps, conf) def main(): return graph_cli_group(auto_envvar_prefix="SWH_GRAPH") if __name__ == "__main__": main() diff --git a/swh/graph/grpc/swhgraph.proto b/swh/graph/grpc/swhgraph.proto index 7c40a6e..eb30969 100644 --- a/swh/graph/grpc/swhgraph.proto +++ b/swh/graph/grpc/swhgraph.proto @@ -1,316 +1,319 @@ syntax = "proto3"; import "google/protobuf/field_mask.proto"; option java_multiple_files = true; option java_package = "org.softwareheritage.graph.rpc"; option java_outer_classname = "GraphService"; package swh.graph; /* Graph traversal service */ service TraversalService { /* GetNode returns a single Node and its properties. */ rpc GetNode (GetNodeRequest) returns (Node); /* Traverse performs a breadth-first graph traversal from a set of source * nodes, then streams the nodes it encounters (if they match a given * return filter), along with their properties. */ rpc Traverse (TraversalRequest) returns (stream Node); /* FindPathTo searches for a shortest path between a set of source nodes * and a node that matches a specific *criteria*. * * It does so by performing a breadth-first search from the source node, * until any node that matches the given criteria is found, then follows * back its parents to return a shortest path from the source set to that * node. */ rpc FindPathTo (FindPathToRequest) returns (Path); /* FindPathBetween searches for a shortest path between a set of source * nodes and a set of destination nodes. 
* * It does so by performing a *bidirectional breadth-first search*, i.e., * two parallel breadth-first searches, one from the source set ("src-BFS") * and one from the destination set ("dst-BFS"), until both searches find a * common node that joins their visited sets. This node is called the * "midpoint node". * The path returned is the path src -> ... -> midpoint -> ... -> dst, * which is always a shortest path between src and dst. * * The graph direction of both BFS can be configured separately. By * default, the dst-BFS will use the graph in the opposite direction than * the src-BFS (if direction = FORWARD, by default direction_reverse = * BACKWARD, and vice-versa). The default behavior is thus to search for * a shortest path between two nodes in a given direction. However, one * can also specify FORWARD or BACKWARD for *both* the src-BFS and the * dst-BFS. This will search for a common descendant or a common ancestor * between the two sets, respectively. These will be the midpoints of the * returned path. */ rpc FindPathBetween (FindPathBetweenRequest) returns (Path); /* CountNodes does the same as Traverse, but only returns the number of * nodes accessed during the traversal. */ rpc CountNodes (TraversalRequest) returns (CountResponse); /* CountEdges does the same as Traverse, but only returns the number of * edges accessed during the traversal. */ rpc CountEdges (TraversalRequest) returns (CountResponse); /* Stats returns various statistics on the overall graph. */ rpc Stats (StatsRequest) returns (StatsResponse); } /* Direction of the graph */ enum GraphDirection { /* Forward DAG: ori -> snp -> rel -> rev -> dir -> cnt */ FORWARD = 0; /* Transposed DAG: cnt -> dir -> rev -> rel -> snp -> ori */ BACKWARD = 1; } /* Describe a node to return */ message GetNodeRequest { /* SWHID of the node to return */ string swhid = 1; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 8; } /* TraversalRequest describes how a breadth-first traversal should be * performed, and what should be returned to the client. */ message TraversalRequest { /* Set of source nodes (SWHIDs) */ repeated string src = 1; /* Direction of the graph to traverse. Defaults to FORWARD. */ GraphDirection direction = 2; /* Edge restriction string (e.g. "rev:dir,dir:cnt"). * Defaults to "*" (all). */ optional string edges = 3; /* Maximum number of edges accessed in the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_edges = 4; /* Do not return nodes with a depth lower than this number. * By default, all depths are returned. */ optional int64 min_depth = 5; /* Maximum depth of the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_depth = 6; /* Filter which nodes will be sent to the stream. By default, all nodes are * returned. */ optional NodeFilter return_nodes = 7; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 8; + /* Maximum number of matching results before stopping. For Traverse(), this is + * the total number of results. Defaults to infinite. */ + optional int64 max_matching_nodes = 9; } /* FindPathToRequest describes a request to find a shortest path between a * set of nodes and a given target criteria, as well as what should be returned * in the path. 
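The `max_matching_nodes` field added to TraversalRequest above is the user-visible part of this change. A hedged client-side sketch (server address and source SWHID are placeholders, not part of this changeset) of a Traverse call that stops after the first 1000 content nodes:

import grpc

from swh.graph.grpc import swhgraph_pb2, swhgraph_pb2_grpc

with grpc.insecure_channel("localhost:50091") as channel:  # default grpc-serve port
    stub = swhgraph_pb2_grpc.TraversalServiceStub(channel)
    request = swhgraph_pb2.TraversalRequest(
        src=["swh:1:rev:0000000000000000000000000000000000000000"],  # placeholder SWHID
        direction=swhgraph_pb2.GraphDirection.FORWARD,
        # Only stream content nodes back, and stop once 1000 of them have matched.
        return_nodes=swhgraph_pb2.NodeFilter(types="cnt"),
        max_matching_nodes=1000,
    )
    for node in stub.Traverse(request):  # Traverse streams Node messages
        print(node.swhid)
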
*/ message FindPathToRequest { /* Set of source nodes (SWHIDs) */ repeated string src = 1; /* Target criteria, i.e., what constitutes a valid path destination. */ NodeFilter target = 2; /* Direction of the graph to traverse. Defaults to FORWARD. */ GraphDirection direction = 3; /* Edge restriction string (e.g. "rev:dir,dir:cnt"). * Defaults to "*" (all). */ optional string edges = 4; /* Maximum number of edges accessed in the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_edges = 5; /* Maximum depth of the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_depth = 6; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 7; } /* FindPathToRequest describes a request to find a shortest path between a * set of source nodes and a set of destination nodes. It works by performing a * bidirectional breadth-first traversal from both sets at the same time. */ message FindPathBetweenRequest { /* Set of source nodes (SWHIDs) */ repeated string src = 1; /* Set of destination nodes (SWHIDs) */ repeated string dst = 2; /* Direction of the graph to traverse from the source set. Defaults to * FORWARD. */ GraphDirection direction = 3; /* Direction of the graph to traverse from the destination set. Defaults to * the opposite of `direction`. If direction and direction_reverse are * identical, it will find the first common successor of both sets in the * given direction. */ optional GraphDirection direction_reverse = 4; /* Edge restriction string for the traversal from the source set. * (e.g. "rev:dir,dir:cnt"). Defaults to "*" (all). */ optional string edges = 5; /* Edge restriction string for the reverse traversal from the destination * set. * If not specified: * - If `edges` is not specified either, defaults to "*" * - If direction == direction_reverse, defaults to `edges` * - If direction != direction_reverse, defaults to the reverse of `edges` * (e.g. "rev:dir" becomes "dir:rev"). */ optional string edges_reverse = 6; /* Maximum number of edges accessed in the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_edges = 7; /* Maximum depth of the traversal, after which it stops. * Defaults to infinite. */ optional int64 max_depth = 8; /* FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). * By default, all fields are returned. */ optional google.protobuf.FieldMask mask = 9; } /* Represents various criteria that make a given node "valid". A node is * only valid if all the subcriteria present in this message are fulfilled. */ message NodeFilter { /* Node restriction string. (e.g. "dir,cnt,rev"). Defaults to "*" (all). */ optional string types = 1; /* Minimum number of successors encountered *during the traversal*. * Default: no constraint */ optional int64 min_traversal_successors = 2; /* Maximum number of successors encountered *during the traversal*. * Default: no constraint */ optional int64 max_traversal_successors = 3; } /* Represents a node in the graph. */ message Node { /* The SWHID of the graph node. */ string swhid = 1; /* List of relevant successors of this node. */ repeated Successor successor = 2; /* Number of relevant successors. */ optional int64 num_successors = 9; /* Node properties */ oneof data { ContentData cnt = 3; RevisionData rev = 5; ReleaseData rel = 6; OriginData ori = 8; }; } /* Represents a path in the graph. 
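To make the request messages above concrete, a sketch (both SWHIDs and the address are placeholders) of FindPathBetween looking for a shortest path from a revision down to a content blob:

import grpc

from swh.graph.grpc import swhgraph_pb2, swhgraph_pb2_grpc

with grpc.insecure_channel("localhost:50091") as channel:
    stub = swhgraph_pb2_grpc.TraversalServiceStub(channel)
    # direction=FORWARD for the src-BFS; the dst-BFS then implicitly runs
    # BACKWARD, and the two searches meet at the "midpoint" node.
    path = stub.FindPathBetween(
        swhgraph_pb2.FindPathBetweenRequest(
            src=["swh:1:rev:0000000000000000000000000000000000000000"],
            dst=["swh:1:cnt:0000000000000000000000000000000000000000"],
            direction=swhgraph_pb2.GraphDirection.FORWARD,
        )
    )
    print(" -> ".join(node.swhid for node in path.node))
    print("midpoint at index", path.midpoint_index)
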
*/ message Path { /* List of nodes in the path, from source to destination */ repeated Node node = 1; /* Index of the "midpoint" of the path. For paths obtained with * bidirectional search queries, this is the node that joined the two * sets together. When looking for a common ancestor between two nodes by * performing a FindPathBetween search with two backward graphs, this will * be the index of the common ancestor in the path. */ optional int32 midpoint_index = 2; } /* Represents a successor of a given node. */ message Successor { /* The SWHID of the successor */ optional string swhid = 1; /* A list of edge labels for the given edge */ repeated EdgeLabel label = 2; } /* Content node properties */ message ContentData { /* Length of the blob, in bytes */ optional int64 length = 1; /* Whether the content was skipped during ingestion. */ optional bool is_skipped = 2; } /* Revision node properties */ message RevisionData { /* Revision author ID (anonymized) */ optional int64 author = 1; /* UNIX timestamp of the revision date (UTC) */ optional int64 author_date = 2; /* Timezone of the revision author date as an offset from UTC */ optional int32 author_date_offset = 3; /* Revision committer ID (anonymized) */ optional int64 committer = 4; /* UNIX timestamp of the revision committer date (UTC) */ optional int64 committer_date = 5; /* Timezone of the revision committer date as an offset from UTC */ optional int32 committer_date_offset = 6; /* Revision message */ optional bytes message = 7; } /* Release node properties */ message ReleaseData { /* Release author ID (anonymized) */ optional int64 author = 1; /* UNIX timestamp of the release date (UTC) */ optional int64 author_date = 2; /* Timezone of the release author date as an offset from UTC */ optional int32 author_date_offset = 3; /* Release name */ optional bytes name = 4; /* Release message */ optional bytes message = 5; } /* Origin node properties */ message OriginData { /* URL of the origin */ optional string url = 1; } message EdgeLabel { /* Directory entry name for directories, branch name for snapshots */ bytes name = 1; /* Entry permission (only set for directories). */ int32 permission = 2; } message CountResponse { int64 count = 1; } message StatsRequest { } message StatsResponse { /* Number of nodes in the graph */ int64 num_nodes = 1; /* Number of edges in the graph */ int64 num_edges = 2; /* Ratio between the graph size and the information-theoretical lower * bound */ double compression_ratio = 3; /* Number of bits per node (overall graph size in bits divided by the * number of nodes) */ double bits_per_node = 4; /* Number of bits per edge (overall graph size in bits divided by the * number of arcs). */ double bits_per_edge = 5; double avg_locality = 6; /* Smallest indegree */ int64 indegree_min = 7; /* Largest indegree */ int64 indegree_max = 8; /* Average indegree */ double indegree_avg = 9; /* Smallest outdegree */ int64 outdegree_min = 10; /* Largest outdegree */ int64 outdegree_max = 11; /* Average outdegree */ double outdegree_avg = 12; } diff --git a/swh/graph/grpc/swhgraph_pb2.py b/swh/graph/grpc/swhgraph_pb2.py index 2809a58..55646ea 100644 --- a/swh/graph/grpc/swhgraph_pb2.py +++ b/swh/graph/grpc/swhgraph_pb2.py @@ -1,196 +1,196 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
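The generated Python bindings below expose these messages directly. As a sketch (the SWHID is a placeholder, and this is not part of the changeset), a GetNode call that uses a FieldMask to return only the SWHID and the content length:

import grpc
from google.protobuf.field_mask_pb2 import FieldMask

from swh.graph.grpc import swhgraph_pb2, swhgraph_pb2_grpc

with grpc.insecure_channel("localhost:50091") as channel:
    stub = swhgraph_pb2_grpc.TraversalServiceStub(channel)
    node = stub.GetNode(
        swhgraph_pb2.GetNodeRequest(
            swhid="swh:1:cnt:0000000000000000000000000000000000000000",  # placeholder
            mask=FieldMask(paths=["swhid", "cnt.length"]),  # restrict returned fields
        )
    )
    print(node.swhid, node.cnt.length)
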
# source: swh/graph/grpc/swhgraph.proto """Generated protocol buffer code.""" from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import message as _message from google.protobuf import reflection as _reflection from google.protobuf import symbol_database as _symbol_database # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() from google.protobuf import field_mask_pb2 as google_dot_protobuf_dot_field__mask__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1dswh/graph/grpc/swhgraph.proto\x12\tswh.graph\x1a google/protobuf/field_mask.proto\"W\n\x0eGetNodeRequest\x12\r\n\x05swhid\x18\x01 \x01(\t\x12-\n\x04mask\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x00\x88\x01\x01\x42\x07\n\x05_mask\"\xd8\x02\n\x10TraversalRequest\x12\x0b\n\x03src\x18\x01 \x03(\t\x12,\n\tdirection\x18\x02 \x01(\x0e\x32\x19.swh.graph.GraphDirection\x12\x12\n\x05\x65\x64ges\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tmax_edges\x18\x04 \x01(\x03H\x01\x88\x01\x01\x12\x16\n\tmin_depth\x18\x05 \x01(\x03H\x02\x88\x01\x01\x12\x16\n\tmax_depth\x18\x06 \x01(\x03H\x03\x88\x01\x01\x12\x30\n\x0creturn_nodes\x18\x07 \x01(\x0b\x32\x15.swh.graph.NodeFilterH\x04\x88\x01\x01\x12-\n\x04mask\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x05\x88\x01\x01\x42\x08\n\x06_edgesB\x0c\n\n_max_edgesB\x0c\n\n_min_depthB\x0c\n\n_max_depthB\x0f\n\r_return_nodesB\x07\n\x05_mask\"\x97\x02\n\x11\x46indPathToRequest\x12\x0b\n\x03src\x18\x01 \x03(\t\x12%\n\x06target\x18\x02 \x01(\x0b\x32\x15.swh.graph.NodeFilter\x12,\n\tdirection\x18\x03 \x01(\x0e\x32\x19.swh.graph.GraphDirection\x12\x12\n\x05\x65\x64ges\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tmax_edges\x18\x05 \x01(\x03H\x01\x88\x01\x01\x12\x16\n\tmax_depth\x18\x06 \x01(\x03H\x02\x88\x01\x01\x12-\n\x04mask\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x03\x88\x01\x01\x42\x08\n\x06_edgesB\x0c\n\n_max_edgesB\x0c\n\n_max_depthB\x07\n\x05_mask\"\x81\x03\n\x16\x46indPathBetweenRequest\x12\x0b\n\x03src\x18\x01 \x03(\t\x12\x0b\n\x03\x64st\x18\x02 \x03(\t\x12,\n\tdirection\x18\x03 \x01(\x0e\x32\x19.swh.graph.GraphDirection\x12\x39\n\x11\x64irection_reverse\x18\x04 \x01(\x0e\x32\x19.swh.graph.GraphDirectionH\x00\x88\x01\x01\x12\x12\n\x05\x65\x64ges\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\redges_reverse\x18\x06 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tmax_edges\x18\x07 \x01(\x03H\x03\x88\x01\x01\x12\x16\n\tmax_depth\x18\x08 \x01(\x03H\x04\x88\x01\x01\x12-\n\x04mask\x18\t \x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x05\x88\x01\x01\x42\x14\n\x12_direction_reverseB\x08\n\x06_edgesB\x10\n\x0e_edges_reverseB\x0c\n\n_max_edgesB\x0c\n\n_max_depthB\x07\n\x05_mask\"\xb2\x01\n\nNodeFilter\x12\x12\n\x05types\x18\x01 \x01(\tH\x00\x88\x01\x01\x12%\n\x18min_traversal_successors\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12%\n\x18max_traversal_successors\x18\x03 \x01(\x03H\x02\x88\x01\x01\x42\x08\n\x06_typesB\x1b\n\x19_min_traversal_successorsB\x1b\n\x19_max_traversal_successors\"\x92\x02\n\x04Node\x12\r\n\x05swhid\x18\x01 \x01(\t\x12\'\n\tsuccessor\x18\x02 \x03(\x0b\x32\x14.swh.graph.Successor\x12\x1b\n\x0enum_successors\x18\t \x01(\x03H\x01\x88\x01\x01\x12%\n\x03\x63nt\x18\x03 \x01(\x0b\x32\x16.swh.graph.ContentDataH\x00\x12&\n\x03rev\x18\x05 \x01(\x0b\x32\x17.swh.graph.RevisionDataH\x00\x12%\n\x03rel\x18\x06 \x01(\x0b\x32\x16.swh.graph.ReleaseDataH\x00\x12$\n\x03ori\x18\x08 
\x01(\x0b\x32\x15.swh.graph.OriginDataH\x00\x42\x06\n\x04\x64\x61taB\x11\n\x0f_num_successors\"U\n\x04Path\x12\x1d\n\x04node\x18\x01 \x03(\x0b\x32\x0f.swh.graph.Node\x12\x1b\n\x0emidpoint_index\x18\x02 \x01(\x05H\x00\x88\x01\x01\x42\x11\n\x0f_midpoint_index\"N\n\tSuccessor\x12\x12\n\x05swhid\x18\x01 \x01(\tH\x00\x88\x01\x01\x12#\n\x05label\x18\x02 \x03(\x0b\x32\x14.swh.graph.EdgeLabelB\x08\n\x06_swhid\"U\n\x0b\x43ontentData\x12\x13\n\x06length\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x17\n\nis_skipped\x18\x02 \x01(\x08H\x01\x88\x01\x01\x42\t\n\x07_lengthB\r\n\x0b_is_skipped\"\xc6\x02\n\x0cRevisionData\x12\x13\n\x06\x61uthor\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x18\n\x0b\x61uthor_date\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x1f\n\x12\x61uthor_date_offset\x18\x03 \x01(\x05H\x02\x88\x01\x01\x12\x16\n\tcommitter\x18\x04 \x01(\x03H\x03\x88\x01\x01\x12\x1b\n\x0e\x63ommitter_date\x18\x05 \x01(\x03H\x04\x88\x01\x01\x12\"\n\x15\x63ommitter_date_offset\x18\x06 \x01(\x05H\x05\x88\x01\x01\x12\x14\n\x07message\x18\x07 \x01(\x0cH\x06\x88\x01\x01\x42\t\n\x07_authorB\x0e\n\x0c_author_dateB\x15\n\x13_author_date_offsetB\x0c\n\n_committerB\x11\n\x0f_committer_dateB\x18\n\x16_committer_date_offsetB\n\n\x08_message\"\xcd\x01\n\x0bReleaseData\x12\x13\n\x06\x61uthor\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x18\n\x0b\x61uthor_date\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x1f\n\x12\x61uthor_date_offset\x18\x03 \x01(\x05H\x02\x88\x01\x01\x12\x11\n\x04name\x18\x04 \x01(\x0cH\x03\x88\x01\x01\x12\x14\n\x07message\x18\x05 \x01(\x0cH\x04\x88\x01\x01\x42\t\n\x07_authorB\x0e\n\x0c_author_dateB\x15\n\x13_author_date_offsetB\x07\n\x05_nameB\n\n\x08_message\"&\n\nOriginData\x12\x10\n\x03url\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x06\n\x04_url\"-\n\tEdgeLabel\x12\x0c\n\x04name\x18\x01 \x01(\x0c\x12\x12\n\npermission\x18\x02 \x01(\x05\"\x1e\n\rCountResponse\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\"\x0e\n\x0cStatsRequest\"\x9b\x02\n\rStatsResponse\x12\x11\n\tnum_nodes\x18\x01 \x01(\x03\x12\x11\n\tnum_edges\x18\x02 \x01(\x03\x12\x19\n\x11\x63ompression_ratio\x18\x03 \x01(\x01\x12\x15\n\rbits_per_node\x18\x04 \x01(\x01\x12\x15\n\rbits_per_edge\x18\x05 \x01(\x01\x12\x14\n\x0c\x61vg_locality\x18\x06 \x01(\x01\x12\x14\n\x0cindegree_min\x18\x07 \x01(\x03\x12\x14\n\x0cindegree_max\x18\x08 \x01(\x03\x12\x14\n\x0cindegree_avg\x18\t \x01(\x01\x12\x15\n\routdegree_min\x18\n \x01(\x03\x12\x15\n\routdegree_max\x18\x0b \x01(\x03\x12\x15\n\routdegree_avg\x18\x0c \x01(\x01*+\n\x0eGraphDirection\x12\x0b\n\x07\x46ORWARD\x10\x00\x12\x0c\n\x08\x42\x41\x43KWARD\x10\x01\x32\xcf\x03\n\x10TraversalService\x12\x35\n\x07GetNode\x12\x19.swh.graph.GetNodeRequest\x1a\x0f.swh.graph.Node\x12:\n\x08Traverse\x12\x1b.swh.graph.TraversalRequest\x1a\x0f.swh.graph.Node0\x01\x12;\n\nFindPathTo\x12\x1c.swh.graph.FindPathToRequest\x1a\x0f.swh.graph.Path\x12\x45\n\x0f\x46indPathBetween\x12!.swh.graph.FindPathBetweenRequest\x1a\x0f.swh.graph.Path\x12\x43\n\nCountNodes\x12\x1b.swh.graph.TraversalRequest\x1a\x18.swh.graph.CountResponse\x12\x43\n\nCountEdges\x12\x1b.swh.graph.TraversalRequest\x1a\x18.swh.graph.CountResponse\x12:\n\x05Stats\x12\x17.swh.graph.StatsRequest\x1a\x18.swh.graph.StatsResponseB0\n\x1eorg.softwareheritage.graph.rpcB\x0cGraphServiceP\x01\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1dswh/graph/grpc/swhgraph.proto\x12\tswh.graph\x1a google/protobuf/field_mask.proto\"W\n\x0eGetNodeRequest\x12\r\n\x05swhid\x18\x01 \x01(\t\x12-\n\x04mask\x18\x08 
\x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x00\x88\x01\x01\x42\x07\n\x05_mask\"\x90\x03\n\x10TraversalRequest\x12\x0b\n\x03src\x18\x01 \x03(\t\x12,\n\tdirection\x18\x02 \x01(\x0e\x32\x19.swh.graph.GraphDirection\x12\x12\n\x05\x65\x64ges\x18\x03 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tmax_edges\x18\x04 \x01(\x03H\x01\x88\x01\x01\x12\x16\n\tmin_depth\x18\x05 \x01(\x03H\x02\x88\x01\x01\x12\x16\n\tmax_depth\x18\x06 \x01(\x03H\x03\x88\x01\x01\x12\x30\n\x0creturn_nodes\x18\x07 \x01(\x0b\x32\x15.swh.graph.NodeFilterH\x04\x88\x01\x01\x12-\n\x04mask\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x05\x88\x01\x01\x12\x1f\n\x12max_matching_nodes\x18\t \x01(\x03H\x06\x88\x01\x01\x42\x08\n\x06_edgesB\x0c\n\n_max_edgesB\x0c\n\n_min_depthB\x0c\n\n_max_depthB\x0f\n\r_return_nodesB\x07\n\x05_maskB\x15\n\x13_max_matching_nodes\"\x97\x02\n\x11\x46indPathToRequest\x12\x0b\n\x03src\x18\x01 \x03(\t\x12%\n\x06target\x18\x02 \x01(\x0b\x32\x15.swh.graph.NodeFilter\x12,\n\tdirection\x18\x03 \x01(\x0e\x32\x19.swh.graph.GraphDirection\x12\x12\n\x05\x65\x64ges\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x16\n\tmax_edges\x18\x05 \x01(\x03H\x01\x88\x01\x01\x12\x16\n\tmax_depth\x18\x06 \x01(\x03H\x02\x88\x01\x01\x12-\n\x04mask\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x03\x88\x01\x01\x42\x08\n\x06_edgesB\x0c\n\n_max_edgesB\x0c\n\n_max_depthB\x07\n\x05_mask\"\x81\x03\n\x16\x46indPathBetweenRequest\x12\x0b\n\x03src\x18\x01 \x03(\t\x12\x0b\n\x03\x64st\x18\x02 \x03(\t\x12,\n\tdirection\x18\x03 \x01(\x0e\x32\x19.swh.graph.GraphDirection\x12\x39\n\x11\x64irection_reverse\x18\x04 \x01(\x0e\x32\x19.swh.graph.GraphDirectionH\x00\x88\x01\x01\x12\x12\n\x05\x65\x64ges\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x1a\n\redges_reverse\x18\x06 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tmax_edges\x18\x07 \x01(\x03H\x03\x88\x01\x01\x12\x16\n\tmax_depth\x18\x08 \x01(\x03H\x04\x88\x01\x01\x12-\n\x04mask\x18\t \x01(\x0b\x32\x1a.google.protobuf.FieldMaskH\x05\x88\x01\x01\x42\x14\n\x12_direction_reverseB\x08\n\x06_edgesB\x10\n\x0e_edges_reverseB\x0c\n\n_max_edgesB\x0c\n\n_max_depthB\x07\n\x05_mask\"\xb2\x01\n\nNodeFilter\x12\x12\n\x05types\x18\x01 \x01(\tH\x00\x88\x01\x01\x12%\n\x18min_traversal_successors\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12%\n\x18max_traversal_successors\x18\x03 \x01(\x03H\x02\x88\x01\x01\x42\x08\n\x06_typesB\x1b\n\x19_min_traversal_successorsB\x1b\n\x19_max_traversal_successors\"\x92\x02\n\x04Node\x12\r\n\x05swhid\x18\x01 \x01(\t\x12\'\n\tsuccessor\x18\x02 \x03(\x0b\x32\x14.swh.graph.Successor\x12\x1b\n\x0enum_successors\x18\t \x01(\x03H\x01\x88\x01\x01\x12%\n\x03\x63nt\x18\x03 \x01(\x0b\x32\x16.swh.graph.ContentDataH\x00\x12&\n\x03rev\x18\x05 \x01(\x0b\x32\x17.swh.graph.RevisionDataH\x00\x12%\n\x03rel\x18\x06 \x01(\x0b\x32\x16.swh.graph.ReleaseDataH\x00\x12$\n\x03ori\x18\x08 \x01(\x0b\x32\x15.swh.graph.OriginDataH\x00\x42\x06\n\x04\x64\x61taB\x11\n\x0f_num_successors\"U\n\x04Path\x12\x1d\n\x04node\x18\x01 \x03(\x0b\x32\x0f.swh.graph.Node\x12\x1b\n\x0emidpoint_index\x18\x02 \x01(\x05H\x00\x88\x01\x01\x42\x11\n\x0f_midpoint_index\"N\n\tSuccessor\x12\x12\n\x05swhid\x18\x01 \x01(\tH\x00\x88\x01\x01\x12#\n\x05label\x18\x02 \x03(\x0b\x32\x14.swh.graph.EdgeLabelB\x08\n\x06_swhid\"U\n\x0b\x43ontentData\x12\x13\n\x06length\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x17\n\nis_skipped\x18\x02 \x01(\x08H\x01\x88\x01\x01\x42\t\n\x07_lengthB\r\n\x0b_is_skipped\"\xc6\x02\n\x0cRevisionData\x12\x13\n\x06\x61uthor\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x18\n\x0b\x61uthor_date\x18\x02 
\x01(\x03H\x01\x88\x01\x01\x12\x1f\n\x12\x61uthor_date_offset\x18\x03 \x01(\x05H\x02\x88\x01\x01\x12\x16\n\tcommitter\x18\x04 \x01(\x03H\x03\x88\x01\x01\x12\x1b\n\x0e\x63ommitter_date\x18\x05 \x01(\x03H\x04\x88\x01\x01\x12\"\n\x15\x63ommitter_date_offset\x18\x06 \x01(\x05H\x05\x88\x01\x01\x12\x14\n\x07message\x18\x07 \x01(\x0cH\x06\x88\x01\x01\x42\t\n\x07_authorB\x0e\n\x0c_author_dateB\x15\n\x13_author_date_offsetB\x0c\n\n_committerB\x11\n\x0f_committer_dateB\x18\n\x16_committer_date_offsetB\n\n\x08_message\"\xcd\x01\n\x0bReleaseData\x12\x13\n\x06\x61uthor\x18\x01 \x01(\x03H\x00\x88\x01\x01\x12\x18\n\x0b\x61uthor_date\x18\x02 \x01(\x03H\x01\x88\x01\x01\x12\x1f\n\x12\x61uthor_date_offset\x18\x03 \x01(\x05H\x02\x88\x01\x01\x12\x11\n\x04name\x18\x04 \x01(\x0cH\x03\x88\x01\x01\x12\x14\n\x07message\x18\x05 \x01(\x0cH\x04\x88\x01\x01\x42\t\n\x07_authorB\x0e\n\x0c_author_dateB\x15\n\x13_author_date_offsetB\x07\n\x05_nameB\n\n\x08_message\"&\n\nOriginData\x12\x10\n\x03url\x18\x01 \x01(\tH\x00\x88\x01\x01\x42\x06\n\x04_url\"-\n\tEdgeLabel\x12\x0c\n\x04name\x18\x01 \x01(\x0c\x12\x12\n\npermission\x18\x02 \x01(\x05\"\x1e\n\rCountResponse\x12\r\n\x05\x63ount\x18\x01 \x01(\x03\"\x0e\n\x0cStatsRequest\"\x9b\x02\n\rStatsResponse\x12\x11\n\tnum_nodes\x18\x01 \x01(\x03\x12\x11\n\tnum_edges\x18\x02 \x01(\x03\x12\x19\n\x11\x63ompression_ratio\x18\x03 \x01(\x01\x12\x15\n\rbits_per_node\x18\x04 \x01(\x01\x12\x15\n\rbits_per_edge\x18\x05 \x01(\x01\x12\x14\n\x0c\x61vg_locality\x18\x06 \x01(\x01\x12\x14\n\x0cindegree_min\x18\x07 \x01(\x03\x12\x14\n\x0cindegree_max\x18\x08 \x01(\x03\x12\x14\n\x0cindegree_avg\x18\t \x01(\x01\x12\x15\n\routdegree_min\x18\n \x01(\x03\x12\x15\n\routdegree_max\x18\x0b \x01(\x03\x12\x15\n\routdegree_avg\x18\x0c \x01(\x01*+\n\x0eGraphDirection\x12\x0b\n\x07\x46ORWARD\x10\x00\x12\x0c\n\x08\x42\x41\x43KWARD\x10\x01\x32\xcf\x03\n\x10TraversalService\x12\x35\n\x07GetNode\x12\x19.swh.graph.GetNodeRequest\x1a\x0f.swh.graph.Node\x12:\n\x08Traverse\x12\x1b.swh.graph.TraversalRequest\x1a\x0f.swh.graph.Node0\x01\x12;\n\nFindPathTo\x12\x1c.swh.graph.FindPathToRequest\x1a\x0f.swh.graph.Path\x12\x45\n\x0f\x46indPathBetween\x12!.swh.graph.FindPathBetweenRequest\x1a\x0f.swh.graph.Path\x12\x43\n\nCountNodes\x12\x1b.swh.graph.TraversalRequest\x1a\x18.swh.graph.CountResponse\x12\x43\n\nCountEdges\x12\x1b.swh.graph.TraversalRequest\x1a\x18.swh.graph.CountResponse\x12:\n\x05Stats\x12\x17.swh.graph.StatsRequest\x1a\x18.swh.graph.StatsResponseB0\n\x1eorg.softwareheritage.graph.rpcB\x0cGraphServiceP\x01\x62\x06proto3') _GRAPHDIRECTION = DESCRIPTOR.enum_types_by_name['GraphDirection'] GraphDirection = enum_type_wrapper.EnumTypeWrapper(_GRAPHDIRECTION) FORWARD = 0 BACKWARD = 1 _GETNODEREQUEST = DESCRIPTOR.message_types_by_name['GetNodeRequest'] _TRAVERSALREQUEST = DESCRIPTOR.message_types_by_name['TraversalRequest'] _FINDPATHTOREQUEST = DESCRIPTOR.message_types_by_name['FindPathToRequest'] _FINDPATHBETWEENREQUEST = DESCRIPTOR.message_types_by_name['FindPathBetweenRequest'] _NODEFILTER = DESCRIPTOR.message_types_by_name['NodeFilter'] _NODE = DESCRIPTOR.message_types_by_name['Node'] _PATH = DESCRIPTOR.message_types_by_name['Path'] _SUCCESSOR = DESCRIPTOR.message_types_by_name['Successor'] _CONTENTDATA = DESCRIPTOR.message_types_by_name['ContentData'] _REVISIONDATA = DESCRIPTOR.message_types_by_name['RevisionData'] _RELEASEDATA = DESCRIPTOR.message_types_by_name['ReleaseData'] _ORIGINDATA = DESCRIPTOR.message_types_by_name['OriginData'] _EDGELABEL = DESCRIPTOR.message_types_by_name['EdgeLabel'] _COUNTRESPONSE = 
DESCRIPTOR.message_types_by_name['CountResponse'] _STATSREQUEST = DESCRIPTOR.message_types_by_name['StatsRequest'] _STATSRESPONSE = DESCRIPTOR.message_types_by_name['StatsResponse'] GetNodeRequest = _reflection.GeneratedProtocolMessageType('GetNodeRequest', (_message.Message,), { 'DESCRIPTOR' : _GETNODEREQUEST, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.GetNodeRequest) }) _sym_db.RegisterMessage(GetNodeRequest) TraversalRequest = _reflection.GeneratedProtocolMessageType('TraversalRequest', (_message.Message,), { 'DESCRIPTOR' : _TRAVERSALREQUEST, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.TraversalRequest) }) _sym_db.RegisterMessage(TraversalRequest) FindPathToRequest = _reflection.GeneratedProtocolMessageType('FindPathToRequest', (_message.Message,), { 'DESCRIPTOR' : _FINDPATHTOREQUEST, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.FindPathToRequest) }) _sym_db.RegisterMessage(FindPathToRequest) FindPathBetweenRequest = _reflection.GeneratedProtocolMessageType('FindPathBetweenRequest', (_message.Message,), { 'DESCRIPTOR' : _FINDPATHBETWEENREQUEST, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.FindPathBetweenRequest) }) _sym_db.RegisterMessage(FindPathBetweenRequest) NodeFilter = _reflection.GeneratedProtocolMessageType('NodeFilter', (_message.Message,), { 'DESCRIPTOR' : _NODEFILTER, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.NodeFilter) }) _sym_db.RegisterMessage(NodeFilter) Node = _reflection.GeneratedProtocolMessageType('Node', (_message.Message,), { 'DESCRIPTOR' : _NODE, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.Node) }) _sym_db.RegisterMessage(Node) Path = _reflection.GeneratedProtocolMessageType('Path', (_message.Message,), { 'DESCRIPTOR' : _PATH, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.Path) }) _sym_db.RegisterMessage(Path) Successor = _reflection.GeneratedProtocolMessageType('Successor', (_message.Message,), { 'DESCRIPTOR' : _SUCCESSOR, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.Successor) }) _sym_db.RegisterMessage(Successor) ContentData = _reflection.GeneratedProtocolMessageType('ContentData', (_message.Message,), { 'DESCRIPTOR' : _CONTENTDATA, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.ContentData) }) _sym_db.RegisterMessage(ContentData) RevisionData = _reflection.GeneratedProtocolMessageType('RevisionData', (_message.Message,), { 'DESCRIPTOR' : _REVISIONDATA, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.RevisionData) }) _sym_db.RegisterMessage(RevisionData) ReleaseData = _reflection.GeneratedProtocolMessageType('ReleaseData', (_message.Message,), { 'DESCRIPTOR' : _RELEASEDATA, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.ReleaseData) }) _sym_db.RegisterMessage(ReleaseData) OriginData = _reflection.GeneratedProtocolMessageType('OriginData', (_message.Message,), { 'DESCRIPTOR' : _ORIGINDATA, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.OriginData) }) _sym_db.RegisterMessage(OriginData) EdgeLabel = _reflection.GeneratedProtocolMessageType('EdgeLabel', (_message.Message,), { 'DESCRIPTOR' : 
_EDGELABEL, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.EdgeLabel) }) _sym_db.RegisterMessage(EdgeLabel) CountResponse = _reflection.GeneratedProtocolMessageType('CountResponse', (_message.Message,), { 'DESCRIPTOR' : _COUNTRESPONSE, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.CountResponse) }) _sym_db.RegisterMessage(CountResponse) StatsRequest = _reflection.GeneratedProtocolMessageType('StatsRequest', (_message.Message,), { 'DESCRIPTOR' : _STATSREQUEST, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.StatsRequest) }) _sym_db.RegisterMessage(StatsRequest) StatsResponse = _reflection.GeneratedProtocolMessageType('StatsResponse', (_message.Message,), { 'DESCRIPTOR' : _STATSRESPONSE, '__module__' : 'swh.graph.grpc.swhgraph_pb2' # @@protoc_insertion_point(class_scope:swh.graph.StatsResponse) }) _sym_db.RegisterMessage(StatsResponse) _TRAVERSALSERVICE = DESCRIPTOR.services_by_name['TraversalService'] if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b'\n\036org.softwareheritage.graph.rpcB\014GraphServiceP\001' - _GRAPHDIRECTION._serialized_start=2854 - _GRAPHDIRECTION._serialized_end=2897 + _GRAPHDIRECTION._serialized_start=2910 + _GRAPHDIRECTION._serialized_end=2953 _GETNODEREQUEST._serialized_start=78 _GETNODEREQUEST._serialized_end=165 _TRAVERSALREQUEST._serialized_start=168 - _TRAVERSALREQUEST._serialized_end=512 - _FINDPATHTOREQUEST._serialized_start=515 - _FINDPATHTOREQUEST._serialized_end=794 - _FINDPATHBETWEENREQUEST._serialized_start=797 - _FINDPATHBETWEENREQUEST._serialized_end=1182 - _NODEFILTER._serialized_start=1185 - _NODEFILTER._serialized_end=1363 - _NODE._serialized_start=1366 - _NODE._serialized_end=1640 - _PATH._serialized_start=1642 - _PATH._serialized_end=1727 - _SUCCESSOR._serialized_start=1729 - _SUCCESSOR._serialized_end=1807 - _CONTENTDATA._serialized_start=1809 - _CONTENTDATA._serialized_end=1894 - _REVISIONDATA._serialized_start=1897 - _REVISIONDATA._serialized_end=2223 - _RELEASEDATA._serialized_start=2226 - _RELEASEDATA._serialized_end=2431 - _ORIGINDATA._serialized_start=2433 - _ORIGINDATA._serialized_end=2471 - _EDGELABEL._serialized_start=2473 - _EDGELABEL._serialized_end=2518 - _COUNTRESPONSE._serialized_start=2520 - _COUNTRESPONSE._serialized_end=2550 - _STATSREQUEST._serialized_start=2552 - _STATSREQUEST._serialized_end=2566 - _STATSRESPONSE._serialized_start=2569 - _STATSRESPONSE._serialized_end=2852 - _TRAVERSALSERVICE._serialized_start=2900 - _TRAVERSALSERVICE._serialized_end=3363 + _TRAVERSALREQUEST._serialized_end=568 + _FINDPATHTOREQUEST._serialized_start=571 + _FINDPATHTOREQUEST._serialized_end=850 + _FINDPATHBETWEENREQUEST._serialized_start=853 + _FINDPATHBETWEENREQUEST._serialized_end=1238 + _NODEFILTER._serialized_start=1241 + _NODEFILTER._serialized_end=1419 + _NODE._serialized_start=1422 + _NODE._serialized_end=1696 + _PATH._serialized_start=1698 + _PATH._serialized_end=1783 + _SUCCESSOR._serialized_start=1785 + _SUCCESSOR._serialized_end=1863 + _CONTENTDATA._serialized_start=1865 + _CONTENTDATA._serialized_end=1950 + _REVISIONDATA._serialized_start=1953 + _REVISIONDATA._serialized_end=2279 + _RELEASEDATA._serialized_start=2282 + _RELEASEDATA._serialized_end=2487 + _ORIGINDATA._serialized_start=2489 + _ORIGINDATA._serialized_end=2527 + _EDGELABEL._serialized_start=2529 + _EDGELABEL._serialized_end=2574 + _COUNTRESPONSE._serialized_start=2576 + 
_COUNTRESPONSE._serialized_end=2606 + _STATSREQUEST._serialized_start=2608 + _STATSREQUEST._serialized_end=2622 + _STATSRESPONSE._serialized_start=2625 + _STATSRESPONSE._serialized_end=2908 + _TRAVERSALSERVICE._serialized_start=2956 + _TRAVERSALSERVICE._serialized_end=3419 # @@protoc_insertion_point(module_scope) diff --git a/swh/graph/grpc/swhgraph_pb2.pyi b/swh/graph/grpc/swhgraph_pb2.pyi index 8e108b0..b0ba4eb 100644 --- a/swh/graph/grpc/swhgraph_pb2.pyi +++ b/swh/graph/grpc/swhgraph_pb2.pyi @@ -1,683 +1,694 @@ """ @generated by mypy-protobuf. Do not edit manually! isort:skip_file """ import builtins -import collections.abc import google.protobuf.descriptor import google.protobuf.field_mask_pb2 import google.protobuf.internal.containers import google.protobuf.internal.enum_type_wrapper import google.protobuf.message -import sys import typing - -if sys.version_info >= (3, 10): - import typing as typing_extensions -else: - import typing_extensions +import typing_extensions DESCRIPTOR: google.protobuf.descriptor.FileDescriptor class _GraphDirection: - ValueType = typing.NewType("ValueType", builtins.int) + ValueType = typing.NewType('ValueType', builtins.int) V: typing_extensions.TypeAlias = ValueType - -class _GraphDirectionEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_GraphDirection.ValueType], builtins.type): # noqa: F821 +class _GraphDirectionEnumTypeWrapper(google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_GraphDirection.ValueType], builtins.type): DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor FORWARD: _GraphDirection.ValueType # 0 """Forward DAG: ori -> snp -> rel -> rev -> dir -> cnt""" + BACKWARD: _GraphDirection.ValueType # 1 """Transposed DAG: cnt -> dir -> rev -> rel -> snp -> ori""" class GraphDirection(_GraphDirection, metaclass=_GraphDirectionEnumTypeWrapper): """Direction of the graph""" + pass FORWARD: GraphDirection.ValueType # 0 """Forward DAG: ori -> snp -> rel -> rev -> dir -> cnt""" + BACKWARD: GraphDirection.ValueType # 1 """Transposed DAG: cnt -> dir -> rev -> rel -> snp -> ori""" + global___GraphDirection = GraphDirection + class GetNodeRequest(google.protobuf.message.Message): """Describe a node to return""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - SWHID_FIELD_NUMBER: builtins.int MASK_FIELD_NUMBER: builtins.int - swhid: builtins.str + swhid: typing.Text """SWHID of the node to return""" + @property def mask(self) -> google.protobuf.field_mask_pb2.FieldMask: """FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). By default, all fields are returned. """ - def __init__( - self, + pass + def __init__(self, *, - swhid: builtins.str = ..., - mask: google.protobuf.field_mask_pb2.FieldMask | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_mask", b"_mask", "mask", b"mask"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_mask", b"_mask", "mask", b"mask", "swhid", b"swhid"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask", b"_mask"]) -> typing_extensions.Literal["mask"] | None: ... - + swhid: typing.Text = ..., + mask: typing.Optional[google.protobuf.field_mask_pb2.FieldMask] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_mask",b"_mask","mask",b"mask"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_mask",b"_mask","mask",b"mask","swhid",b"swhid"]) -> None: ... 
+ def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask",b"_mask"]) -> typing.Optional[typing_extensions.Literal["mask"]]: ... global___GetNodeRequest = GetNodeRequest class TraversalRequest(google.protobuf.message.Message): """TraversalRequest describes how a breadth-first traversal should be performed, and what should be returned to the client. """ - DESCRIPTOR: google.protobuf.descriptor.Descriptor - SRC_FIELD_NUMBER: builtins.int DIRECTION_FIELD_NUMBER: builtins.int EDGES_FIELD_NUMBER: builtins.int MAX_EDGES_FIELD_NUMBER: builtins.int MIN_DEPTH_FIELD_NUMBER: builtins.int MAX_DEPTH_FIELD_NUMBER: builtins.int RETURN_NODES_FIELD_NUMBER: builtins.int MASK_FIELD_NUMBER: builtins.int + MAX_MATCHING_NODES_FIELD_NUMBER: builtins.int @property - def src(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + def src(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[typing.Text]: """Set of source nodes (SWHIDs)""" + pass direction: global___GraphDirection.ValueType """Direction of the graph to traverse. Defaults to FORWARD.""" - edges: builtins.str + + edges: typing.Text """Edge restriction string (e.g. "rev:dir,dir:cnt"). Defaults to "*" (all). """ + max_edges: builtins.int """Maximum number of edges accessed in the traversal, after which it stops. Defaults to infinite. """ + min_depth: builtins.int """Do not return nodes with a depth lower than this number. By default, all depths are returned. """ + max_depth: builtins.int """Maximum depth of the traversal, after which it stops. Defaults to infinite. """ + @property def return_nodes(self) -> global___NodeFilter: """Filter which nodes will be sent to the stream. By default, all nodes are returned. """ + pass @property def mask(self) -> google.protobuf.field_mask_pb2.FieldMask: """FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). By default, all fields are returned. """ - def __init__( - self, + pass + max_matching_nodes: builtins.int + """Maximum number of matching results before stopping. For Traverse(), this is + the total number of results. Defaults to infinite. + """ + + def __init__(self, *, - src: collections.abc.Iterable[builtins.str] | None = ..., + src: typing.Optional[typing.Iterable[typing.Text]] = ..., direction: global___GraphDirection.ValueType = ..., - edges: builtins.str | None = ..., - max_edges: builtins.int | None = ..., - min_depth: builtins.int | None = ..., - max_depth: builtins.int | None = ..., - return_nodes: global___NodeFilter | None = ..., - mask: google.protobuf.field_mask_pb2.FieldMask | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_edges", b"_edges", "_mask", b"_mask", "_max_depth", b"_max_depth", "_max_edges", b"_max_edges", "_min_depth", b"_min_depth", "_return_nodes", b"_return_nodes", "edges", b"edges", "mask", b"mask", "max_depth", b"max_depth", "max_edges", b"max_edges", "min_depth", b"min_depth", "return_nodes", b"return_nodes"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_edges", b"_edges", "_mask", b"_mask", "_max_depth", b"_max_depth", "_max_edges", b"_max_edges", "_min_depth", b"_min_depth", "_return_nodes", b"_return_nodes", "direction", b"direction", "edges", b"edges", "mask", b"mask", "max_depth", b"max_depth", "max_edges", b"max_edges", "min_depth", b"min_depth", "return_nodes", b"return_nodes", "src", b"src"]) -> None: ... 
+ edges: typing.Optional[typing.Text] = ..., + max_edges: typing.Optional[builtins.int] = ..., + min_depth: typing.Optional[builtins.int] = ..., + max_depth: typing.Optional[builtins.int] = ..., + return_nodes: typing.Optional[global___NodeFilter] = ..., + mask: typing.Optional[google.protobuf.field_mask_pb2.FieldMask] = ..., + max_matching_nodes: typing.Optional[builtins.int] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_edges",b"_edges","_mask",b"_mask","_max_depth",b"_max_depth","_max_edges",b"_max_edges","_max_matching_nodes",b"_max_matching_nodes","_min_depth",b"_min_depth","_return_nodes",b"_return_nodes","edges",b"edges","mask",b"mask","max_depth",b"max_depth","max_edges",b"max_edges","max_matching_nodes",b"max_matching_nodes","min_depth",b"min_depth","return_nodes",b"return_nodes"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_edges",b"_edges","_mask",b"_mask","_max_depth",b"_max_depth","_max_edges",b"_max_edges","_max_matching_nodes",b"_max_matching_nodes","_min_depth",b"_min_depth","_return_nodes",b"_return_nodes","direction",b"direction","edges",b"edges","mask",b"mask","max_depth",b"max_depth","max_edges",b"max_edges","max_matching_nodes",b"max_matching_nodes","min_depth",b"min_depth","return_nodes",b"return_nodes","src",b"src"]) -> None: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges", b"_edges"]) -> typing_extensions.Literal["edges"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges",b"_edges"]) -> typing.Optional[typing_extensions.Literal["edges"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask", b"_mask"]) -> typing_extensions.Literal["mask"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask",b"_mask"]) -> typing.Optional[typing_extensions.Literal["mask"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_depth", b"_max_depth"]) -> typing_extensions.Literal["max_depth"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_depth",b"_max_depth"]) -> typing.Optional[typing_extensions.Literal["max_depth"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_edges", b"_max_edges"]) -> typing_extensions.Literal["max_edges"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_edges",b"_max_edges"]) -> typing.Optional[typing_extensions.Literal["max_edges"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_min_depth", b"_min_depth"]) -> typing_extensions.Literal["min_depth"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_matching_nodes",b"_max_matching_nodes"]) -> typing.Optional[typing_extensions.Literal["max_matching_nodes"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_return_nodes", b"_return_nodes"]) -> typing_extensions.Literal["return_nodes"] | None: ... - + def WhichOneof(self, oneof_group: typing_extensions.Literal["_min_depth",b"_min_depth"]) -> typing.Optional[typing_extensions.Literal["min_depth"]]: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_return_nodes",b"_return_nodes"]) -> typing.Optional[typing_extensions.Literal["return_nodes"]]: ... 
global___TraversalRequest = TraversalRequest class FindPathToRequest(google.protobuf.message.Message): """FindPathToRequest describes a request to find a shortest path between a set of nodes and a given target criteria, as well as what should be returned in the path. """ - DESCRIPTOR: google.protobuf.descriptor.Descriptor - SRC_FIELD_NUMBER: builtins.int TARGET_FIELD_NUMBER: builtins.int DIRECTION_FIELD_NUMBER: builtins.int EDGES_FIELD_NUMBER: builtins.int MAX_EDGES_FIELD_NUMBER: builtins.int MAX_DEPTH_FIELD_NUMBER: builtins.int MASK_FIELD_NUMBER: builtins.int @property - def src(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + def src(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[typing.Text]: """Set of source nodes (SWHIDs)""" + pass @property def target(self) -> global___NodeFilter: """Target criteria, i.e., what constitutes a valid path destination.""" + pass direction: global___GraphDirection.ValueType """Direction of the graph to traverse. Defaults to FORWARD.""" - edges: builtins.str + + edges: typing.Text """Edge restriction string (e.g. "rev:dir,dir:cnt"). Defaults to "*" (all). """ + max_edges: builtins.int """Maximum number of edges accessed in the traversal, after which it stops. Defaults to infinite. """ + max_depth: builtins.int """Maximum depth of the traversal, after which it stops. Defaults to infinite. """ + @property def mask(self) -> google.protobuf.field_mask_pb2.FieldMask: """FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). By default, all fields are returned. """ - def __init__( - self, + pass + def __init__(self, *, - src: collections.abc.Iterable[builtins.str] | None = ..., - target: global___NodeFilter | None = ..., + src: typing.Optional[typing.Iterable[typing.Text]] = ..., + target: typing.Optional[global___NodeFilter] = ..., direction: global___GraphDirection.ValueType = ..., - edges: builtins.str | None = ..., - max_edges: builtins.int | None = ..., - max_depth: builtins.int | None = ..., - mask: google.protobuf.field_mask_pb2.FieldMask | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_edges", b"_edges", "_mask", b"_mask", "_max_depth", b"_max_depth", "_max_edges", b"_max_edges", "edges", b"edges", "mask", b"mask", "max_depth", b"max_depth", "max_edges", b"max_edges", "target", b"target"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_edges", b"_edges", "_mask", b"_mask", "_max_depth", b"_max_depth", "_max_edges", b"_max_edges", "direction", b"direction", "edges", b"edges", "mask", b"mask", "max_depth", b"max_depth", "max_edges", b"max_edges", "src", b"src", "target", b"target"]) -> None: ... + edges: typing.Optional[typing.Text] = ..., + max_edges: typing.Optional[builtins.int] = ..., + max_depth: typing.Optional[builtins.int] = ..., + mask: typing.Optional[google.protobuf.field_mask_pb2.FieldMask] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_edges",b"_edges","_mask",b"_mask","_max_depth",b"_max_depth","_max_edges",b"_max_edges","edges",b"edges","mask",b"mask","max_depth",b"max_depth","max_edges",b"max_edges","target",b"target"]) -> builtins.bool: ... 
+ def ClearField(self, field_name: typing_extensions.Literal["_edges",b"_edges","_mask",b"_mask","_max_depth",b"_max_depth","_max_edges",b"_max_edges","direction",b"direction","edges",b"edges","mask",b"mask","max_depth",b"max_depth","max_edges",b"max_edges","src",b"src","target",b"target"]) -> None: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges", b"_edges"]) -> typing_extensions.Literal["edges"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges",b"_edges"]) -> typing.Optional[typing_extensions.Literal["edges"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask", b"_mask"]) -> typing_extensions.Literal["mask"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask",b"_mask"]) -> typing.Optional[typing_extensions.Literal["mask"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_depth", b"_max_depth"]) -> typing_extensions.Literal["max_depth"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_depth",b"_max_depth"]) -> typing.Optional[typing_extensions.Literal["max_depth"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_edges", b"_max_edges"]) -> typing_extensions.Literal["max_edges"] | None: ... - + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_edges",b"_max_edges"]) -> typing.Optional[typing_extensions.Literal["max_edges"]]: ... global___FindPathToRequest = FindPathToRequest class FindPathBetweenRequest(google.protobuf.message.Message): """FindPathToRequest describes a request to find a shortest path between a set of source nodes and a set of destination nodes. It works by performing a bidirectional breadth-first traversal from both sets at the same time. """ - DESCRIPTOR: google.protobuf.descriptor.Descriptor - SRC_FIELD_NUMBER: builtins.int DST_FIELD_NUMBER: builtins.int DIRECTION_FIELD_NUMBER: builtins.int DIRECTION_REVERSE_FIELD_NUMBER: builtins.int EDGES_FIELD_NUMBER: builtins.int EDGES_REVERSE_FIELD_NUMBER: builtins.int MAX_EDGES_FIELD_NUMBER: builtins.int MAX_DEPTH_FIELD_NUMBER: builtins.int MASK_FIELD_NUMBER: builtins.int @property - def src(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + def src(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[typing.Text]: """Set of source nodes (SWHIDs)""" + pass @property - def dst(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + def dst(self) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[typing.Text]: """Set of destination nodes (SWHIDs)""" + pass direction: global___GraphDirection.ValueType """Direction of the graph to traverse from the source set. Defaults to FORWARD. """ + direction_reverse: global___GraphDirection.ValueType """Direction of the graph to traverse from the destination set. Defaults to the opposite of `direction`. If direction and direction_reverse are identical, it will find the first common successor of both sets in the given direction. """ - edges: builtins.str + + edges: typing.Text """Edge restriction string for the traversal from the source set. (e.g. "rev:dir,dir:cnt"). Defaults to "*" (all). """ - edges_reverse: builtins.str + + edges_reverse: typing.Text """Edge restriction string for the reverse traversal from the destination set. 
If not specified: - If `edges` is not specified either, defaults to "*" - If direction == direction_reverse, defaults to `edges` - If direction != direction_reverse, defaults to the reverse of `edges` (e.g. "rev:dir" becomes "dir:rev"). """ + max_edges: builtins.int """Maximum number of edges accessed in the traversal, after which it stops. Defaults to infinite. """ + max_depth: builtins.int """Maximum depth of the traversal, after which it stops. Defaults to infinite. """ + @property def mask(self) -> google.protobuf.field_mask_pb2.FieldMask: """FieldMask of which fields are to be returned (e.g., "swhid,cnt.length"). By default, all fields are returned. """ - def __init__( - self, + pass + def __init__(self, *, - src: collections.abc.Iterable[builtins.str] | None = ..., - dst: collections.abc.Iterable[builtins.str] | None = ..., + src: typing.Optional[typing.Iterable[typing.Text]] = ..., + dst: typing.Optional[typing.Iterable[typing.Text]] = ..., direction: global___GraphDirection.ValueType = ..., - direction_reverse: global___GraphDirection.ValueType | None = ..., - edges: builtins.str | None = ..., - edges_reverse: builtins.str | None = ..., - max_edges: builtins.int | None = ..., - max_depth: builtins.int | None = ..., - mask: google.protobuf.field_mask_pb2.FieldMask | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_direction_reverse", b"_direction_reverse", "_edges", b"_edges", "_edges_reverse", b"_edges_reverse", "_mask", b"_mask", "_max_depth", b"_max_depth", "_max_edges", b"_max_edges", "direction_reverse", b"direction_reverse", "edges", b"edges", "edges_reverse", b"edges_reverse", "mask", b"mask", "max_depth", b"max_depth", "max_edges", b"max_edges"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_direction_reverse", b"_direction_reverse", "_edges", b"_edges", "_edges_reverse", b"_edges_reverse", "_mask", b"_mask", "_max_depth", b"_max_depth", "_max_edges", b"_max_edges", "direction", b"direction", "direction_reverse", b"direction_reverse", "dst", b"dst", "edges", b"edges", "edges_reverse", b"edges_reverse", "mask", b"mask", "max_depth", b"max_depth", "max_edges", b"max_edges", "src", b"src"]) -> None: ... + direction_reverse: typing.Optional[global___GraphDirection.ValueType] = ..., + edges: typing.Optional[typing.Text] = ..., + edges_reverse: typing.Optional[typing.Text] = ..., + max_edges: typing.Optional[builtins.int] = ..., + max_depth: typing.Optional[builtins.int] = ..., + mask: typing.Optional[google.protobuf.field_mask_pb2.FieldMask] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_direction_reverse",b"_direction_reverse","_edges",b"_edges","_edges_reverse",b"_edges_reverse","_mask",b"_mask","_max_depth",b"_max_depth","_max_edges",b"_max_edges","direction_reverse",b"direction_reverse","edges",b"edges","edges_reverse",b"edges_reverse","mask",b"mask","max_depth",b"max_depth","max_edges",b"max_edges"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_direction_reverse",b"_direction_reverse","_edges",b"_edges","_edges_reverse",b"_edges_reverse","_mask",b"_mask","_max_depth",b"_max_depth","_max_edges",b"_max_edges","direction",b"direction","direction_reverse",b"direction_reverse","dst",b"dst","edges",b"edges","edges_reverse",b"edges_reverse","mask",b"mask","max_depth",b"max_depth","max_edges",b"max_edges","src",b"src"]) -> None: ... 
@typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_direction_reverse", b"_direction_reverse"]) -> typing_extensions.Literal["direction_reverse"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_direction_reverse",b"_direction_reverse"]) -> typing.Optional[typing_extensions.Literal["direction_reverse"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges", b"_edges"]) -> typing_extensions.Literal["edges"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges",b"_edges"]) -> typing.Optional[typing_extensions.Literal["edges"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges_reverse", b"_edges_reverse"]) -> typing_extensions.Literal["edges_reverse"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_edges_reverse",b"_edges_reverse"]) -> typing.Optional[typing_extensions.Literal["edges_reverse"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask", b"_mask"]) -> typing_extensions.Literal["mask"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_mask",b"_mask"]) -> typing.Optional[typing_extensions.Literal["mask"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_depth", b"_max_depth"]) -> typing_extensions.Literal["max_depth"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_depth",b"_max_depth"]) -> typing.Optional[typing_extensions.Literal["max_depth"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_edges", b"_max_edges"]) -> typing_extensions.Literal["max_edges"] | None: ... - + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_edges",b"_max_edges"]) -> typing.Optional[typing_extensions.Literal["max_edges"]]: ... global___FindPathBetweenRequest = FindPathBetweenRequest class NodeFilter(google.protobuf.message.Message): """Represents various criteria that make a given node "valid". A node is only valid if all the subcriteria present in this message are fulfilled. """ - DESCRIPTOR: google.protobuf.descriptor.Descriptor - TYPES_FIELD_NUMBER: builtins.int MIN_TRAVERSAL_SUCCESSORS_FIELD_NUMBER: builtins.int MAX_TRAVERSAL_SUCCESSORS_FIELD_NUMBER: builtins.int - types: builtins.str + types: typing.Text """Node restriction string. (e.g. "dir,cnt,rev"). Defaults to "*" (all).""" + min_traversal_successors: builtins.int """Minimum number of successors encountered *during the traversal*. Default: no constraint """ + max_traversal_successors: builtins.int """Maximum number of successors encountered *during the traversal*. Default: no constraint """ - def __init__( - self, + + def __init__(self, *, - types: builtins.str | None = ..., - min_traversal_successors: builtins.int | None = ..., - max_traversal_successors: builtins.int | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_max_traversal_successors", b"_max_traversal_successors", "_min_traversal_successors", b"_min_traversal_successors", "_types", b"_types", "max_traversal_successors", b"max_traversal_successors", "min_traversal_successors", b"min_traversal_successors", "types", b"types"]) -> builtins.bool: ... 
- def ClearField(self, field_name: typing_extensions.Literal["_max_traversal_successors", b"_max_traversal_successors", "_min_traversal_successors", b"_min_traversal_successors", "_types", b"_types", "max_traversal_successors", b"max_traversal_successors", "min_traversal_successors", b"min_traversal_successors", "types", b"types"]) -> None: ... + types: typing.Optional[typing.Text] = ..., + min_traversal_successors: typing.Optional[builtins.int] = ..., + max_traversal_successors: typing.Optional[builtins.int] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_max_traversal_successors",b"_max_traversal_successors","_min_traversal_successors",b"_min_traversal_successors","_types",b"_types","max_traversal_successors",b"max_traversal_successors","min_traversal_successors",b"min_traversal_successors","types",b"types"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_max_traversal_successors",b"_max_traversal_successors","_min_traversal_successors",b"_min_traversal_successors","_types",b"_types","max_traversal_successors",b"max_traversal_successors","min_traversal_successors",b"min_traversal_successors","types",b"types"]) -> None: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_traversal_successors", b"_max_traversal_successors"]) -> typing_extensions.Literal["max_traversal_successors"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_max_traversal_successors",b"_max_traversal_successors"]) -> typing.Optional[typing_extensions.Literal["max_traversal_successors"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_min_traversal_successors", b"_min_traversal_successors"]) -> typing_extensions.Literal["min_traversal_successors"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_min_traversal_successors",b"_min_traversal_successors"]) -> typing.Optional[typing_extensions.Literal["min_traversal_successors"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_types", b"_types"]) -> typing_extensions.Literal["types"] | None: ... - + def WhichOneof(self, oneof_group: typing_extensions.Literal["_types",b"_types"]) -> typing.Optional[typing_extensions.Literal["types"]]: ... global___NodeFilter = NodeFilter class Node(google.protobuf.message.Message): """Represents a node in the graph.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - SWHID_FIELD_NUMBER: builtins.int SUCCESSOR_FIELD_NUMBER: builtins.int NUM_SUCCESSORS_FIELD_NUMBER: builtins.int CNT_FIELD_NUMBER: builtins.int REV_FIELD_NUMBER: builtins.int REL_FIELD_NUMBER: builtins.int ORI_FIELD_NUMBER: builtins.int - swhid: builtins.str + swhid: typing.Text """The SWHID of the graph node.""" + @property def successor(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Successor]: """List of relevant successors of this node.""" + pass num_successors: builtins.int """Number of relevant successors.""" + @property def cnt(self) -> global___ContentData: ... @property def rev(self) -> global___RevisionData: ... @property def rel(self) -> global___ReleaseData: ... @property def ori(self) -> global___OriginData: ... 
- def __init__( - self, + def __init__(self, *, - swhid: builtins.str = ..., - successor: collections.abc.Iterable[global___Successor] | None = ..., - num_successors: builtins.int | None = ..., - cnt: global___ContentData | None = ..., - rev: global___RevisionData | None = ..., - rel: global___ReleaseData | None = ..., - ori: global___OriginData | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_num_successors", b"_num_successors", "cnt", b"cnt", "data", b"data", "num_successors", b"num_successors", "ori", b"ori", "rel", b"rel", "rev", b"rev"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_num_successors", b"_num_successors", "cnt", b"cnt", "data", b"data", "num_successors", b"num_successors", "ori", b"ori", "rel", b"rel", "rev", b"rev", "successor", b"successor", "swhid", b"swhid"]) -> None: ... - @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_num_successors", b"_num_successors"]) -> typing_extensions.Literal["num_successors"] | None: ... - @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["data", b"data"]) -> typing_extensions.Literal["cnt", "rev", "rel", "ori"] | None: ... - + swhid: typing.Text = ..., + successor: typing.Optional[typing.Iterable[global___Successor]] = ..., + num_successors: typing.Optional[builtins.int] = ..., + cnt: typing.Optional[global___ContentData] = ..., + rev: typing.Optional[global___RevisionData] = ..., + rel: typing.Optional[global___ReleaseData] = ..., + ori: typing.Optional[global___OriginData] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_num_successors",b"_num_successors","cnt",b"cnt","data",b"data","num_successors",b"num_successors","ori",b"ori","rel",b"rel","rev",b"rev"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_num_successors",b"_num_successors","cnt",b"cnt","data",b"data","num_successors",b"num_successors","ori",b"ori","rel",b"rel","rev",b"rev","successor",b"successor","swhid",b"swhid"]) -> None: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["_num_successors",b"_num_successors"]) -> typing.Optional[typing_extensions.Literal["num_successors"]]: ... + @typing.overload + def WhichOneof(self, oneof_group: typing_extensions.Literal["data",b"data"]) -> typing.Optional[typing_extensions.Literal["cnt","rev","rel","ori"]]: ... global___Node = Node class Path(google.protobuf.message.Message): """Represents a path in the graph.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - NODE_FIELD_NUMBER: builtins.int MIDPOINT_INDEX_FIELD_NUMBER: builtins.int @property def node(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Node]: """List of nodes in the path, from source to destination""" + pass midpoint_index: builtins.int """Index of the "midpoint" of the path. For paths obtained with bidirectional search queries, this is the node that joined the two sets together. When looking for a common ancestor between two nodes by performing a FindPathBetween search with two backward graphs, this will be the index of the common ancestor in the path. """ - def __init__( - self, - *, - node: collections.abc.Iterable[global___Node] | None = ..., - midpoint_index: builtins.int | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_midpoint_index", b"_midpoint_index", "midpoint_index", b"midpoint_index"]) -> builtins.bool: ... 
- def ClearField(self, field_name: typing_extensions.Literal["_midpoint_index", b"_midpoint_index", "midpoint_index", b"midpoint_index", "node", b"node"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["_midpoint_index", b"_midpoint_index"]) -> typing_extensions.Literal["midpoint_index"] | None: ... + def __init__(self, + *, + node: typing.Optional[typing.Iterable[global___Node]] = ..., + midpoint_index: typing.Optional[builtins.int] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_midpoint_index",b"_midpoint_index","midpoint_index",b"midpoint_index"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_midpoint_index",b"_midpoint_index","midpoint_index",b"midpoint_index","node",b"node"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_midpoint_index",b"_midpoint_index"]) -> typing.Optional[typing_extensions.Literal["midpoint_index"]]: ... global___Path = Path class Successor(google.protobuf.message.Message): """Represents a successor of a given node.""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - SWHID_FIELD_NUMBER: builtins.int LABEL_FIELD_NUMBER: builtins.int - swhid: builtins.str + swhid: typing.Text """The SWHID of the successor""" + @property def label(self) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___EdgeLabel]: """A list of edge labels for the given edge""" - def __init__( - self, + pass + def __init__(self, *, - swhid: builtins.str | None = ..., - label: collections.abc.Iterable[global___EdgeLabel] | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_swhid", b"_swhid", "swhid", b"swhid"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_swhid", b"_swhid", "label", b"label", "swhid", b"swhid"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["_swhid", b"_swhid"]) -> typing_extensions.Literal["swhid"] | None: ... - + swhid: typing.Optional[typing.Text] = ..., + label: typing.Optional[typing.Iterable[global___EdgeLabel]] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_swhid",b"_swhid","swhid",b"swhid"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_swhid",b"_swhid","label",b"label","swhid",b"swhid"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_swhid",b"_swhid"]) -> typing.Optional[typing_extensions.Literal["swhid"]]: ... global___Successor = Successor class ContentData(google.protobuf.message.Message): """Content node properties""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - LENGTH_FIELD_NUMBER: builtins.int IS_SKIPPED_FIELD_NUMBER: builtins.int length: builtins.int """Length of the blob, in bytes""" + is_skipped: builtins.bool """Whether the content was skipped during ingestion.""" - def __init__( - self, + + def __init__(self, *, - length: builtins.int | None = ..., - is_skipped: builtins.bool | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_is_skipped", b"_is_skipped", "_length", b"_length", "is_skipped", b"is_skipped", "length", b"length"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_is_skipped", b"_is_skipped", "_length", b"_length", "is_skipped", b"is_skipped", "length", b"length"]) -> None: ... + length: typing.Optional[builtins.int] = ..., + is_skipped: typing.Optional[builtins.bool] = ..., + ) -> None: ... 
+ def HasField(self, field_name: typing_extensions.Literal["_is_skipped",b"_is_skipped","_length",b"_length","is_skipped",b"is_skipped","length",b"length"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_is_skipped",b"_is_skipped","_length",b"_length","is_skipped",b"is_skipped","length",b"length"]) -> None: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_is_skipped", b"_is_skipped"]) -> typing_extensions.Literal["is_skipped"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_is_skipped",b"_is_skipped"]) -> typing.Optional[typing_extensions.Literal["is_skipped"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_length", b"_length"]) -> typing_extensions.Literal["length"] | None: ... - + def WhichOneof(self, oneof_group: typing_extensions.Literal["_length",b"_length"]) -> typing.Optional[typing_extensions.Literal["length"]]: ... global___ContentData = ContentData class RevisionData(google.protobuf.message.Message): """Revision node properties""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - AUTHOR_FIELD_NUMBER: builtins.int AUTHOR_DATE_FIELD_NUMBER: builtins.int AUTHOR_DATE_OFFSET_FIELD_NUMBER: builtins.int COMMITTER_FIELD_NUMBER: builtins.int COMMITTER_DATE_FIELD_NUMBER: builtins.int COMMITTER_DATE_OFFSET_FIELD_NUMBER: builtins.int MESSAGE_FIELD_NUMBER: builtins.int author: builtins.int """Revision author ID (anonymized)""" + author_date: builtins.int """UNIX timestamp of the revision date (UTC)""" + author_date_offset: builtins.int """Timezone of the revision author date as an offset from UTC""" + committer: builtins.int """Revision committer ID (anonymized)""" + committer_date: builtins.int """UNIX timestamp of the revision committer date (UTC)""" + committer_date_offset: builtins.int """Timezone of the revision committer date as an offset from UTC""" + message: builtins.bytes """Revision message""" - def __init__( - self, + + def __init__(self, *, - author: builtins.int | None = ..., - author_date: builtins.int | None = ..., - author_date_offset: builtins.int | None = ..., - committer: builtins.int | None = ..., - committer_date: builtins.int | None = ..., - committer_date_offset: builtins.int | None = ..., - message: builtins.bytes | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_author", b"_author", "_author_date", b"_author_date", "_author_date_offset", b"_author_date_offset", "_committer", b"_committer", "_committer_date", b"_committer_date", "_committer_date_offset", b"_committer_date_offset", "_message", b"_message", "author", b"author", "author_date", b"author_date", "author_date_offset", b"author_date_offset", "committer", b"committer", "committer_date", b"committer_date", "committer_date_offset", b"committer_date_offset", "message", b"message"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_author", b"_author", "_author_date", b"_author_date", "_author_date_offset", b"_author_date_offset", "_committer", b"_committer", "_committer_date", b"_committer_date", "_committer_date_offset", b"_committer_date_offset", "_message", b"_message", "author", b"author", "author_date", b"author_date", "author_date_offset", b"author_date_offset", "committer", b"committer", "committer_date", b"committer_date", "committer_date_offset", b"committer_date_offset", "message", b"message"]) -> None: ... 
+ author: typing.Optional[builtins.int] = ..., + author_date: typing.Optional[builtins.int] = ..., + author_date_offset: typing.Optional[builtins.int] = ..., + committer: typing.Optional[builtins.int] = ..., + committer_date: typing.Optional[builtins.int] = ..., + committer_date_offset: typing.Optional[builtins.int] = ..., + message: typing.Optional[builtins.bytes] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_author",b"_author","_author_date",b"_author_date","_author_date_offset",b"_author_date_offset","_committer",b"_committer","_committer_date",b"_committer_date","_committer_date_offset",b"_committer_date_offset","_message",b"_message","author",b"author","author_date",b"author_date","author_date_offset",b"author_date_offset","committer",b"committer","committer_date",b"committer_date","committer_date_offset",b"committer_date_offset","message",b"message"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_author",b"_author","_author_date",b"_author_date","_author_date_offset",b"_author_date_offset","_committer",b"_committer","_committer_date",b"_committer_date","_committer_date_offset",b"_committer_date_offset","_message",b"_message","author",b"author","author_date",b"author_date","author_date_offset",b"author_date_offset","committer",b"committer","committer_date",b"committer_date","committer_date_offset",b"committer_date_offset","message",b"message"]) -> None: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_author", b"_author"]) -> typing_extensions.Literal["author"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_author",b"_author"]) -> typing.Optional[typing_extensions.Literal["author"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date", b"_author_date"]) -> typing_extensions.Literal["author_date"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date",b"_author_date"]) -> typing.Optional[typing_extensions.Literal["author_date"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date_offset", b"_author_date_offset"]) -> typing_extensions.Literal["author_date_offset"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date_offset",b"_author_date_offset"]) -> typing.Optional[typing_extensions.Literal["author_date_offset"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_committer", b"_committer"]) -> typing_extensions.Literal["committer"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_committer",b"_committer"]) -> typing.Optional[typing_extensions.Literal["committer"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_committer_date", b"_committer_date"]) -> typing_extensions.Literal["committer_date"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_committer_date",b"_committer_date"]) -> typing.Optional[typing_extensions.Literal["committer_date"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_committer_date_offset", b"_committer_date_offset"]) -> typing_extensions.Literal["committer_date_offset"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_committer_date_offset",b"_committer_date_offset"]) -> typing.Optional[typing_extensions.Literal["committer_date_offset"]]: ... 
@typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_message", b"_message"]) -> typing_extensions.Literal["message"] | None: ... - + def WhichOneof(self, oneof_group: typing_extensions.Literal["_message",b"_message"]) -> typing.Optional[typing_extensions.Literal["message"]]: ... global___RevisionData = RevisionData class ReleaseData(google.protobuf.message.Message): """Release node properties""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - AUTHOR_FIELD_NUMBER: builtins.int AUTHOR_DATE_FIELD_NUMBER: builtins.int AUTHOR_DATE_OFFSET_FIELD_NUMBER: builtins.int NAME_FIELD_NUMBER: builtins.int MESSAGE_FIELD_NUMBER: builtins.int author: builtins.int """Release author ID (anonymized)""" + author_date: builtins.int """UNIX timestamp of the release date (UTC)""" + author_date_offset: builtins.int """Timezone of the release author date as an offset from UTC""" + name: builtins.bytes """Release name""" + message: builtins.bytes """Release message""" - def __init__( - self, + + def __init__(self, *, - author: builtins.int | None = ..., - author_date: builtins.int | None = ..., - author_date_offset: builtins.int | None = ..., - name: builtins.bytes | None = ..., - message: builtins.bytes | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_author", b"_author", "_author_date", b"_author_date", "_author_date_offset", b"_author_date_offset", "_message", b"_message", "_name", b"_name", "author", b"author", "author_date", b"author_date", "author_date_offset", b"author_date_offset", "message", b"message", "name", b"name"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_author", b"_author", "_author_date", b"_author_date", "_author_date_offset", b"_author_date_offset", "_message", b"_message", "_name", b"_name", "author", b"author", "author_date", b"author_date", "author_date_offset", b"author_date_offset", "message", b"message", "name", b"name"]) -> None: ... + author: typing.Optional[builtins.int] = ..., + author_date: typing.Optional[builtins.int] = ..., + author_date_offset: typing.Optional[builtins.int] = ..., + name: typing.Optional[builtins.bytes] = ..., + message: typing.Optional[builtins.bytes] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_author",b"_author","_author_date",b"_author_date","_author_date_offset",b"_author_date_offset","_message",b"_message","_name",b"_name","author",b"author","author_date",b"author_date","author_date_offset",b"author_date_offset","message",b"message","name",b"name"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_author",b"_author","_author_date",b"_author_date","_author_date_offset",b"_author_date_offset","_message",b"_message","_name",b"_name","author",b"author","author_date",b"author_date","author_date_offset",b"author_date_offset","message",b"message","name",b"name"]) -> None: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_author", b"_author"]) -> typing_extensions.Literal["author"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_author",b"_author"]) -> typing.Optional[typing_extensions.Literal["author"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date", b"_author_date"]) -> typing_extensions.Literal["author_date"] | None: ... 
+ def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date",b"_author_date"]) -> typing.Optional[typing_extensions.Literal["author_date"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date_offset", b"_author_date_offset"]) -> typing_extensions.Literal["author_date_offset"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_author_date_offset",b"_author_date_offset"]) -> typing.Optional[typing_extensions.Literal["author_date_offset"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_message", b"_message"]) -> typing_extensions.Literal["message"] | None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_message",b"_message"]) -> typing.Optional[typing_extensions.Literal["message"]]: ... @typing.overload - def WhichOneof(self, oneof_group: typing_extensions.Literal["_name", b"_name"]) -> typing_extensions.Literal["name"] | None: ... - + def WhichOneof(self, oneof_group: typing_extensions.Literal["_name",b"_name"]) -> typing.Optional[typing_extensions.Literal["name"]]: ... global___ReleaseData = ReleaseData class OriginData(google.protobuf.message.Message): """Origin node properties""" - DESCRIPTOR: google.protobuf.descriptor.Descriptor - URL_FIELD_NUMBER: builtins.int - url: builtins.str + url: typing.Text """URL of the origin""" - def __init__( - self, - *, - url: builtins.str | None = ..., - ) -> None: ... - def HasField(self, field_name: typing_extensions.Literal["_url", b"_url", "url", b"url"]) -> builtins.bool: ... - def ClearField(self, field_name: typing_extensions.Literal["_url", b"_url", "url", b"url"]) -> None: ... - def WhichOneof(self, oneof_group: typing_extensions.Literal["_url", b"_url"]) -> typing_extensions.Literal["url"] | None: ... + def __init__(self, + *, + url: typing.Optional[typing.Text] = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["_url",b"_url","url",b"url"]) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["_url",b"_url","url",b"url"]) -> None: ... + def WhichOneof(self, oneof_group: typing_extensions.Literal["_url",b"_url"]) -> typing.Optional[typing_extensions.Literal["url"]]: ... global___OriginData = OriginData class EdgeLabel(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - NAME_FIELD_NUMBER: builtins.int PERMISSION_FIELD_NUMBER: builtins.int name: builtins.bytes """Directory entry name for directories, branch name for snapshots""" + permission: builtins.int """Entry permission (only set for directories).""" - def __init__( - self, + + def __init__(self, *, name: builtins.bytes = ..., permission: builtins.int = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["name", b"name", "permission", b"permission"]) -> None: ... - + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["name",b"name","permission",b"permission"]) -> None: ... global___EdgeLabel = EdgeLabel class CountResponse(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - COUNT_FIELD_NUMBER: builtins.int count: builtins.int - def __init__( - self, + def __init__(self, *, count: builtins.int = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["count", b"count"]) -> None: ... - + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["count",b"count"]) -> None: ... 
global___CountResponse = CountResponse class StatsRequest(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - - def __init__( - self, - ) -> None: ... - + def __init__(self, + ) -> None: ... global___StatsRequest = StatsRequest class StatsResponse(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - NUM_NODES_FIELD_NUMBER: builtins.int NUM_EDGES_FIELD_NUMBER: builtins.int COMPRESSION_RATIO_FIELD_NUMBER: builtins.int BITS_PER_NODE_FIELD_NUMBER: builtins.int BITS_PER_EDGE_FIELD_NUMBER: builtins.int AVG_LOCALITY_FIELD_NUMBER: builtins.int INDEGREE_MIN_FIELD_NUMBER: builtins.int INDEGREE_MAX_FIELD_NUMBER: builtins.int INDEGREE_AVG_FIELD_NUMBER: builtins.int OUTDEGREE_MIN_FIELD_NUMBER: builtins.int OUTDEGREE_MAX_FIELD_NUMBER: builtins.int OUTDEGREE_AVG_FIELD_NUMBER: builtins.int num_nodes: builtins.int """Number of nodes in the graph""" + num_edges: builtins.int """Number of edges in the graph""" + compression_ratio: builtins.float """Ratio between the graph size and the information-theoretical lower bound """ + bits_per_node: builtins.float """Number of bits per node (overall graph size in bits divided by the number of nodes) """ + bits_per_edge: builtins.float """Number of bits per edge (overall graph size in bits divided by the number of arcs). """ + avg_locality: builtins.float indegree_min: builtins.int """Smallest indegree""" + indegree_max: builtins.int """Largest indegree""" + indegree_avg: builtins.float """Average indegree""" + outdegree_min: builtins.int """Smallest outdegree""" + outdegree_max: builtins.int """Largest outdegree""" + outdegree_avg: builtins.float """Average outdegree""" - def __init__( - self, + + def __init__(self, *, num_nodes: builtins.int = ..., num_edges: builtins.int = ..., compression_ratio: builtins.float = ..., bits_per_node: builtins.float = ..., bits_per_edge: builtins.float = ..., avg_locality: builtins.float = ..., indegree_min: builtins.int = ..., indegree_max: builtins.int = ..., indegree_avg: builtins.float = ..., outdegree_min: builtins.int = ..., outdegree_max: builtins.int = ..., outdegree_avg: builtins.float = ..., - ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["avg_locality", b"avg_locality", "bits_per_edge", b"bits_per_edge", "bits_per_node", b"bits_per_node", "compression_ratio", b"compression_ratio", "indegree_avg", b"indegree_avg", "indegree_max", b"indegree_max", "indegree_min", b"indegree_min", "num_edges", b"num_edges", "num_nodes", b"num_nodes", "outdegree_avg", b"outdegree_avg", "outdegree_max", b"outdegree_max", "outdegree_min", b"outdegree_min"]) -> None: ... - + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["avg_locality",b"avg_locality","bits_per_edge",b"bits_per_edge","bits_per_node",b"bits_per_node","compression_ratio",b"compression_ratio","indegree_avg",b"indegree_avg","indegree_max",b"indegree_max","indegree_min",b"indegree_min","num_edges",b"num_edges","num_nodes",b"num_nodes","outdegree_avg",b"outdegree_avg","outdegree_max",b"outdegree_max","outdegree_min",b"outdegree_min"]) -> None: ... 
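For illustration only: StatsResponse above is what the Stats RPC returns; the HTTP frontend further down in this diff calls that RPC at startup (GraphServerApp._start in swh/graph/http_rpc_server.py). A minimal sketch of querying it directly, with the server address as an assumption:

import grpc

from swh.graph.grpc.swhgraph_pb2 import StatsRequest
from swh.graph.grpc.swhgraph_pb2_grpc import TraversalServiceStub

with grpc.insecure_channel("localhost:50091") as channel:  # address is an assumption
    stub = TraversalServiceStub(channel)
    stats = stub.Stats(StatsRequest())  # same RPC the HTTP frontend calls at startup
    print(stats.num_nodes, stats.num_edges)
    print(f"{stats.bits_per_edge:.2f} bits/edge, "
          f"compression ratio {stats.compression_ratio:.2f}")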
global___StatsResponse = StatsResponse diff --git a/swh/graph/http_client.py b/swh/graph/http_client.py index aa66108..b204d73 100644 --- a/swh/graph/http_client.py +++ b/swh/graph/http_client.py @@ -1,156 +1,167 @@ # Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from swh.core.api import RPCClient class GraphAPIError(Exception): """Graph API Error""" def __str__(self): return """An unexpected error occurred in the Graph backend: {}""".format( self.args ) class GraphArgumentException(Exception): def __init__(self, *args, response=None): super().__init__(*args) self.response = response class RemoteGraphClient(RPCClient): """Client to the Software Heritage Graph.""" def __init__(self, url, timeout=None): super().__init__(api_exception=GraphAPIError, url=url, timeout=timeout) def raw_verb_lines(self, verb, endpoint, **kwargs): response = self.raw_verb(verb, endpoint, stream=True, **kwargs) self.raise_for_status(response) for line in response.iter_lines(): yield line.decode().lstrip("\n") def get_lines(self, endpoint, **kwargs): yield from self.raw_verb_lines("get", endpoint, **kwargs) def raise_for_status(self, response) -> None: if response.status_code // 100 == 4: raise GraphArgumentException( response.content.decode("ascii"), response=response ) super().raise_for_status(response) # Web API endpoints def stats(self): return self.get("stats") def leaves( - self, src, edges="*", direction="forward", max_edges=0, return_types="*" + self, + src, + edges="*", + direction="forward", + max_edges=0, + return_types="*", + max_matching_nodes=0, ): return self.get_lines( "leaves/{}".format(src), params={ "edges": edges, "direction": direction, "max_edges": max_edges, "return_types": return_types, + "max_matching_nodes": max_matching_nodes, }, ) def neighbors( self, src, edges="*", direction="forward", max_edges=0, return_types="*" ): return self.get_lines( "neighbors/{}".format(src), params={ "edges": edges, "direction": direction, "max_edges": max_edges, "return_types": return_types, }, ) def visit_nodes( self, src, edges="*", direction="forward", max_edges=0, return_types="*" ): return self.get_lines( "visit/nodes/{}".format(src), params={ "edges": edges, "direction": direction, "max_edges": max_edges, "return_types": return_types, }, ) def visit_edges(self, src, edges="*", direction="forward", max_edges=0): for edge in self.get_lines( "visit/edges/{}".format(src), params={"edges": edges, "direction": direction, "max_edges": max_edges}, ): yield tuple(edge.split()) def visit_paths(self, src, edges="*", direction="forward", max_edges=0): def decode_path_wrapper(it): for e in it: yield json.loads(e) return decode_path_wrapper( self.get_lines( "visit/paths/{}".format(src), params={"edges": edges, "direction": direction, "max_edges": max_edges}, ) ) def walk( self, src, dst, edges="*", traversal="dfs", direction="forward", limit=None ): endpoint = "walk/{}/{}" return self.get_lines( endpoint.format(src, dst), params={ "edges": edges, "traversal": traversal, "direction": direction, "limit": limit, }, ) def random_walk( self, src, dst, edges="*", direction="forward", limit=None, return_types="*" ): endpoint = "randomwalk/{}/{}" return self.get_lines( endpoint.format(src, dst), params={ "edges": edges, "direction": direction, "limit": limit, "return_types": return_types, }, ) - def 
count_leaves(self, src, edges="*", direction="forward"): + def count_leaves(self, src, edges="*", direction="forward", max_matching_nodes=0): return self.get( "leaves/count/{}".format(src), - params={"edges": edges, "direction": direction}, + params={ + "edges": edges, + "direction": direction, + "max_matching_nodes": max_matching_nodes, + }, ) def count_neighbors(self, src, edges="*", direction="forward"): return self.get( "neighbors/count/{}".format(src), params={"edges": edges, "direction": direction}, ) def count_visit_nodes(self, src, edges="*", direction="forward"): return self.get( "visit/nodes/count/{}".format(src), params={"edges": edges, "direction": direction}, ) diff --git a/swh/graph/http_naive_client.py b/swh/graph/http_naive_client.py index a94efe8..43f0088 100644 --- a/swh/graph/http_naive_client.py +++ b/swh/graph/http_naive_client.py @@ -1,395 +1,412 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import functools import inspect +import itertools import re import statistics from typing import ( Callable, Dict, Iterable, Iterator, List, Optional, Set, Tuple, TypeVar, Union, ) from swh.model.swhids import CoreSWHID, ExtendedSWHID, ValidationError from .http_client import GraphArgumentException _NODE_TYPES = "ori|snp|rel|rev|dir|cnt" NODES_RE = re.compile(rf"(\*|{_NODE_TYPES})") EDGES_RE = re.compile(rf"(\*|{_NODE_TYPES}):(\*|{_NODE_TYPES})") T = TypeVar("T", bound=Callable) SWHIDlike = Union[CoreSWHID, ExtendedSWHID, str] def check_arguments(f: T) -> T: """Decorator for generic argument checking for methods of NaiveClient. Checks ``src`` is a valid and known SWHID, and ``edges`` has the right format.""" signature = inspect.signature(f) @functools.wraps(f) def newf(*args, **kwargs): __tracebackhide__ = True # for pytest try: bound_args = signature.bind(*args, **kwargs) except TypeError as e: # rethrow the exception from here so pytest doesn't flood the terminal # with signature.bind's call stack. raise TypeError(*e.args) from None self = bound_args.arguments["self"] src = bound_args.arguments.get("src") if src: self._check_swhid(src) edges = bound_args.arguments.get("edges") if edges: if edges != "*" and not EDGES_RE.match(edges): raise GraphArgumentException(f"invalid edge restriction: {edges}") return_types = bound_args.arguments.get("return_types") if return_types: if not NODES_RE.match(return_types): raise GraphArgumentException( f"invalid return_types restriction: {return_types}" ) return f(*args, **kwargs) return newf # type: ignore def filter_node_types(node_types: str, nodes: Iterable[str]) -> Iterator[str]: if node_types == "*": yield from nodes else: prefixes = tuple(f"swh:1:{type_}:" for type_ in node_types.split(",")) for node in nodes: if node.startswith(prefixes): yield node class NaiveClient: """An alternative implementation of the graph server, written in pure-python and meant for simulating it in other components' test cases; constructed from a list of nodes and (directed) edges, both represented as SWHIDs. It is NOT meant to be efficient in any way; only to be a very simple implementation that provides the same behavior. >>> nodes = [ ... "swh:1:rev:1111111111111111111111111111111111111111", ... "swh:1:rev:2222222222222222222222222222222222222222", ... 
"swh:1:rev:3333333333333333333333333333333333333333", ... ] >>> edges = [ ... ( ... "swh:1:rev:1111111111111111111111111111111111111111", ... "swh:1:rev:2222222222222222222222222222222222222222", ... ), ... ( ... "swh:1:rev:2222222222222222222222222222222222222222", ... "swh:1:rev:3333333333333333333333333333333333333333", ... ), ... ] >>> c = NaiveClient(nodes=nodes, edges=edges) >>> list(c.leaves("swh:1:rev:1111111111111111111111111111111111111111")) ['swh:1:rev:3333333333333333333333333333333333333333'] """ def __init__( self, *, nodes: List[SWHIDlike], edges: List[Tuple[SWHIDlike, SWHIDlike]] ): self.graph = Graph(nodes, edges) def _check_swhid(self, swhid): try: ExtendedSWHID.from_string(swhid) except ValidationError as e: raise GraphArgumentException(*e.args) from None if swhid not in self.graph.nodes: raise GraphArgumentException(f"SWHID not found: {swhid}") def stats(self) -> Dict: return { "num_nodes": len(self.graph.nodes), "num_edges": sum(map(len, self.graph.forward_edges.values())), "compression_ratio": 1.0, "bits_per_edge": 100.0, "bits_per_node": 100.0, "avg_locality": 0.0, "indegree_min": min(map(len, self.graph.backward_edges.values())), "indegree_max": max(map(len, self.graph.backward_edges.values())), "indegree_avg": statistics.mean( map(len, self.graph.backward_edges.values()) ), "outdegree_min": min(map(len, self.graph.forward_edges.values())), "outdegree_max": max(map(len, self.graph.forward_edges.values())), "outdegree_avg": statistics.mean( map(len, self.graph.forward_edges.values()) ), } @check_arguments def leaves( self, src: str, edges: str = "*", direction: str = "forward", max_edges: int = 0, return_types: str = "*", + max_matching_nodes: int = 0, ) -> Iterator[str]: # TODO: max_edges - yield from filter_node_types( + leaves = filter_node_types( return_types, [ node for node in self.graph.get_subgraph(src, edges, direction) if not self.graph.get_filtered_neighbors(node, edges, direction) ], ) + if max_matching_nodes > 0: + leaves = itertools.islice(leaves, max_matching_nodes) + + return leaves + @check_arguments def neighbors( self, src: str, edges: str = "*", direction: str = "forward", max_edges: int = 0, return_types: str = "*", ) -> Iterator[str]: # TODO: max_edges yield from filter_node_types( return_types, self.graph.get_filtered_neighbors(src, edges, direction) ) @check_arguments def visit_nodes( self, src: str, edges: str = "*", direction: str = "forward", max_edges: int = 0, return_types: str = "*", ) -> Iterator[str]: # TODO: max_edges yield from filter_node_types( return_types, self.graph.get_subgraph(src, edges, direction) ) @check_arguments def visit_edges( self, src: str, edges: str = "*", direction: str = "forward", max_edges: int = 0 ) -> Iterator[Tuple[str, str]]: if max_edges == 0: max_edges = None # type: ignore else: max_edges -= 1 yield from list(self.graph.iter_edges_dfs(direction, edges, src))[:max_edges] @check_arguments def visit_paths( self, src: str, edges: str = "*", direction: str = "forward", max_edges: int = 0 ) -> Iterator[List[str]]: # TODO: max_edges for path in self.graph.iter_paths_dfs(direction, edges, src): if path[-1] in self.leaves(src, edges, direction): yield list(path) @check_arguments def walk( self, src: str, dst: str, edges: str = "*", traversal: str = "dfs", direction: str = "forward", limit: Optional[int] = None, ) -> Iterator[str]: # TODO: implement algo="bfs" # TODO: limit match_path: Callable[[str], bool] if ":" in dst: match_path = dst.__eq__ self._check_swhid(dst) else: match_path = lambda node: 
node.startswith(f"swh:1:{dst}:") # noqa for path in self.graph.iter_paths_dfs(direction, edges, src): if match_path(path[-1]): if not limit: # 0 or None yield from path elif limit > 0: yield from path[0:limit] else: yield from path[limit:] @check_arguments def random_walk( self, src: str, dst: str, edges: str = "*", direction: str = "forward", limit: Optional[int] = None, ): # TODO: limit yield from self.walk(src, dst, edges, "dfs", direction, limit) @check_arguments def count_leaves( - self, src: str, edges: str = "*", direction: str = "forward" + self, + src: str, + edges: str = "*", + direction: str = "forward", + max_matching_nodes: int = 0, ) -> int: - return len(list(self.leaves(src, edges, direction))) + return len( + list( + self.leaves( + src, edges, direction, max_matching_nodes=max_matching_nodes + ) + ) + ) @check_arguments def count_neighbors( self, src: str, edges: str = "*", direction: str = "forward" ) -> int: return len(self.graph.get_filtered_neighbors(src, edges, direction)) @check_arguments def count_visit_nodes( self, src: str, edges: str = "*", direction: str = "forward" ) -> int: return len(self.graph.get_subgraph(src, edges, direction)) class Graph: def __init__( self, nodes: List[SWHIDlike], edges: List[Tuple[SWHIDlike, SWHIDlike]] ): self.nodes = [str(node) for node in nodes] self.forward_edges: Dict[str, List[str]] = {} self.backward_edges: Dict[str, List[str]] = {} for node in nodes: self.forward_edges[str(node)] = [] self.backward_edges[str(node)] = [] for (src, dst) in edges: self.forward_edges[str(src)].append(str(dst)) self.backward_edges[str(dst)].append(str(src)) def get_filtered_neighbors( self, src: str, edges_fmt: str, direction: str, ) -> Set[str]: if direction == "forward": edges = self.forward_edges elif direction == "backward": edges = self.backward_edges else: raise GraphArgumentException(f"invalid direction: {direction}") neighbors = edges.get(src, []) if edges_fmt == "*": return set(neighbors) else: filtered_neighbors: Set[str] = set() for edges_fmt_item in edges_fmt.split(","): (src_fmt, dst_fmt) = edges_fmt_item.split(":") if src_fmt != "*" and not src.startswith(f"swh:1:{src_fmt}:"): continue if dst_fmt == "*": filtered_neighbors.update(neighbors) else: prefix = f"swh:1:{dst_fmt}:" filtered_neighbors.update( n for n in neighbors if n.startswith(prefix) ) return filtered_neighbors def get_subgraph(self, src: str, edges_fmt: str, direction: str) -> Set[str]: seen = set() to_visit = {src} while to_visit: node = to_visit.pop() seen.add(node) neighbors = set(self.get_filtered_neighbors(node, edges_fmt, direction)) new_nodes = neighbors - seen to_visit.update(new_nodes) return seen def iter_paths_dfs( self, direction: str, edges_fmt: str, src: str ) -> Iterator[Tuple[str, ...]]: for (path, node) in DfsSubgraphIterator(self, direction, edges_fmt, src): yield path + (node,) def iter_edges_dfs( self, direction: str, edges_fmt: str, src: str ) -> Iterator[Tuple[str, str]]: for (path, node) in DfsSubgraphIterator(self, direction, edges_fmt, src): if len(path) > 0: yield (path[-1], node) class SubgraphIterator(Iterator[Tuple[Tuple[str, ...], str]]): def __init__(self, graph: Graph, direction: str, edges_fmt: str, src: str): self.graph = graph self.direction = direction self.edges_fmt = edges_fmt self.seen: Set[str] = set() self.src = src def more_work(self) -> bool: raise NotImplementedError() def pop(self) -> Tuple[Tuple[str, ...], str]: raise NotImplementedError() def push(self, new_path: Tuple[str, ...], neighbor: str) -> None: raise 
NotImplementedError() def __next__(self) -> Tuple[Tuple[str, ...], str]: # Stores (path, next_node) if not self.more_work(): raise StopIteration() (path, node) = self.pop() new_path = path + (node,) if node not in self.seen: neighbors = self.graph.get_filtered_neighbors( node, self.edges_fmt, self.direction ) # We want to visit the first neighbor first, and to_visit is a stack; # so we need to reversed() the list of neighbors to get it on top # of the stack. for neighbor in reversed(list(neighbors)): self.push(new_path, neighbor) self.seen.add(node) return (path, node) class DfsSubgraphIterator(SubgraphIterator): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.to_visit: List[Tuple[Tuple[str, ...], str]] = [((), self.src)] def more_work(self) -> bool: return bool(self.to_visit) def pop(self) -> Tuple[Tuple[str, ...], str]: return self.to_visit.pop() def push(self, new_path: Tuple[str, ...], neighbor: str) -> None: self.to_visit.append((new_path, neighbor)) diff --git a/swh/graph/http_rpc_server.py b/swh/graph/http_rpc_server.py index 658a4ea..fc617f0 100644 --- a/swh/graph/http_rpc_server.py +++ b/swh/graph/http_rpc_server.py @@ -1,415 +1,419 @@ # Copyright (C) 2019-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ A proxy HTTP server for swh-graph, talking to the Java code via py4j, and using FIFO as a transport to stream integers between the two languages. """ import json import logging import os from typing import Optional import aiohttp.test_utils import aiohttp.web from google.protobuf import json_format from google.protobuf.field_mask_pb2 import FieldMask import grpc from swh.core.api.asynchronous import RPCServerApp from swh.core.config import read as config_read from swh.graph.grpc.swhgraph_pb2 import ( GetNodeRequest, NodeFilter, StatsRequest, TraversalRequest, ) from swh.graph.grpc.swhgraph_pb2_grpc import TraversalServiceStub from swh.graph.grpc_server import spawn_java_grpc_server, stop_java_grpc_server from swh.model.swhids import EXTENDED_SWHID_TYPES try: from contextlib import asynccontextmanager except ImportError: # Compatibility with 3.6 backport from async_generator import asynccontextmanager # type: ignore # maximum number of retries for random walks RANDOM_RETRIES = 10 # TODO make this configurable via rpc-serve configuration logger = logging.getLogger(__name__) async def _aiorpcerror_middleware(app, handler): async def middleware_handler(request): try: return await handler(request) except grpc.aio.AioRpcError as e: # The default error handler of the RPC framework tries to serialize this # with msgpack; which for some unknown reason causes it to raise # ValueError("recursion limit exceeded") with a lot of context, causing # Sentry to be overflowed with gigabytes of logs (160KB per event, with # potentially hundreds of thousands of events per day). # Instead, we simply serialize the exception to a string. 
# https://sentry.softwareheritage.org/share/issue/d6d4db971e4b47728a6c1dd06cb9b8a5/ raise aiohttp.web.HTTPServiceUnavailable(text=str(e)) return middleware_handler class GraphServerApp(RPCServerApp): def __init__(self, *args, middlewares=(), **kwargs): middlewares = (_aiorpcerror_middleware,) + middlewares super().__init__(*args, middlewares=middlewares, **kwargs) self.on_startup.append(self._start) self.on_shutdown.append(self._stop) @staticmethod async def _start(app): app["channel"] = grpc.aio.insecure_channel(app["rpc_url"]) await app["channel"].__aenter__() app["rpc_client"] = TraversalServiceStub(app["channel"]) await app["rpc_client"].Stats(StatsRequest(), wait_for_ready=True) @staticmethod async def _stop(app): await app["channel"].__aexit__(None, None, None) if app.get("local_server"): stop_java_grpc_server(app["local_server"]) async def index(request): return aiohttp.web.Response( content_type="text/html", body=""" Software Heritage graph server

You have reached the Software Heritage graph API server.

See its API documentation for more information.

""", ) class GraphView(aiohttp.web.View): """Base class for views working on the graph, with utility functions""" def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.rpc_client: TraversalServiceStub = self.request.app["rpc_client"] def get_direction(self): """Validate HTTP query parameter `direction`""" s = self.request.query.get("direction", "forward") if s not in ("forward", "backward"): raise aiohttp.web.HTTPBadRequest(text=f"invalid direction: {s}") return s.upper() def get_edges(self): """Validate HTTP query parameter `edges`, i.e., edge restrictions""" s = self.request.query.get("edges", "*") if any( [ node_type != "*" and node_type not in EXTENDED_SWHID_TYPES for edge in s.split(":") for node_type in edge.split(",", maxsplit=1) ] ): raise aiohttp.web.HTTPBadRequest(text=f"invalid edge restriction: {s}") return s def get_return_types(self): """Validate HTTP query parameter 'return types', i.e, a set of types which we will filter the query results with""" s = self.request.query.get("return_types", "*") if any( node_type != "*" and node_type not in EXTENDED_SWHID_TYPES for node_type in s.split(",") ): raise aiohttp.web.HTTPBadRequest( text=f"invalid type for filtering res: {s}" ) # if the user puts a star, # then we filter nothing, we don't need the other information if "*" in s: return "*" else: return s - def get_limit(self): - """Validate HTTP query parameter `limit`, i.e., number of results""" - s = self.request.query.get("limit", "0") + def get_max_matching_nodes(self): + """Validate HTTP query parameter `max_matching_nodes`, i.e., number of results""" + s = self.request.query.get("max_matching_nodes", "0") try: return int(s) except ValueError: - raise aiohttp.web.HTTPBadRequest(text=f"invalid limit value: {s}") + raise aiohttp.web.HTTPBadRequest( + text=f"invalid max_matching_nodes value: {s}" + ) def get_max_edges(self): """Validate HTTP query parameter 'max_edges', i.e., the limit of the number of edges that can be visited""" s = self.request.query.get("max_edges", "0") try: return int(s) except ValueError: raise aiohttp.web.HTTPBadRequest(text=f"invalid max_edges value: {s}") async def check_swhid(self, swhid): """Validate that the given SWHID exists in the graph""" try: await self.rpc_client.GetNode( GetNodeRequest(swhid=swhid, mask=FieldMask(paths=["swhid"])) ) except grpc.aio.AioRpcError as e: if e.code() == grpc.StatusCode.INVALID_ARGUMENT: raise aiohttp.web.HTTPBadRequest(text=str(e.details())) class StreamingGraphView(GraphView): """Base class for views streaming their response line by line.""" content_type = "text/plain" @asynccontextmanager async def response_streamer(self, *args, **kwargs): """Context manager to prepare then close a StreamResponse""" response = aiohttp.web.StreamResponse(*args, **kwargs) response.content_type = self.content_type await response.prepare(self.request) yield response await response.write_eof() async def get(self): await self.prepare_response() async with self.response_streamer() as self.response_stream: self._buf = [] try: await self.stream_response() finally: await self._flush_buffer() return self.response_stream async def prepare_response(self): """This can be overridden with some setup to be run before the response actually starts streaming. """ pass async def stream_response(self): """Override this to perform the response streaming. Implementations of this should await self.stream_line(line) to write each line. 
""" raise NotImplementedError async def stream_line(self, line): """Write a line in the response stream.""" self._buf.append(line) if len(self._buf) > 100: await self._flush_buffer() async def _flush_buffer(self): await self.response_stream.write("\n".join(self._buf).encode() + b"\n") self._buf = [] class StatsView(GraphView): """View showing some statistics on the graph""" async def get(self): res = await self.rpc_client.Stats(StatsRequest()) stats = json_format.MessageToDict( res, including_default_value_fields=True, preserving_proto_field_name=True ) # Int64 fields are serialized as strings by default. for descriptor in res.DESCRIPTOR.fields: if descriptor.type == descriptor.TYPE_INT64: try: stats[descriptor.name] = int(stats[descriptor.name]) except KeyError: pass json_body = json.dumps(stats, indent=4, sort_keys=True) return aiohttp.web.Response(body=json_body, content_type="application/json") class SimpleTraversalView(StreamingGraphView): """Base class for views of simple traversals""" async def prepare_response(self): src = self.request.match_info["src"] self.traversal_request = TraversalRequest( src=[src], edges=self.get_edges(), direction=self.get_direction(), return_nodes=NodeFilter(types=self.get_return_types()), mask=FieldMask(paths=["swhid"]), + max_matching_nodes=self.get_max_matching_nodes(), ) if self.get_max_edges(): self.traversal_request.max_edges = self.get_max_edges() await self.check_swhid(src) self.configure_request() self.nodes_stream = self.rpc_client.Traverse(self.traversal_request) # Force gRPC to query the server and fetch the first nodes; so errors # are raised early, so we can return HTTP 503 before HTTP 200 await self.nodes_stream.wait_for_connection() def configure_request(self): pass async def stream_response(self): async for node in self.nodes_stream: await self.stream_line(node.swhid) class LeavesView(SimpleTraversalView): def configure_request(self): self.traversal_request.return_nodes.max_traversal_successors = 0 class NeighborsView(SimpleTraversalView): def configure_request(self): self.traversal_request.min_depth = 1 self.traversal_request.max_depth = 1 class VisitNodesView(SimpleTraversalView): pass class VisitEdgesView(SimpleTraversalView): def configure_request(self): self.traversal_request.mask.paths.extend(["successor", "successor.swhid"]) # self.traversal_request.return_fields.successor = True async def stream_response(self): async for node in self.rpc_client.Traverse(self.traversal_request): for succ in node.successor: await self.stream_line(node.swhid + " " + succ.swhid) class CountView(GraphView): """Base class for counting views.""" count_type: Optional[str] = None async def get(self): src = self.request.match_info["src"] self.traversal_request = TraversalRequest( src=[src], edges=self.get_edges(), direction=self.get_direction(), return_nodes=NodeFilter(types=self.get_return_types()), mask=FieldMask(paths=["swhid"]), + max_matching_nodes=self.get_max_matching_nodes(), ) if self.get_max_edges(): self.traversal_request.max_edges = self.get_max_edges() self.configure_request() res = await self.rpc_client.CountNodes(self.traversal_request) return aiohttp.web.Response( body=str(res.count), content_type="application/json" ) def configure_request(self): pass class CountNeighborsView(CountView): def configure_request(self): self.traversal_request.min_depth = 1 self.traversal_request.max_depth = 1 class CountLeavesView(CountView): def configure_request(self): self.traversal_request.return_nodes.max_traversal_successors = 0 class 
CountVisitNodesView(CountView): pass def make_app(config=None): """Create an aiohttp server for the HTTP RPC frontend to the swh-graph API. It may either connect to an existing grpc server (cls="remote") or spawn a local grpc server (cls="local"). ``config`` is expected to be a dict like:: graph: cls: "local" grpc_server: port: 50091 http_rpc_server: debug: true or:: graph: cls: "remote" url: "localhost:50091" http_rpc_server: debug: true See: - :mod:`swh.graph.grpc_server` for more details of the content of the grpc_server section, - :class:`~.GraphServerApp` class for more details of the content of the http_rpc_server section. """ if config is None: config = {} if "graph" not in config: logger.info( "Missing 'graph' configuration; defaulting to a locally spawned " "grpc server listening on 0.0.0.0:50091" ) cfg = {"cls": "local", "grpc_server": {"port": 50091}} else: cfg = config["graph"].copy() cls = cfg.pop("cls") grpc_cfg = cfg.pop("grpc_server", {}) app = GraphServerApp(**cfg.get("http_rpc_server", {})) if cls == "remote": if "url" not in cfg: raise KeyError("Missing 'url' configuration entry in the [graph] section") rpc_url = cfg["url"] elif cls == "local": app["local_server"], port = spawn_java_grpc_server(**grpc_cfg) rpc_url = f"localhost:{port}" else: raise ValueError(f"Unknown swh.graph class cls={cls}") app.add_routes( [ aiohttp.web.get("/", index), aiohttp.web.get("/graph", index), aiohttp.web.view("/graph/stats", StatsView), aiohttp.web.view("/graph/leaves/{src}", LeavesView), aiohttp.web.view("/graph/neighbors/{src}", NeighborsView), aiohttp.web.view("/graph/visit/nodes/{src}", VisitNodesView), aiohttp.web.view("/graph/visit/edges/{src}", VisitEdgesView), aiohttp.web.view("/graph/neighbors/count/{src}", CountNeighborsView), aiohttp.web.view("/graph/leaves/count/{src}", CountLeavesView), aiohttp.web.view("/graph/visit/nodes/count/{src}", CountVisitNodesView), ] ) app["rpc_url"] = rpc_url return app def make_app_from_configfile(): """Load configuration and then build application to run""" config_file = os.environ.get("SWH_CONFIG_FILENAME") config = config_read(config_file) return make_app(config=config) diff --git a/swh/graph/tests/test_http_client.py b/swh/graph/tests/test_http_client.py index 21021b3..1878029 100644 --- a/swh/graph/tests/test_http_client.py +++ b/swh/graph/tests/test_http_client.py @@ -1,378 +1,408 @@ # Copyright (c) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import hashlib import pytest from pytest import raises from swh.core.api import RemoteException from swh.graph.http_client import GraphArgumentException TEST_ORIGIN_ID = "swh:1:ori:{}".format( hashlib.sha1(b"https://example.com/swh/graph").hexdigest() ) def test_stats(graph_client): stats = graph_client.stats() assert stats["num_nodes"] == 21 assert stats["num_edges"] == 23 assert isinstance(stats["compression_ratio"], float) assert isinstance(stats["bits_per_node"], float) assert isinstance(stats["bits_per_edge"], float) assert isinstance(stats["avg_locality"], float) assert stats["indegree_min"] == 0 assert stats["indegree_max"] == 3 assert isinstance(stats["indegree_avg"], float) assert stats["outdegree_min"] == 0 assert stats["outdegree_max"] == 3 assert isinstance(stats["outdegree_avg"], float) def test_leaves(graph_client): actual = list(graph_client.leaves(TEST_ORIGIN_ID)) expected = [
"swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:cnt:0000000000000000000000000000000000000005", "swh:1:cnt:0000000000000000000000000000000000000007", ] assert set(actual) == set(expected) +@pytest.mark.parametrize("max_matching_nodes", [0, 1, 2, 3, 4, 5, 10, 1 << 31]) +def test_leaves_with_limit(graph_client, max_matching_nodes): + actual = list( + graph_client.leaves(TEST_ORIGIN_ID, max_matching_nodes=max_matching_nodes) + ) + expected = [ + "swh:1:cnt:0000000000000000000000000000000000000001", + "swh:1:cnt:0000000000000000000000000000000000000004", + "swh:1:cnt:0000000000000000000000000000000000000005", + "swh:1:cnt:0000000000000000000000000000000000000007", + ] + + if max_matching_nodes == 0: + assert set(actual) == set(expected) + else: + assert set(actual) <= set(expected) + assert len(actual) == min(4, max_matching_nodes) + + def test_neighbors(graph_client): actual = list( graph_client.neighbors( "swh:1:rev:0000000000000000000000000000000000000009", direction="backward" ) ) expected = [ "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000013", ] assert set(actual) == set(expected) def test_visit_nodes(graph_client): actual = list( graph_client.visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev", ) ) expected = [ "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ] assert set(actual) == set(expected) def test_visit_nodes_filtered(graph_client): actual = list( graph_client.visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", return_types="dir", ) ) expected = [ "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", ] assert set(actual) == set(expected) def test_visit_nodes_filtered_star(graph_client): actual = list( graph_client.visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", return_types="*", ) ) expected = [ "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:cnt:0000000000000000000000000000000000000005", ] assert set(actual) == set(expected) def test_visit_edges(graph_client): actual = list( graph_client.visit_edges( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev,rev:dir", ) ) expected = [ ( "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ] assert set(actual) == set(expected) def test_visit_edges_limited(graph_client): actual = list( graph_client.visit_edges( 
"swh:1:rel:0000000000000000000000000000000000000010", max_edges=4, edges="rel:rev,rev:rev,rev:dir", ) ) expected = [ ( "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ] # As there are four valid answers (up to reordering), we cannot check for # equality. Instead, we check the client returned all edges but one. assert set(actual).issubset(set(expected)) assert len(actual) == 3 def test_visit_edges_diamond_pattern(graph_client): actual = list( graph_client.visit_edges( "swh:1:rev:0000000000000000000000000000000000000009", edges="*", ) ) expected = [ ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", ), ( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", ), ( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", ), ( "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007", ), ( "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", ), ( "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004", ), ( "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005", ), ] assert set(actual) == set(expected) @pytest.mark.skip(reason="currently disabled due to T1969") def test_walk(graph_client): args = ("swh:1:dir:0000000000000000000000000000000000000016", "rel") kwargs = { "edges": "dir:dir,dir:rev,rev:*", "direction": "backward", "traversal": "bfs", } actual = list(graph_client.walk(*args, **kwargs)) expected = [ "swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019", ] assert set(actual) == set(expected) kwargs2 = kwargs.copy() kwargs2["limit"] = -1 actual = list(graph_client.walk(*args, **kwargs2)) expected = ["swh:1:rel:0000000000000000000000000000000000000019"] assert set(actual) == set(expected) kwargs2 = kwargs.copy() kwargs2["limit"] = 2 actual = list(graph_client.walk(*args, **kwargs2)) expected = [ "swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", ] assert set(actual) == set(expected) @pytest.mark.skip(reason="Random walk is deprecated") def test_random_walk_dst_is_type(graph_client): """as the walk is random, we test a visit from a cnt node to a release reachable from every single path in the backward graph, and only check the final node of the path (i.e., the release) """ args = ("swh:1:cnt:0000000000000000000000000000000000000015", "rel") kwargs = {"direction": "backward"} expected_root = "swh:1:rel:0000000000000000000000000000000000000019" actual = list(graph_client.random_walk(*args, 
**kwargs)) assert len(actual) > 1 # no release directly links to a content assert actual[0] == args[0] assert actual[-1] == expected_root kwargs2 = kwargs.copy() kwargs2["limit"] = -1 actual = list(graph_client.random_walk(*args, **kwargs2)) assert actual == [expected_root] kwargs2["limit"] = -2 actual = list(graph_client.random_walk(*args, **kwargs2)) assert len(actual) == 2 assert actual[-1] == expected_root kwargs2["limit"] = 3 actual = list(graph_client.random_walk(*args, **kwargs2)) assert len(actual) == 3 @pytest.mark.skip(reason="Random walk is deprecated") def test_random_walk_dst_is_node(graph_client): """Same as test_random_walk_dst_is_type, but we target the specific release node instead of a type """ args = ( "swh:1:cnt:0000000000000000000000000000000000000015", "swh:1:rel:0000000000000000000000000000000000000019", ) kwargs = {"direction": "backward"} expected_root = "swh:1:rel:0000000000000000000000000000000000000019" actual = list(graph_client.random_walk(*args, **kwargs)) assert len(actual) > 1 # no origin directly links to a content assert actual[0] == args[0] assert actual[-1] == expected_root kwargs2 = kwargs.copy() kwargs2["limit"] = -1 actual = list(graph_client.random_walk(*args, **kwargs2)) assert actual == [expected_root] kwargs2["limit"] = -2 actual = list(graph_client.random_walk(*args, **kwargs2)) assert len(actual) == 2 assert actual[-1] == expected_root kwargs2["limit"] = 3 actual = list(graph_client.random_walk(*args, **kwargs2)) assert len(actual) == 3 def test_count(graph_client): actual = graph_client.count_leaves(TEST_ORIGIN_ID) assert actual == 4 actual = graph_client.count_visit_nodes( "swh:1:rel:0000000000000000000000000000000000000010", edges="rel:rev,rev:rev" ) assert actual == 3 actual = graph_client.count_neighbors( "swh:1:rev:0000000000000000000000000000000000000009", direction="backward" ) assert actual == 3 +@pytest.mark.parametrize("max_matching_nodes", [0, 1, 2, 3, 4, 5, 10, 1 << 31]) +def test_count_with_limit(graph_client, max_matching_nodes): + actual = graph_client.count_leaves( + TEST_ORIGIN_ID, max_matching_nodes=max_matching_nodes + ) + if max_matching_nodes == 0: + assert actual == 4 + else: + assert actual == min(4, max_matching_nodes) + + def test_param_validation(graph_client): with raises(GraphArgumentException) as exc_info: # SWHID not found list(graph_client.leaves("swh:1:rel:00ffffffff000000000000000000000000000010")) if exc_info.value.response: assert exc_info.value.response.status_code == 404 with raises(GraphArgumentException) as exc_info: # malformed SWHID list( graph_client.neighbors("swh:1:rel:00ffffffff00000000zzzzzzz000000000000010") ) if exc_info.value.response: assert exc_info.value.response.status_code == 400 with raises(GraphArgumentException) as exc_info: # malformed edge specification list( graph_client.visit_nodes( "swh:1:dir:0000000000000000000000000000000000000016", edges="dir:notanodetype,dir:rev,rev:*", direction="backward", ) ) if exc_info.value.response: assert exc_info.value.response.status_code == 400 with raises(GraphArgumentException) as exc_info: # malformed direction list( graph_client.visit_nodes( "swh:1:dir:0000000000000000000000000000000000000016", edges="dir:dir,dir:rev,rev:*", direction="notadirection", ) ) if exc_info.value.response: assert exc_info.value.response.status_code == 400 @pytest.mark.skip(reason="currently disabled due to T1969") def test_param_validation_walk(graph_client): """test validation of walk-specific parameters only""" with raises(RemoteException) as exc_info: # malformed
traversal order list( graph_client.walk( "swh:1:dir:0000000000000000000000000000000000000016", "rel", edges="dir:dir,dir:rev,rev:*", direction="backward", traversal="notatraversalorder", ) ) assert exc_info.value.response.status_code == 400
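
Illustrative sketch of the new max_matching_nodes parameter from the HTTP client side. The leaves/count_leaves signatures and the max_matching_nodes query parameter are the ones exercised by the views and tests in this diff; the RemoteGraphClient entry point, the server URL and the origin SWHID are assumptions made for the example.

# Sketch (assumptions: RemoteGraphClient is the HTTP client entry point, a
# graph server is reachable at http://localhost:5009/graph, and the origin
# SWHID below is hypothetical).
from swh.graph.http_client import RemoteGraphClient

client = RemoteGraphClient("http://localhost:5009/graph")
origin = "swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054"  # hypothetical

# max_matching_nodes=0 (the default) keeps the previous behaviour and returns
# every leaf reachable from the origin.
all_leaves = list(client.leaves(origin))

# A positive value makes the traversal stop after that many matching nodes,
# for both the streaming endpoint and the counting one.
some_leaves = list(client.leaves(origin, max_matching_nodes=2))
assert len(some_leaves) <= 2
capped_count = client.count_leaves(origin, max_matching_nodes=2)

# Equivalent raw HTTP requests handled by LeavesView / CountLeavesView:
#   GET /graph/leaves/<swhid>?max_matching_nodes=2
#   GET /graph/leaves/count/<swhid>?max_matching_nodes=2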
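
At the gRPC level, the HTTP views above only forward the value: they set the new max_matching_nodes field on TraversalRequest. A sketch of the same request issued directly against the gRPC server, assuming one listens on localhost:50091 (only the default port used by make_app); imports mirror those of swh/graph/http_rpc_server.py.

# Sketch (assumption: a TraversalService gRPC server listens on localhost:50091;
# the SWHID passed at the bottom is hypothetical).
import asyncio

import grpc
from google.protobuf.field_mask_pb2 import FieldMask

from swh.graph.grpc.swhgraph_pb2 import NodeFilter, TraversalRequest
from swh.graph.grpc.swhgraph_pb2_grpc import TraversalServiceStub


async def two_leaves(swhid):
    async with grpc.aio.insecure_channel("localhost:50091") as channel:
        stub = TraversalServiceStub(channel)
        request = TraversalRequest(
            src=[swhid],
            # Leaves are nodes without traversal successors, as in LeavesView.
            return_nodes=NodeFilter(max_traversal_successors=0),
            mask=FieldMask(paths=["swhid"]),
            max_matching_nodes=2,  # stop after two matching nodes
        )
        return [node.swhid async for node in stub.Traverse(request)]


if __name__ == "__main__":
    print(asyncio.run(two_leaves(
        "swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054"  # hypothetical
    )))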
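
On the pure-Python side, the in-memory Graph helper shown near the top of this diff can be exercised on its own; a small sketch follows, using the constructor and method signatures visible above (the import path swh.graph.naive_client is an assumption, and the SWHIDs are hypothetical).

# Sketch of the in-memory Graph helper (assumption: it lives in
# swh.graph.naive_client).
from swh.graph.naive_client import Graph

rev = "swh:1:rev:" + "0" * 40
dir_ = "swh:1:dir:" + "0" * 39 + "1"
cnt = "swh:1:cnt:" + "0" * 39 + "2"

g = Graph(nodes=[rev, dir_, cnt], edges=[(rev, dir_), (dir_, cnt)])

# Neighbors of the revision, restricted to rev->dir edges:
assert g.get_filtered_neighbors(rev, "rev:dir", "forward") == {dir_}

# Every node reachable from the revision, following any edge type:
assert g.get_subgraph(rev, "*", "forward") == {rev, dir_, cnt}

# DFS paths rooted at the revision; each yielded path ends at the visited node:
paths = list(g.iter_paths_dfs("forward", "*", rev))
assert (rev, dir_, cnt) in paths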