diff --git a/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java b/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java index 824a389..7e70f9e 100644 --- a/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java +++ b/java/src/main/java/org/softwareheritage/graph/SwhBidirectionalGraph.java @@ -1,180 +1,180 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator; import it.unimi.dsi.big.webgraph.BidirectionalImmutableGraph; import it.unimi.dsi.logging.ProgressLogger; import java.io.IOException; import java.io.InputStream; /** * Class representing the compressed Software Heritage graph in both directions (forward and * backward). * * This class uses the {@link BidirectionalImmutableGraph} class internally to implement the * backward equivalent of graph operations ({@link SwhBidirectionalGraph#indegree(long)}, * {@link SwhBidirectionalGraph#predecessors(long)}, etc.) by holding a reference to two * {@link SwhUnidirectionalGraph} (a forward graph and a backward graph). * * Both graphs share their graph properties in memory by storing references to the same * {@link SwhGraphProperties} object. * *
  *                 ┌──────────────┐
  *                 │ImmutableGraph◄────────┐
  *                 └────▲─────────┘        │extends
  *                      │                  │
  *                      │       ┌──────────┴────────────────┐
  *               extends│       │BidirectionalImmutableGraph│
  *                      │       └────────────▲──────────────┘
  *                      │                    │extends
  *       ┌──────────────┴───────┐     ┌──────┴──────────────┐
  *       │SwhUnidirectionalGraph│◄────┤SwhBidirectionalGraph│
  *       └──┬──────────────┬────┘     └────────┬───────────┬┘
  *          │              │    contains x2    │           │
  *          │              │                   │           │
  *          │    implements│                   │implements │
  *          │             ┌▼──────────┐        │           │
  *          │             │SwhGraph(I)◄────────┘           │
  * contains │             └───────────┘                    │contains
  *          │                                              │
  *          │            ┌──────────────────┐              │
  *          └────────────►SwhGraphProperties◄──────────────┘
  *                       └──────────────────┘
  * 
* * @author The Software Heritage developers * @see SwhUnidirectionalGraph */ public class SwhBidirectionalGraph extends BidirectionalImmutableGraph implements SwhGraph { /** Property data of the graph (id/type mappings etc.) */ public final SwhGraphProperties properties; private final SwhUnidirectionalGraph forwardGraph; private final SwhUnidirectionalGraph backwardGraph; public SwhBidirectionalGraph(SwhUnidirectionalGraph forwardGraph, SwhUnidirectionalGraph backwardGraph, SwhGraphProperties properties) { super(forwardGraph, backwardGraph); this.forwardGraph = forwardGraph; this.backwardGraph = backwardGraph; this.properties = properties; } private SwhBidirectionalGraph(BidirectionalImmutableGraph graph, SwhGraphProperties properties) { super(graph.forward, graph.backward); - this.forwardGraph = (SwhUnidirectionalGraph) graph.forward; - this.backwardGraph = (SwhUnidirectionalGraph) graph.backward; + this.forwardGraph = new SwhUnidirectionalGraph(graph.forward, properties); + this.backwardGraph = new SwhUnidirectionalGraph(graph.backward, properties); this.properties = properties; } public static SwhBidirectionalGraph load(LoadMethod method, String path, InputStream is, ProgressLogger pl) throws IOException { SwhUnidirectionalGraph forward = SwhUnidirectionalGraph.loadGraphOnly(method, path, is, pl); SwhUnidirectionalGraph backward = SwhUnidirectionalGraph.loadGraphOnly(method, path + "-transposed", is, pl); SwhGraphProperties properties = SwhGraphProperties.load(path); forward.setProperties(properties); backward.setProperties(properties); return new SwhBidirectionalGraph(forward, backward, properties); } public static SwhBidirectionalGraph loadLabelled(LoadMethod method, String path, InputStream is, ProgressLogger pl) throws IOException { SwhUnidirectionalGraph forward = SwhUnidirectionalGraph.loadLabelledGraphOnly(method, path, is, pl); SwhUnidirectionalGraph backward = SwhUnidirectionalGraph.loadLabelledGraphOnly(method, path + "-transposed", is, pl); SwhGraphProperties properties = SwhGraphProperties.load(path); forward.setProperties(properties); backward.setProperties(properties); return new SwhBidirectionalGraph(forward, backward, properties); } // loadXXX methods from ImmutableGraph public static SwhBidirectionalGraph load(String path, ProgressLogger pl) throws IOException { return load(LoadMethod.STANDARD, path, null, pl); } public static SwhBidirectionalGraph load(String path) throws IOException { return load(LoadMethod.STANDARD, path, null, null); } public static SwhBidirectionalGraph loadMapped(String path, ProgressLogger pl) throws IOException { return load(LoadMethod.MAPPED, path, null, pl); } public static SwhBidirectionalGraph loadMapped(String path) throws IOException { return load(LoadMethod.MAPPED, path, null, null); } public static SwhBidirectionalGraph loadOffline(String path, ProgressLogger pl) throws IOException { return load(LoadMethod.OFFLINE, path, null, pl); } public static SwhBidirectionalGraph loadOffline(String path) throws IOException { return load(LoadMethod.OFFLINE, path, null, null); } // Labelled versions of the loadXXX methods from ImmutableGraph public static SwhBidirectionalGraph loadLabelled(String path, ProgressLogger pl) throws IOException { return loadLabelled(LoadMethod.STANDARD, path, null, pl); } public static SwhBidirectionalGraph loadLabelled(String path) throws IOException { return loadLabelled(LoadMethod.STANDARD, path, null, null); } public static SwhBidirectionalGraph loadLabelledMapped(String path, ProgressLogger pl) throws IOException { return loadLabelled(LoadMethod.MAPPED, path, null, pl); } public static SwhBidirectionalGraph loadLabelledMapped(String path) throws IOException { return loadLabelled(LoadMethod.MAPPED, path, null, null); } public static SwhBidirectionalGraph loadLabelledOffline(String path, ProgressLogger pl) throws IOException { return loadLabelled(LoadMethod.OFFLINE, path, null, pl); } public static SwhBidirectionalGraph loadLabelledOffline(String path) throws IOException { return loadLabelled(LoadMethod.OFFLINE, path, null, null); } @Override public SwhBidirectionalGraph copy() { return new SwhBidirectionalGraph(forwardGraph, backwardGraph, this.properties); } @Override public SwhBidirectionalGraph transpose() { return new SwhBidirectionalGraph(super.transpose(), this.properties); } @Override public SwhBidirectionalGraph symmetrize() { return new SwhBidirectionalGraph(super.symmetrize(), this.properties); } public SwhUnidirectionalGraph getForwardGraph() { return this.forwardGraph; } public SwhUnidirectionalGraph getBackwardGraph() { return this.backwardGraph; } /** * Returns a *labelled* lazy iterator over the successors of a given node. The iteration terminates * when -1 is returned. */ public ArcLabelledNodeIterator.LabelledArcIterator labelledSuccessors(long x) { return forwardGraph.labelledSuccessors(x); } /** * Returns a *labelled* lazy iterator over the predecessors of a given node. The iteration * terminates when -1 is returned. */ public ArcLabelledNodeIterator.LabelledArcIterator labelledPredecessors(long x) { return backwardGraph.labelledSuccessors(x); } public void close() throws IOException { this.properties.close(); } @Override public SwhGraphProperties getProperties() { return properties; } } diff --git a/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java b/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java index 5f50596..58aaf91 100644 --- a/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java +++ b/java/src/main/java/org/softwareheritage/graph/rpc/Traversal.java @@ -1,426 +1,429 @@ package org.softwareheritage.graph.rpc; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator; import it.unimi.dsi.big.webgraph.labelling.Label; import org.softwareheritage.graph.*; import java.util.*; public class Traversal { private static ArcLabelledNodeIterator.LabelledArcIterator filterLabelledSuccessors(SwhUnidirectionalGraph g, long nodeId, AllowedEdges allowedEdges) { if (allowedEdges.restrictedTo == null) { // All edges are allowed, bypass edge check return g.labelledSuccessors(nodeId); } else { ArcLabelledNodeIterator.LabelledArcIterator allSuccessors = g.labelledSuccessors(nodeId); return new ArcLabelledNodeIterator.LabelledArcIterator() { @Override public Label label() { return allSuccessors.label(); } @Override public long nextLong() { long neighbor; while ((neighbor = allSuccessors.nextLong()) != -1) { if (allowedEdges.isAllowed(g.getNodeType(nodeId), g.getNodeType(neighbor))) { return neighbor; } } return -1; } @Override public long skip(final long n) { long i = 0; while (i < n && nextLong() != -1) i++; return i; } }; } } private static class NodeFilterChecker { private final SwhUnidirectionalGraph g; private final NodeFilter filter; private final AllowedNodes allowedNodes; private NodeFilterChecker(SwhUnidirectionalGraph graph, NodeFilter filter) { this.g = graph; this.filter = filter; this.allowedNodes = new AllowedNodes(filter.hasTypes() ? filter.getTypes() : "*"); } public boolean allowed(long nodeId) { if (filter == null) { return true; } if (!this.allowedNodes.isAllowed(g.getNodeType(nodeId))) { return false; } return true; } } public static SwhUnidirectionalGraph getDirectedGraph(SwhBidirectionalGraph g, GraphDirection direction) { switch (direction) { case FORWARD: return g.getForwardGraph(); case BACKWARD: return g.getBackwardGraph(); - case BOTH: - return new SwhUnidirectionalGraph(g.symmetrize(), g.getProperties()); + /* + * TODO: add support for BOTH case BOTH: return new SwhUnidirectionalGraph(g.symmetrize(), + * g.getProperties()); + */ default : throw new IllegalArgumentException("Unknown direction: " + direction); } } public static GraphDirection reverseDirection(GraphDirection direction) { switch (direction) { case FORWARD: return GraphDirection.BACKWARD; case BACKWARD: return GraphDirection.FORWARD; - case BOTH: - return GraphDirection.BOTH; + /* + * TODO: add support for BOTH case BOTH: return GraphDirection.BOTH; + */ default : throw new IllegalArgumentException("Unknown direction: " + direction); } } static class StopTraversalException extends RuntimeException { } static class BFSVisitor { protected final SwhUnidirectionalGraph g; protected long depth = 0; protected long traversalSuccessors = 0; protected long edgesAccessed = 0; protected HashMap parents = new HashMap<>(); protected ArrayDeque queue = new ArrayDeque<>(); private long maxDepth = -1; private long maxEdges = -1; BFSVisitor(SwhUnidirectionalGraph g) { this.g = g; } public void addSource(long nodeId) { queue.add(nodeId); parents.put(nodeId, -1L); } public void setMaxDepth(long depth) { maxDepth = depth; } public void setMaxEdges(long edges) { maxEdges = edges; } public void visitSetup() { edgesAccessed = 0; depth = 0; queue.add(-1L); // depth sentinel } public void visit() { visitSetup(); try { while (!queue.isEmpty()) { visitStep(); } } catch (StopTraversalException e) { // Ignore } } public void visitStep() { assert !queue.isEmpty(); long curr = queue.poll(); if (curr == -1L) { ++depth; if (!queue.isEmpty()) { queue.add(-1L); visitStep(); } return; } if (maxDepth >= 0 && depth > maxDepth) { throw new StopTraversalException(); } edgesAccessed += g.outdegree(curr); if (maxEdges >= 0 && edgesAccessed >= maxEdges) { throw new StopTraversalException(); } visitNode(curr); } protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return g.labelledSuccessors(nodeId); } protected void visitNode(long node) { ArcLabelledNodeIterator.LabelledArcIterator it = getSuccessors(node); traversalSuccessors = 0; for (long succ; (succ = it.nextLong()) != -1;) { traversalSuccessors++; visitEdge(node, succ, it.label()); } } protected void visitEdge(long src, long dst, Label label) { if (!parents.containsKey(dst)) { queue.add(dst); parents.put(dst, src); } } } static class SimpleTraversal extends BFSVisitor { private final NodeFilterChecker nodeReturnChecker; private final AllowedEdges allowedEdges; private final TraversalRequest request; private final NodePropertyBuilder.NodeDataMask nodeDataMask; private final NodeObserver nodeObserver; private Node.Builder nodeBuilder; SimpleTraversal(SwhBidirectionalGraph bidirectionalGraph, TraversalRequest request, NodeObserver nodeObserver) { super(getDirectedGraph(bidirectionalGraph, request.getDirection())); this.request = request; this.nodeObserver = nodeObserver; this.nodeReturnChecker = new NodeFilterChecker(g, request.getReturnNodes()); this.nodeDataMask = new NodePropertyBuilder.NodeDataMask(request.hasMask() ? request.getMask() : null); this.allowedEdges = new AllowedEdges(request.hasEdges() ? request.getEdges() : "*"); request.getSrcList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); addSource(srcNodeId); }); if (request.hasMaxDepth()) { setMaxDepth(request.getMaxDepth()); } if (request.hasMaxEdges()) { setMaxEdges(request.getMaxEdges()); } } @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdges); } @Override public void visitNode(long node) { nodeBuilder = null; if (nodeReturnChecker.allowed(node) && (!request.hasMinDepth() || depth >= request.getMinDepth())) { nodeBuilder = Node.newBuilder(); NodePropertyBuilder.buildNodeProperties(g, nodeDataMask, nodeBuilder, node); } super.visitNode(node); if (request.getReturnNodes().hasMinTraversalSuccessors() && traversalSuccessors < request.getReturnNodes().getMinTraversalSuccessors() || request.getReturnNodes().hasMaxTraversalSuccessors() && traversalSuccessors > request.getReturnNodes().getMaxTraversalSuccessors()) { nodeBuilder = null; } if (nodeBuilder != null) { nodeObserver.onNext(nodeBuilder.build()); } } @Override protected void visitEdge(long src, long dst, Label label) { super.visitEdge(src, dst, label); NodePropertyBuilder.buildSuccessorProperties(g, nodeDataMask, nodeBuilder, src, dst, label); } } static class FindPathTo extends BFSVisitor { private final AllowedEdges allowedEdges; private final FindPathToRequest request; private final NodePropertyBuilder.NodeDataMask nodeDataMask; private final NodeFilterChecker targetChecker; private Long targetNode = null; FindPathTo(SwhBidirectionalGraph bidirectionalGraph, FindPathToRequest request) { super(getDirectedGraph(bidirectionalGraph, request.getDirection())); this.request = request; this.targetChecker = new NodeFilterChecker(g, request.getTarget()); this.nodeDataMask = new NodePropertyBuilder.NodeDataMask(request.hasMask() ? request.getMask() : null); this.allowedEdges = new AllowedEdges(request.hasEdges() ? request.getEdges() : "*"); if (request.hasMaxDepth()) { setMaxDepth(request.getMaxDepth()); } if (request.hasMaxEdges()) { setMaxEdges(request.getMaxEdges()); } request.getSrcList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); addSource(srcNodeId); }); } @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdges); } @Override public void visitNode(long node) { if (targetChecker.allowed(node)) { targetNode = node; throw new StopTraversalException(); } super.visitNode(node); } public Path getPath() { if (targetNode == null) { return null; } Path.Builder pathBuilder = Path.newBuilder(); long curNode = targetNode; ArrayList path = new ArrayList<>(); while (curNode != -1) { path.add(curNode); curNode = parents.get(curNode); } Collections.reverse(path); for (long nodeId : path) { Node.Builder nodeBuilder = Node.newBuilder(); NodePropertyBuilder.buildNodeProperties(g, nodeDataMask, nodeBuilder, nodeId); pathBuilder.addNode(nodeBuilder.build()); } return pathBuilder.build(); } } static class FindPathBetween extends BFSVisitor { private final FindPathBetweenRequest request; private final NodePropertyBuilder.NodeDataMask nodeDataMask; private final AllowedEdges allowedEdgesSrc; private final AllowedEdges allowedEdgesDst; private final BFSVisitor srcVisitor; private final BFSVisitor dstVisitor; private Long middleNode = null; FindPathBetween(SwhBidirectionalGraph bidirectionalGraph, FindPathBetweenRequest request) { super(getDirectedGraph(bidirectionalGraph, request.getDirection())); this.request = request; this.nodeDataMask = new NodePropertyBuilder.NodeDataMask(request.hasMask() ? request.getMask() : null); this.allowedEdgesSrc = new AllowedEdges(request.hasEdges() ? request.getEdges() : "*"); this.allowedEdgesDst = request.hasEdgesReverse() ? new AllowedEdges(request.getEdgesReverse()) : (request.hasEdges() ? new AllowedEdges(request.getEdges()).reverse() : new AllowedEdges("*")); SwhUnidirectionalGraph srcGraph = getDirectedGraph(bidirectionalGraph, request.getDirection()); SwhUnidirectionalGraph dstGraph = getDirectedGraph(bidirectionalGraph, request.hasDirectionReverse() ? request.getDirectionReverse() : reverseDirection(request.getDirection())); this.srcVisitor = new BFSVisitor(srcGraph) { @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdgesSrc); } @Override public void visitNode(long node) { if (dstVisitor.parents.containsKey(node)) { middleNode = node; throw new StopTraversalException(); } super.visitNode(node); } }; this.dstVisitor = new BFSVisitor(dstGraph) { @Override protected ArcLabelledNodeIterator.LabelledArcIterator getSuccessors(long nodeId) { return filterLabelledSuccessors(g, nodeId, allowedEdgesDst); } @Override public void visitNode(long node) { if (dstVisitor.parents.containsKey(node)) { middleNode = node; throw new StopTraversalException(); } super.visitNode(node); } }; if (request.hasMaxDepth()) { this.srcVisitor.setMaxDepth(request.getMaxDepth()); this.dstVisitor.setMaxDepth(request.getMaxDepth()); } if (request.hasMaxEdges()) { this.srcVisitor.setMaxEdges(request.getMaxEdges()); this.dstVisitor.setMaxEdges(request.getMaxEdges()); } request.getSrcList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); srcVisitor.addSource(srcNodeId); }); request.getDstList().forEach(srcSwhid -> { long srcNodeId = g.getNodeId(new SWHID(srcSwhid)); dstVisitor.addSource(srcNodeId); }); } @Override public void visit() { srcVisitor.visitSetup(); dstVisitor.visitSetup(); while (!srcVisitor.queue.isEmpty() || !dstVisitor.queue.isEmpty()) { try { if (!srcVisitor.queue.isEmpty()) { srcVisitor.visitStep(); } } catch (StopTraversalException e) { srcVisitor.queue.clear(); } try { if (!dstVisitor.queue.isEmpty()) { dstVisitor.visitStep(); } } catch (StopTraversalException e) { dstVisitor.queue.clear(); } } } public Path getPath() { if (middleNode == null) { return null; } Path.Builder pathBuilder = Path.newBuilder(); ArrayList path = new ArrayList<>(); long curNode = middleNode; while (curNode != -1) { path.add(curNode); curNode = srcVisitor.parents.get(curNode); } Collections.reverse(path); curNode = dstVisitor.parents.get(middleNode); while (curNode != -1) { path.add(curNode); curNode = dstVisitor.parents.get(curNode); } for (long nodeId : path) { Node.Builder nodeBuilder = Node.newBuilder(); NodePropertyBuilder.buildNodeProperties(g, nodeDataMask, nodeBuilder, nodeId); pathBuilder.addNode(nodeBuilder.build()); } return pathBuilder.build(); } } public interface NodeObserver { void onNext(Node nodeId); } } diff --git a/proto/swhgraph.proto b/proto/swhgraph.proto index 3687c8e..95ec45c 100644 --- a/proto/swhgraph.proto +++ b/proto/swhgraph.proto @@ -1,155 +1,154 @@ syntax = "proto3"; import "google/protobuf/field_mask.proto"; option java_multiple_files = true; option java_package = "org.softwareheritage.graph.rpc"; option java_outer_classname = "GraphService"; package swh.graph; service TraversalService { rpc Traverse (TraversalRequest) returns (stream Node); rpc FindPathTo (FindPathToRequest) returns (Path); rpc FindPathBetween (FindPathBetweenRequest) returns (Path); rpc CountNodes (TraversalRequest) returns (CountResponse); rpc CountEdges (TraversalRequest) returns (CountResponse); rpc Stats (StatsRequest) returns (StatsResponse); rpc CheckSwhid (CheckSwhidRequest) returns (CheckSwhidResponse); rpc GetNode (GetNodeRequest) returns (Node); } enum GraphDirection { FORWARD = 0; BACKWARD = 1; - BOTH = 2; } message TraversalRequest { repeated string src = 1; GraphDirection direction = 2; optional string edges = 3; optional int64 max_edges = 4; optional int64 min_depth = 5; optional int64 max_depth = 6; optional NodeFilter return_nodes = 7; optional google.protobuf.FieldMask mask = 8; } message FindPathToRequest { repeated string src = 1; optional NodeFilter target = 2; GraphDirection direction = 3; optional string edges = 4; optional int64 max_edges = 5; optional int64 max_depth = 6; optional google.protobuf.FieldMask mask = 7; } message FindPathBetweenRequest { repeated string src = 1; repeated string dst = 2; GraphDirection direction = 3; optional GraphDirection direction_reverse = 4; optional string edges = 5; optional string edges_reverse = 6; optional int64 max_edges = 7; optional int64 max_depth = 8; optional google.protobuf.FieldMask mask = 9; } message NodeFilter { optional string types = 1; optional int64 min_traversal_successors = 2; optional int64 max_traversal_successors = 3; } message Node { string swhid = 1; repeated Successor successor = 2; oneof data { ContentData cnt = 3; RevisionData rev = 5; ReleaseData rel = 6; OriginData ori = 8; }; } message Path { repeated Node node = 1; } message Successor { optional string swhid = 1; repeated EdgeLabel label = 2; } message ContentData { optional int64 length = 1; optional bool is_skipped = 2; } message RevisionData { optional int64 author = 1; optional int64 author_date = 2; optional int32 author_date_offset = 3; optional int64 committer = 4; optional int64 committer_date = 5; optional int32 committer_date_offset = 6; optional bytes message = 7; } message ReleaseData { optional int64 author = 1; optional int64 author_date = 2; optional int32 author_date_offset = 3; optional bytes name = 4; optional bytes message = 5; } message OriginData { optional string url = 1; } message EdgeLabel { bytes name = 1; int32 permission = 2; } message CountResponse { int64 count = 1; } message StatsRequest { } message StatsResponse { int64 num_nodes = 1; int64 num_edges = 2; double compression = 3; double bits_per_node = 4; double bits_per_edge = 5; double avg_locality = 6; int64 indegree_min = 7; int64 indegree_max = 8; double indegree_avg = 9; int64 outdegree_min = 10; int64 outdegree_max = 11; double outdegree_avg = 12; } message CheckSwhidRequest { string swhid = 1; } message GetNodeRequest { string swhid = 1; optional google.protobuf.FieldMask mask = 8; } message CheckSwhidResponse { bool exists = 1; string details = 2; }