diff --git a/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java b/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java new file mode 100644 index 0000000..40f473a --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java @@ -0,0 +1,50 @@ +package org.softwareheritage.graph; + +/** + * TODO + * + * @author The Software Heritage developers + */ + +public class AllowedNodes { + public boolean[] restrictedTo; + + /** + * Constructor. + * + * @param nodesFmt a formatted string describing allowed nodes + */ + public AllowedNodes(String nodesFmt) { + int nbNodeTypes = Node.Type.values().length; + this.restrictedTo = new boolean[nbNodeTypes]; + // Special values (null, empty, "*") + if (nodesFmt == null || nodesFmt.isEmpty()) { + return; + } + if (nodesFmt.equals("*")) { + // Allows for quick bypass (with simple null check) when no node restriction + restrictedTo = null; + return; + } + + // Format: "nodeType1,nodeType2,[...]" + String[] nodeTypesStr = nodesFmt.split(","); + for (String nodeTypeStr : nodeTypesStr) { + for (Node.Type nodeType : Node.Type.parse(nodeTypeStr)) { + this.restrictedTo[Node.Type.toInt(nodeType)] = true; + } + } + } + + /** + * Checks if a given node type is allowed. + * + * @param nodeType node type to check + * @return true if allowed and false otherwise + */ + public boolean isAllowed(Node.Type nodeType) { + if (restrictedTo == null) + return true; + return restrictedTo[Node.Type.toInt(nodeType)]; + } +} diff --git a/java/src/main/java/org/softwareheritage/graph/Subgraph.java b/java/src/main/java/org/softwareheritage/graph/Subgraph.java new file mode 100644 index 0000000..65723e3 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/Subgraph.java @@ -0,0 +1,222 @@ +package org.softwareheritage.graph; + +import it.unimi.dsi.big.webgraph.ImmutableGraph; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.big.webgraph.NodeIterator; + +import java.util.NoSuchElementException; + + +public class Subgraph extends ImmutableGraph { + private final Graph underlyingGraph; + public final AllowedNodes allowedNodeTypes; + + private long nodeCount = -1; + + /** + * Constructor. + * + */ + public Subgraph(Graph underlyingGraph, AllowedNodes allowedNodeTypes) { + this.underlyingGraph = underlyingGraph.copy(); + this.allowedNodeTypes = allowedNodeTypes; + } + + /** + * Return a flyweight copy of the graph. + */ + @Override + public Subgraph copy() { + return new Subgraph(this.underlyingGraph.copy(), allowedNodeTypes); + } + + @Override + public boolean randomAccess() { + return underlyingGraph.randomAccess(); + } + + /** + * Return a transposed version of the graph. + */ + public Subgraph transpose() { + return new Subgraph(underlyingGraph.transpose(), allowedNodeTypes); + } + + /** + * Return a symmetric version of the graph. + */ + public Subgraph symmetrize() { + return new Subgraph(underlyingGraph.symmetrize(), allowedNodeTypes); + } + + /** + * Returns number of nodes in the graph. + * + * @return number of nodes in the graph + */ + @Override + public long numNodes() { + if (nodeCount == -1) { + for (long i = 0; i < underlyingGraph.numNodes(); ++i) { + if (nodeExists(i)) + ++nodeCount; + } + } + return nodeCount; + } + + /** + * Returns number of edges in the graph. + * + * @return number of edges in the graph + */ + @Override + public long numArcs() { + throw new UnsupportedOperationException("Cannot determine the number of arcs in a subgraph"); + } + + public long maxNodeNumber() { + return underlyingGraph.numNodes(); + } + + public boolean nodeExists(long node) { + return allowedNodeTypes.isAllowed(underlyingGraph.getNodeType(node)); + } + + /** + * Returns lazy iterator of successors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator + */ + @Override + public LazyLongIterator successors(long nodeId) { + if (!nodeExists(nodeId)) { + throw new IllegalArgumentException("Node " + nodeId + " not in subgraph"); + } + LazyLongIterator allSuccessors = underlyingGraph.successors(nodeId); + return new LazyLongIterator() { + @Override + public long nextLong() { + long neighbor; + while ((neighbor = allSuccessors.nextLong()) != -1) { + if (nodeExists(neighbor)) { + return neighbor; + } + } + return -1; + } + + @Override + public long skip(final long n) { + long i; + for (i = 0; i < n && nextLong() != -1; i++) ; + return i; + } + }; + } + + /** + * Returns the outdegree of a node. + * + * @param nodeId node specified as a long id + * @return outdegree of a node + */ + @Override + public long outdegree(long nodeId) { + long deg = 0; + for (LazyLongIterator allSuccessors = successors(nodeId); allSuccessors.nextLong() != -1; ++deg); + return deg; + } + + @Override + public NodeIterator nodeIterator() { + return new NodeIterator() { + final long n = numNodes(); + long i = -1; + long done = 0; + + @Override + public boolean hasNext() { + return done <= n; + } + + @Override + public long nextLong() { + if (!hasNext()) throw new NoSuchElementException(); + do { + ++i; + if (i >= underlyingGraph.numNodes()) + throw new NoSuchElementException(); + } while (!nodeExists(i)); + ++done; + return i; + } + + @Override + public long outdegree() { + return Subgraph.this.outdegree(i); + } + + @Override + public LazyLongIterator successors() { + return Subgraph.this.successors(i); + } + }; + } + + /** + * Returns lazy iterator of predecessors of a node. + * + * @param nodeId node specified as a long id + * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator predecessors(long nodeId) { + return this.transpose().successors(nodeId); + } + + /** + * Returns the indegree of a node. + * + * @param nodeId node specified as a long id + * @return indegree of a node + */ + public long indegree(long nodeId) { + return this.transpose().outdegree(nodeId); + } + + /** + * Converts {@link SWHID} node to long. + * + * @param swhid node specified as a {@link SWHID} + * @return internal long node id + * @see SWHID + */ + public long getNodeId(SWHID swhid) { + return underlyingGraph.getNodeId(swhid); + } + + /** + * Converts long id node to {@link SWHID}. + * + * @param nodeId node specified as a long id + * @return external SWHID + * @see SWHID + */ + public SWHID getSWHID(long nodeId) { + return underlyingGraph.getSWHID(nodeId); + } + + /** + * Returns node type. + * + * @param nodeId node specified as a long id + * @return corresponding node type + * @see Node.Type + */ + public Node.Type getNodeType(long nodeId) { + return underlyingGraph.getNodeType(nodeId); + } +} diff --git a/java/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/src/test/java/org/softwareheritage/graph/GraphTest.java index df1afb7..345e374 100644 --- a/java/src/test/java/org/softwareheritage/graph/GraphTest.java +++ b/java/src/test/java/org/softwareheritage/graph/GraphTest.java @@ -1,44 +1,45 @@ package org.softwareheritage.graph; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.LazyLongIterators; import org.hamcrest.MatcherAssert; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; public class GraphTest { static Graph graph; - public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) { - MatcherAssert.assertThat(expecteds, containsInAnyOrder(actuals.toArray())); - } - - public static void assertLazyLongIteratorsEqual(LazyLongIterator expected, LazyLongIterator actual) { - ArrayList expectedList = new ArrayList<>(); - ArrayList actualList = new ArrayList<>(); - Iterator expectedIt = LazyLongIterators.eager(expected); - Iterator actualIt = LazyLongIterators.eager(actual); - expectedIt.forEachRemaining(expectedList::add); - actualIt.forEachRemaining(actualList::add); - Assertions.assertArrayEquals(expectedList.toArray(), actualList.toArray()); - } - @BeforeAll public static void setUp() throws IOException { Path graphPath = Paths.get("..", "swh", "graph", "tests", "dataset", "output", "example"); graph = new Graph(graphPath.toString()); } public Graph getGraph() { return graph; } + + public static SWHID fakeSWHID(String type, int num) { + return new SWHID(String.format("swh:1:%s:%040d", type, num)); + } + + public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) { + MatcherAssert.assertThat(expecteds, containsInAnyOrder(actuals.toArray())); + } + + public static ArrayList lazyLongIteratorToList(LazyLongIterator input) { + ArrayList inputList = new ArrayList<>(); + Iterator inputIt = LazyLongIterators.eager(input); + inputIt.forEachRemaining(inputList::add); + return inputList; + } } diff --git a/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java new file mode 100644 index 0000000..70e0417 --- /dev/null +++ b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java @@ -0,0 +1,110 @@ +package org.softwareheritage.graph; + +import java.lang.IllegalArgumentException; +import java.util.*; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SubgraphTest extends GraphTest { + @Test + public void noFilter() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("*")); + + for (long i = 0; i < g.numNodes(); ++i) { + Assertions.assertEquals(g.outdegree(i), sg.outdegree(i)); + } + } + + @Test + public void missingNode() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + + SWHID rev1 = fakeSWHID("rev", 18); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + sg.outdegree(sg.getNodeId(rev1)); + }); + Assertions.assertThrows(IllegalArgumentException.class, () -> { + sg.successors(sg.getNodeId(rev1)); + }); + } + + @Test + public void outdegreeOnlyDirOri() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + + SWHID dir1 = fakeSWHID("dir", 17); + Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir1))); + Assertions.assertEquals(1, sg.outdegree(sg.getNodeId(dir1))); + + SWHID dir2 = fakeSWHID("dir", 6); + Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir2))); + Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(dir2))); + + SWHID ori1 = fakeSWHID("ori", 21); + Assertions.assertEquals(1, g.outdegree(g.getNodeId(ori1))); + Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(ori1))); + } + + @Test + public void successorsOnlyDirOri() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + + SWHID dir1 = fakeSWHID("dir", 17); + assertEqualsAnyOrder( + Collections.singletonList( + sg.getNodeId(fakeSWHID("dir", 16)) + ), + lazyLongIteratorToList(sg.successors(sg.getNodeId(dir1))) + ); + + SWHID dir2 = fakeSWHID("dir", 6); + assertEqualsAnyOrder( + Collections.emptyList(), + lazyLongIteratorToList(sg.successors(sg.getNodeId(dir2))) + ); + + SWHID ori1 = fakeSWHID("ori", 21); + assertEqualsAnyOrder( + Collections.emptyList(), + lazyLongIteratorToList(sg.successors(sg.getNodeId(ori1))) + ); + } + + @Test + public void nodeIteratorOnlyOriDir() { + Graph g = getGraph(); + Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori")); + ArrayList nodeList = new ArrayList<>(); + Iterator nodeIt = sg.nodeIterator(); + nodeIt.forEachRemaining(nodeList::add); + assertEqualsAnyOrder( + Arrays.asList( + sg.getNodeId(fakeSWHID("ori", 21)), + sg.getNodeId(fakeSWHID("dir", 2)), + sg.getNodeId(fakeSWHID("dir", 6)), + sg.getNodeId(fakeSWHID("dir", 8)), + sg.getNodeId(fakeSWHID("dir", 12)), + sg.getNodeId(fakeSWHID("dir", 16)), + sg.getNodeId(fakeSWHID("dir", 17)) + ), + nodeList + ); + sg = new Subgraph(g, new AllowedNodes("snp,rel")); + nodeList = new ArrayList<>(); + nodeIt = sg.nodeIterator(); + nodeIt.forEachRemaining(nodeList::add); + assertEqualsAnyOrder( + Arrays.asList( + sg.getNodeId(fakeSWHID("snp", 20)), + sg.getNodeId(fakeSWHID("rel", 10)), + sg.getNodeId(fakeSWHID("rel", 19)) + ), + nodeList + ); + } +}