diff --git a/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java b/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java
new file mode 100644
index 0000000..40f473a
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/AllowedNodes.java
@@ -0,0 +1,50 @@
+package org.softwareheritage.graph;
+
+/**
+ * TODO
+ *
+ * @author The Software Heritage developers
+ */
+
+public class AllowedNodes {
+ public boolean[] restrictedTo;
+
+ /**
+ * Constructor.
+ *
+ * @param nodesFmt a formatted string describing allowed nodes
+ */
+ public AllowedNodes(String nodesFmt) {
+ int nbNodeTypes = Node.Type.values().length;
+ this.restrictedTo = new boolean[nbNodeTypes];
+ // Special values (null, empty, "*")
+ if (nodesFmt == null || nodesFmt.isEmpty()) {
+ return;
+ }
+ if (nodesFmt.equals("*")) {
+ // Allows for quick bypass (with simple null check) when no node restriction
+ restrictedTo = null;
+ return;
+ }
+
+ // Format: "nodeType1,nodeType2,[...]"
+ String[] nodeTypesStr = nodesFmt.split(",");
+ for (String nodeTypeStr : nodeTypesStr) {
+ for (Node.Type nodeType : Node.Type.parse(nodeTypeStr)) {
+ this.restrictedTo[Node.Type.toInt(nodeType)] = true;
+ }
+ }
+ }
+
+ /**
+ * Checks if a given node type is allowed.
+ *
+ * @param nodeType node type to check
+ * @return true if allowed and false otherwise
+ */
+ public boolean isAllowed(Node.Type nodeType) {
+ if (restrictedTo == null)
+ return true;
+ return restrictedTo[Node.Type.toInt(nodeType)];
+ }
+}
diff --git a/java/src/main/java/org/softwareheritage/graph/Subgraph.java b/java/src/main/java/org/softwareheritage/graph/Subgraph.java
new file mode 100644
index 0000000..65723e3
--- /dev/null
+++ b/java/src/main/java/org/softwareheritage/graph/Subgraph.java
@@ -0,0 +1,222 @@
+package org.softwareheritage.graph;
+
+import it.unimi.dsi.big.webgraph.ImmutableGraph;
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import it.unimi.dsi.big.webgraph.NodeIterator;
+
+import java.util.NoSuchElementException;
+
+
+public class Subgraph extends ImmutableGraph {
+ private final Graph underlyingGraph;
+ public final AllowedNodes allowedNodeTypes;
+
+ private long nodeCount = -1;
+
+ /**
+ * Constructor.
+ *
+ */
+ public Subgraph(Graph underlyingGraph, AllowedNodes allowedNodeTypes) {
+ this.underlyingGraph = underlyingGraph.copy();
+ this.allowedNodeTypes = allowedNodeTypes;
+ }
+
+ /**
+ * Return a flyweight copy of the graph.
+ */
+ @Override
+ public Subgraph copy() {
+ return new Subgraph(this.underlyingGraph.copy(), allowedNodeTypes);
+ }
+
+ @Override
+ public boolean randomAccess() {
+ return underlyingGraph.randomAccess();
+ }
+
+ /**
+ * Return a transposed version of the graph.
+ */
+ public Subgraph transpose() {
+ return new Subgraph(underlyingGraph.transpose(), allowedNodeTypes);
+ }
+
+ /**
+ * Return a symmetric version of the graph.
+ */
+ public Subgraph symmetrize() {
+ return new Subgraph(underlyingGraph.symmetrize(), allowedNodeTypes);
+ }
+
+ /**
+ * Returns number of nodes in the graph.
+ *
+ * @return number of nodes in the graph
+ */
+ @Override
+ public long numNodes() {
+ if (nodeCount == -1) {
+ for (long i = 0; i < underlyingGraph.numNodes(); ++i) {
+ if (nodeExists(i))
+ ++nodeCount;
+ }
+ }
+ return nodeCount;
+ }
+
+ /**
+ * Returns number of edges in the graph.
+ *
+ * @return number of edges in the graph
+ */
+ @Override
+ public long numArcs() {
+ throw new UnsupportedOperationException("Cannot determine the number of arcs in a subgraph");
+ }
+
+ public long maxNodeNumber() {
+ return underlyingGraph.numNodes();
+ }
+
+ public boolean nodeExists(long node) {
+ return allowedNodeTypes.isAllowed(underlyingGraph.getNodeType(node));
+ }
+
+ /**
+ * Returns lazy iterator of successors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator
+ */
+ @Override
+ public LazyLongIterator successors(long nodeId) {
+ if (!nodeExists(nodeId)) {
+ throw new IllegalArgumentException("Node " + nodeId + " not in subgraph");
+ }
+ LazyLongIterator allSuccessors = underlyingGraph.successors(nodeId);
+ return new LazyLongIterator() {
+ @Override
+ public long nextLong() {
+ long neighbor;
+ while ((neighbor = allSuccessors.nextLong()) != -1) {
+ if (nodeExists(neighbor)) {
+ return neighbor;
+ }
+ }
+ return -1;
+ }
+
+ @Override
+ public long skip(final long n) {
+ long i;
+ for (i = 0; i < n && nextLong() != -1; i++) ;
+ return i;
+ }
+ };
+ }
+
+ /**
+ * Returns the outdegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return outdegree of a node
+ */
+ @Override
+ public long outdegree(long nodeId) {
+ long deg = 0;
+ for (LazyLongIterator allSuccessors = successors(nodeId); allSuccessors.nextLong() != -1; ++deg);
+ return deg;
+ }
+
+ @Override
+ public NodeIterator nodeIterator() {
+ return new NodeIterator() {
+ final long n = numNodes();
+ long i = -1;
+ long done = 0;
+
+ @Override
+ public boolean hasNext() {
+ return done <= n;
+ }
+
+ @Override
+ public long nextLong() {
+ if (!hasNext()) throw new NoSuchElementException();
+ do {
+ ++i;
+ if (i >= underlyingGraph.numNodes())
+ throw new NoSuchElementException();
+ } while (!nodeExists(i));
+ ++done;
+ return i;
+ }
+
+ @Override
+ public long outdegree() {
+ return Subgraph.this.outdegree(i);
+ }
+
+ @Override
+ public LazyLongIterator successors() {
+ return Subgraph.this.successors(i);
+ }
+ };
+ }
+
+ /**
+ * Returns lazy iterator of predecessors of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator
+ */
+ public LazyLongIterator predecessors(long nodeId) {
+ return this.transpose().successors(nodeId);
+ }
+
+ /**
+ * Returns the indegree of a node.
+ *
+ * @param nodeId node specified as a long id
+ * @return indegree of a node
+ */
+ public long indegree(long nodeId) {
+ return this.transpose().outdegree(nodeId);
+ }
+
+ /**
+ * Converts {@link SWHID} node to long.
+ *
+ * @param swhid node specified as a {@link SWHID}
+ * @return internal long node id
+ * @see SWHID
+ */
+ public long getNodeId(SWHID swhid) {
+ return underlyingGraph.getNodeId(swhid);
+ }
+
+ /**
+ * Converts long id node to {@link SWHID}.
+ *
+ * @param nodeId node specified as a long id
+ * @return external SWHID
+ * @see SWHID
+ */
+ public SWHID getSWHID(long nodeId) {
+ return underlyingGraph.getSWHID(nodeId);
+ }
+
+ /**
+ * Returns node type.
+ *
+ * @param nodeId node specified as a long id
+ * @return corresponding node type
+ * @see Node.Type
+ */
+ public Node.Type getNodeType(long nodeId) {
+ return underlyingGraph.getNodeType(nodeId);
+ }
+}
diff --git a/java/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/src/test/java/org/softwareheritage/graph/GraphTest.java
index df1afb7..345e374 100644
--- a/java/src/test/java/org/softwareheritage/graph/GraphTest.java
+++ b/java/src/test/java/org/softwareheritage/graph/GraphTest.java
@@ -1,44 +1,45 @@
package org.softwareheritage.graph;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.LazyLongIterators;
import org.hamcrest.MatcherAssert;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
public class GraphTest {
static Graph graph;
- public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) {
- MatcherAssert.assertThat(expecteds, containsInAnyOrder(actuals.toArray()));
- }
-
- public static void assertLazyLongIteratorsEqual(LazyLongIterator expected, LazyLongIterator actual) {
- ArrayList expectedList = new ArrayList<>();
- ArrayList actualList = new ArrayList<>();
- Iterator expectedIt = LazyLongIterators.eager(expected);
- Iterator actualIt = LazyLongIterators.eager(actual);
- expectedIt.forEachRemaining(expectedList::add);
- actualIt.forEachRemaining(actualList::add);
- Assertions.assertArrayEquals(expectedList.toArray(), actualList.toArray());
- }
-
@BeforeAll
public static void setUp() throws IOException {
Path graphPath = Paths.get("..", "swh", "graph", "tests", "dataset", "output", "example");
graph = new Graph(graphPath.toString());
}
public Graph getGraph() {
return graph;
}
+
+ public static SWHID fakeSWHID(String type, int num) {
+ return new SWHID(String.format("swh:1:%s:%040d", type, num));
+ }
+
+ public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) {
+ MatcherAssert.assertThat(expecteds, containsInAnyOrder(actuals.toArray()));
+ }
+
+ public static ArrayList lazyLongIteratorToList(LazyLongIterator input) {
+ ArrayList inputList = new ArrayList<>();
+ Iterator inputIt = LazyLongIterators.eager(input);
+ inputIt.forEachRemaining(inputList::add);
+ return inputList;
+ }
}
diff --git a/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java
new file mode 100644
index 0000000..70e0417
--- /dev/null
+++ b/java/src/test/java/org/softwareheritage/graph/SubgraphTest.java
@@ -0,0 +1,110 @@
+package org.softwareheritage.graph;
+
+import java.lang.IllegalArgumentException;
+import java.util.*;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class SubgraphTest extends GraphTest {
+ @Test
+ public void noFilter() {
+ Graph g = getGraph();
+ Subgraph sg = new Subgraph(g, new AllowedNodes("*"));
+
+ for (long i = 0; i < g.numNodes(); ++i) {
+ Assertions.assertEquals(g.outdegree(i), sg.outdegree(i));
+ }
+ }
+
+ @Test
+ public void missingNode() {
+ Graph g = getGraph();
+ Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori"));
+
+ SWHID rev1 = fakeSWHID("rev", 18);
+ Assertions.assertThrows(IllegalArgumentException.class, () -> {
+ sg.outdegree(sg.getNodeId(rev1));
+ });
+ Assertions.assertThrows(IllegalArgumentException.class, () -> {
+ sg.successors(sg.getNodeId(rev1));
+ });
+ }
+
+ @Test
+ public void outdegreeOnlyDirOri() {
+ Graph g = getGraph();
+ Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori"));
+
+ SWHID dir1 = fakeSWHID("dir", 17);
+ Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir1)));
+ Assertions.assertEquals(1, sg.outdegree(sg.getNodeId(dir1)));
+
+ SWHID dir2 = fakeSWHID("dir", 6);
+ Assertions.assertEquals(2, g.outdegree(g.getNodeId(dir2)));
+ Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(dir2)));
+
+ SWHID ori1 = fakeSWHID("ori", 21);
+ Assertions.assertEquals(1, g.outdegree(g.getNodeId(ori1)));
+ Assertions.assertEquals(0, sg.outdegree(sg.getNodeId(ori1)));
+ }
+
+ @Test
+ public void successorsOnlyDirOri() {
+ Graph g = getGraph();
+ Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori"));
+
+ SWHID dir1 = fakeSWHID("dir", 17);
+ assertEqualsAnyOrder(
+ Collections.singletonList(
+ sg.getNodeId(fakeSWHID("dir", 16))
+ ),
+ lazyLongIteratorToList(sg.successors(sg.getNodeId(dir1)))
+ );
+
+ SWHID dir2 = fakeSWHID("dir", 6);
+ assertEqualsAnyOrder(
+ Collections.emptyList(),
+ lazyLongIteratorToList(sg.successors(sg.getNodeId(dir2)))
+ );
+
+ SWHID ori1 = fakeSWHID("ori", 21);
+ assertEqualsAnyOrder(
+ Collections.emptyList(),
+ lazyLongIteratorToList(sg.successors(sg.getNodeId(ori1)))
+ );
+ }
+
+ @Test
+ public void nodeIteratorOnlyOriDir() {
+ Graph g = getGraph();
+ Subgraph sg = new Subgraph(g, new AllowedNodes("dir,ori"));
+ ArrayList nodeList = new ArrayList<>();
+ Iterator nodeIt = sg.nodeIterator();
+ nodeIt.forEachRemaining(nodeList::add);
+ assertEqualsAnyOrder(
+ Arrays.asList(
+ sg.getNodeId(fakeSWHID("ori", 21)),
+ sg.getNodeId(fakeSWHID("dir", 2)),
+ sg.getNodeId(fakeSWHID("dir", 6)),
+ sg.getNodeId(fakeSWHID("dir", 8)),
+ sg.getNodeId(fakeSWHID("dir", 12)),
+ sg.getNodeId(fakeSWHID("dir", 16)),
+ sg.getNodeId(fakeSWHID("dir", 17))
+ ),
+ nodeList
+ );
+ sg = new Subgraph(g, new AllowedNodes("snp,rel"));
+ nodeList = new ArrayList<>();
+ nodeIt = sg.nodeIterator();
+ nodeIt.forEachRemaining(nodeList::add);
+ assertEqualsAnyOrder(
+ Arrays.asList(
+ sg.getNodeId(fakeSWHID("snp", 20)),
+ sg.getNodeId(fakeSWHID("rel", 10)),
+ sg.getNodeId(fakeSWHID("rel", 19))
+ ),
+ nodeList
+ );
+ }
+}