diff --git a/java/server/src/main/java/org/softwareheritage/graph/Graph.java b/java/server/src/main/java/org/softwareheritage/graph/Graph.java index faf513d..46ed699 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,230 +1,196 @@ package org.softwareheritage.graph; import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.backend.NodeIdMap; import org.softwareheritage.graph.backend.NodeTypesMap; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to * PID for users results). However, since graph traversal can be restricted depending on the node * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full * PID lookup. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.AllowedEdges * @see org.softwareheritage.graph.NodeIdMap; * @see org.softwareheritage.graph.NodeTypesMap; */ public class Graph { /** File extension for the SWH PID to long node id map */ public static final String PID_TO_NODE = ".pid2node.csv"; /** File extension for the long node id to SWH PID map */ public static final String NODE_TO_PID = ".node2pid.csv"; /** File extension for the long node id to node typ map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ BVGraph graph; /** Transposed compressed graph (used for backward traversals) */ BVGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWH PIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. * * @param path path and basename of the compressed graph to load */ public Graph(String path) throws IOException { this.graph = BVGraph.load(path); this.graphTransposed = BVGraph.load(path + "-transposed"); this.path = path; this.nodeIdMap = new NodeIdMap(path, getNbNodes()); this.nodeTypesMap = new NodeTypesMap(path); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } /** * Returns the graph full path. * * @return graph full path */ public String getPath() { return path; } /** * Converts {@link SwhId} node to long. * * @param swhId node specified as a {@link SwhId} * @return internal long node id * @see org.softwareheritage.graph.SwhId */ public long getNodeId(SwhId swhId) { return nodeIdMap.getNodeId(swhId); } /** * Converts long id node to {@link SwhId}. * * @param nodeId node specified as a long id * @return external SWH PID * @see org.softwareheritage.graph.SwhId */ public SwhId getSwhId(long nodeId) { return nodeIdMap.getSwhId(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see org.softwareheritage.graph.Node.Type */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ public long getNbNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ public long getNbEdges() { return graph.numArcs(); } - /** - * Returns list of successors of a node. - * - * @param nodeId node specified as a long id - * @return list of successors of the node, specified as a fastutil LongBigArrays - */ - public long[][] successors(long nodeId) { - return graph.successorBigArray(nodeId); - } - /** * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator */ - public LazyLongIterator lazySuccessors(long nodeId) { + public LazyLongIterator successors(long nodeId) { return graph.successors(nodeId); } /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ public long outdegree(long nodeId) { return graph.outdegree(nodeId); } - /** - * Returns list of predecessors of a node. - * - * @param nodeId node specified as a long id - * @return list of predecessors of the node, specified as a fastutil LongBigArrays - */ - public long[][] predecessors(long nodeId) { - return graphTransposed.successorBigArray(nodeId); - } - /** * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator */ - public LazyLongIterator lazyPredecessors(long nodeId) { + public LazyLongIterator predecessors(long nodeId) { return graphTransposed.successors(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return graphTransposed.outdegree(nodeId); } /** * Returns the degree of a node, depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph * @return degree of a node */ public long degree(long nodeId, boolean useTransposed) { return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); } - /** - * Returns the neighbors of a node (as a list), depending on graph orientation. - * - * @param nodeId node specified as a long id - * @param useTransposed boolean value to use transposed graph - * @return list of neighbors of the node, specified as a fastutil LongBigArrays - */ - public long[][] neighbors(long nodeId, boolean useTransposed) { - return (useTransposed) ? predecessors(nodeId) : successors(nodeId); - } - /** * Returns the neighbors of a node (as a lazy iterator), depending on graph orientation. * * @param nodeId node specified as a long id * @param useTransposed boolean value to use transposed graph * @return lazy iterator of neighbors of the node, specified as a WebGraph LazyLongIterator */ - public LazyLongIterator lazyNeighbors(long nodeId, boolean useTransposed) { - return (useTransposed) ? lazyPredecessors(nodeId) : lazySuccessors(nodeId); + public LazyLongIterator neighbors(long nodeId, boolean useTransposed) { + return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java index 51de885..43ba9c8 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -1,98 +1,88 @@ package org.softwareheritage.graph; import java.util.Iterator; -import it.unimi.dsi.fastutil.longs.LongBigArrays; +import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; /** * Iterator class to go over a node neighbors in the graph. *

* Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.AllowedEdges */ public class Neighbors implements Iterable { /** Graph used to explore neighbors */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** Source node from which neighbors will be listed */ long srcNodeId; /** * Constructor. * * @param graph graph used to explore neighbors * @param useTransposed boolean value to use transposed graph * @param edges edges allowed to be used in the graph * @param srcNodeId source node from where to list neighbors */ public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) { this.graph = graph; this.useTransposed = useTransposed; this.edges = edges; this.srcNodeId = srcNodeId; } @Override public Iterator iterator() { return new NeighborsIterator(); } /** * Inner class for {@link Neighbors} iterator. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class NeighborsIterator implements Iterator { - long nextNeighborIdx; - long nbNeighbors; - // LongBigArrays to support 64-bit indexing. - long[][] neighbors; + LazyLongIterator neighbors; + long nextNeighborId; public NeighborsIterator() { - this.nextNeighborIdx = -1; - this.nbNeighbors = graph.degree(srcNodeId, useTransposed); this.neighbors = graph.neighbors(srcNodeId, useTransposed); + this.nextNeighborId = -1; } public boolean hasNext() { // Case 1: no edge restriction, bypass type checks and skip to next neighbor if (edges.restrictedTo == null) { - if (nextNeighborIdx + 1 < nbNeighbors) { - nextNeighborIdx++; - return true; - } else { - return false; - } + nextNeighborId = neighbors.nextLong(); + return (nextNeighborId != -1); } // Case 2: edge restriction, look ahead for next neighbor - for (long lookAheadIdx = nextNeighborIdx + 1; lookAheadIdx < nbNeighbors; lookAheadIdx++) { - long nextNodeId = LongBigArrays.get(neighbors, lookAheadIdx); - if (edges.isAllowed(srcNodeId, nextNodeId)) { - nextNeighborIdx = lookAheadIdx; + while ((nextNeighborId = neighbors.nextLong()) != -1) { + if (edges.isAllowed(srcNodeId, nextNeighborId)) { return true; } } return false; } public Long next() { - long nextNodeId = LongBigArrays.get(neighbors, nextNeighborIdx); - return nextNodeId; + return nextNeighborId; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java index a263cfb..5d16f39 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/AccessEdge.java @@ -1,49 +1,49 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import java.util.ArrayList; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.utils.Random; import org.softwareheritage.graph.utils.Statistics; import org.softwareheritage.graph.utils.Timing; /** * Benchmark to time edge access time. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class AccessEdge { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); final long seed = 42; final long nbNodes = 1_000_000; Random random = new Random(seed); long[] nodeIds = random.generateNodeIds(graph, nbNodes); ArrayList timings = new ArrayList<>(); for (long nodeId : nodeIds) { long startTime = Timing.start(); - LazyLongIterator neighbors = graph.lazySuccessors(nodeId); + LazyLongIterator neighbors = graph.successors(nodeId); long firstNeighbor = neighbors.nextLong(); double duration = (double) Timing.stop(startTime); timings.add(duration); } System.out.println("Used " + nbNodes + " random edges (results are in seconds):"); Statistics stats = new Statistics(timings); stats.printAll(); } }