diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java index ad44a54..92b4cfa 100644 --- a/java/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,257 +1,310 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.Transform; +import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.maps.NodeIdMap; import org.softwareheritage.graph.maps.NodeTypesMap; import java.io.IOException; /** * Main class storing the compressed graph and node id mappings. *
* The compressed graph is stored using the WebGraph
* ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent
* identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id
* ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output
* (convert back to SWHID for users results). However, since graph traversal can be restricted
* depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored
* as well to avoid a full SWHID lookup.
*
* @author The Software Heritage developers
* @see org.softwareheritage.graph.AllowedEdges
* @see org.softwareheritage.graph.maps.NodeIdMap
* @see org.softwareheritage.graph.maps.NodeTypesMap
*/
public class Graph extends ImmutableGraph {
/** File extension for the SWHID to long node id map */
public static final String SWHID_TO_NODE = ".swhid2node.bin";
/** File extension for the long node id to SWHID map */
public static final String NODE_TO_SWHID = ".node2swhid.bin";
/** File extension for the long node id to node type map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
ImmutableGraph graph;
/** Transposed compressed graph (used for backward traversals) */
ImmutableGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id ↔ SWHIDs */
NodeIdMap nodeIdMap;
/** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
+
public Graph(String path) throws IOException {
+ loadInternal(path, null, LoadMethod.MAPPED);
+ }
+
+ /**
+ * Loading mechanisms
+ */
+
+ enum LoadMethod {
+ MEMORY, MAPPED, OFFLINE,
+ }
+
+ protected Graph loadInternal(String path, ProgressLogger pl, LoadMethod method) throws IOException {
this.path = path;
- this.graph = ImmutableGraph.loadMapped(path);
- this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed");
+ if (method == LoadMethod.MEMORY) {
+ this.graph = ImmutableGraph.load(path, pl);
+ this.graphTransposed = ImmutableGraph.load(path + "-transposed", pl);
+ } else if (method == LoadMethod.MAPPED) {
+ this.graph = ImmutableGraph.loadMapped(path, pl);
+ this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed", pl);
+ } else if (method == LoadMethod.OFFLINE) {
+ this.graph = ImmutableGraph.loadOffline(path, pl);
+ this.graphTransposed = ImmutableGraph.loadOffline(path + "-transposed", pl);
+ }
this.nodeTypesMap = new NodeTypesMap(path);
this.nodeIdMap = new NodeIdMap(path, numNodes());
+ return this;
+ }
+
+ protected Graph() {
+ }
+
+ public Graph load(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, pl, LoadMethod.MEMORY);
}
+ public Graph loadMapped(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, pl, LoadMethod.MAPPED);
+ }
+
+ public Graph loadOffline(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
+ }
+
+ public Graph load(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.MEMORY);
+ }
+
+ public Graph loadMapped(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.MAPPED);
+ }
+
+ public Graph loadOffline(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
+ }
+
+ /**
+ * Constructor used for copy()
+ */
protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap,
NodeTypesMap nodeTypesMap) {
this.graph = graph;
this.graphTransposed = graphTransposed;
this.path = path;
this.nodeIdMap = nodeIdMap;
this.nodeTypesMap = nodeTypesMap;
}
/**
* Return a flyweight copy of the graph.
*/
@Override
public Graph copy() {
return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap);
}
@Override
public boolean randomAccess() {
return graph.randomAccess() && graphTransposed.randomAccess();
}
/**
* Return a transposed version of the graph.
*/
public Graph transpose() {
return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Return a symmetric version of the graph.
*/
public Graph symmetrize() {
ImmutableGraph symmetric = Transform.union(graph, graphTransposed);
return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
@Override
public long numNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
@Override
public long numArcs() {
return graph.numArcs();
}
/**
* Returns lazy iterator of successors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of successors of the node, specified as a
* WebGraph LazyLongIterator
*/
@Override
public LazyLongIterator successors(long nodeId) {
return graph.successors(nodeId);
}
/**
* Returns lazy iterator of successors of a node while following a specific set of edge types.
*
* @param nodeId node specified as a long id
* @param allowedEdges the specification of which edges can be traversed
* @return lazy iterator of successors of the node, specified as a
* WebGraph LazyLongIterator
*/
public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) {
if (allowedEdges.restrictedTo == null) {
// All edges are allowed, bypass edge check
return this.successors(nodeId);
} else {
LazyLongIterator allSuccessors = this.successors(nodeId);
Graph thisGraph = this;
return new LazyLongIterator() {
@Override
public long nextLong() {
long neighbor;
while ((neighbor = allSuccessors.nextLong()) != -1) {
if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) {
return neighbor;
}
}
return -1;
}
@Override
public long skip(final long n) {
long i;
for (i = 0; i < n && nextLong() != -1; i++)
;
return i;
}
};
}
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
@Override
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns lazy iterator of predecessors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of predecessors of the node, specified as a
* WebGraph LazyLongIterator
*/
public LazyLongIterator predecessors(long nodeId) {
return this.transpose().successors(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return this.transpose().outdegree(nodeId);
}
/**
* Returns the underlying BVGraph.
*
* @return WebGraph BVGraph
*/
public ImmutableGraph getGraph() {
return this.graph;
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SWHID} node to long.
*
* @param swhid node specified as a {@link SWHID}
* @return internal long node id
* @see SWHID
*/
public long getNodeId(SWHID swhid) {
return nodeIdMap.getNodeId(swhid);
}
/**
* Converts long id node to {@link SWHID}.
*
* @param nodeId node specified as a long id
* @return external SWHID
* @see SWHID
*/
public SWHID getSWHID(long nodeId) {
return nodeIdMap.getSWHID(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
index c7370d2..e3f67a2 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
@@ -1,239 +1,239 @@
package org.softwareheritage.graph.experiments.topology;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
public class InOutDegree {
private InOutDegree() {
}
private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1;
private static final int TYPE_ALL = Node.Type.values().length;
private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT);
private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR);
private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV);
private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL);
private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP);
private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI);
public static long[] outdegreeTypes(final Graph graph, long node) {
long[] out = new long[NODE_ARRAY_SIZE];
var successors = graph.successors(node);
long neighbor;
while ((neighbor = successors.nextLong()) != -1) {
out[Node.Type.toInt(graph.getNodeType(neighbor))]++;
out[TYPE_ALL]++;
}
return out;
}
public static long[] indegreeTypes(final Graph graph, long node) {
return outdegreeTypes(graph.transpose(), node);
}
public static void writeDistribution(HashMap