Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8322666
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
21 KB
Subscribers
None
View Options
diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java
index ad44a54..92b4cfa 100644
--- a/java/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,257 +1,310 @@
package org.softwareheritage.graph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.Transform;
+import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.maps.NodeIdMap;
import org.softwareheritage.graph.maps.NodeTypesMap;
import java.io.IOException;
/**
* Main class storing the compressed graph and node id mappings.
* <p>
* The compressed graph is stored using the <a href="http://webgraph.di.unimi.it/">WebGraph</a>
* ecosystem. Additional mappings are necessary because Software Heritage uses string based <a href=
* "https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html">persistent
* identifiers</a> (SWHID) while WebGraph uses integers internally. These two mappings (long id
* ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output
* (convert back to SWHID for users results). However, since graph traversal can be restricted
* depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored
* as well to avoid a full SWHID lookup.
*
* @author The Software Heritage developers
* @see org.softwareheritage.graph.AllowedEdges
* @see org.softwareheritage.graph.maps.NodeIdMap
* @see org.softwareheritage.graph.maps.NodeTypesMap
*/
public class Graph extends ImmutableGraph {
/** File extension for the SWHID to long node id map */
public static final String SWHID_TO_NODE = ".swhid2node.bin";
/** File extension for the long node id to SWHID map */
public static final String NODE_TO_SWHID = ".node2swhid.bin";
/** File extension for the long node id to node type map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
ImmutableGraph graph;
/** Transposed compressed graph (used for backward traversals) */
ImmutableGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id ↔ SWHIDs */
NodeIdMap nodeIdMap;
/** Mapping long id → node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
+
public Graph(String path) throws IOException {
+ loadInternal(path, null, LoadMethod.MAPPED);
+ }
+
+ /**
+ * Loading mechanisms
+ */
+
+ enum LoadMethod {
+ MEMORY, MAPPED, OFFLINE,
+ }
+
+ protected Graph loadInternal(String path, ProgressLogger pl, LoadMethod method) throws IOException {
this.path = path;
- this.graph = ImmutableGraph.loadMapped(path);
- this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed");
+ if (method == LoadMethod.MEMORY) {
+ this.graph = ImmutableGraph.load(path, pl);
+ this.graphTransposed = ImmutableGraph.load(path + "-transposed", pl);
+ } else if (method == LoadMethod.MAPPED) {
+ this.graph = ImmutableGraph.loadMapped(path, pl);
+ this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed", pl);
+ } else if (method == LoadMethod.OFFLINE) {
+ this.graph = ImmutableGraph.loadOffline(path, pl);
+ this.graphTransposed = ImmutableGraph.loadOffline(path + "-transposed", pl);
+ }
this.nodeTypesMap = new NodeTypesMap(path);
this.nodeIdMap = new NodeIdMap(path, numNodes());
+ return this;
+ }
+
+ protected Graph() {
+ }
+
+ public Graph load(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, pl, LoadMethod.MEMORY);
}
+ public Graph loadMapped(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, pl, LoadMethod.MAPPED);
+ }
+
+ public Graph loadOffline(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
+ }
+
+ public Graph load(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.MEMORY);
+ }
+
+ public Graph loadMapped(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.MAPPED);
+ }
+
+ public Graph loadOffline(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
+ }
+
+ /**
+ * Constructor used for copy()
+ */
protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap,
NodeTypesMap nodeTypesMap) {
this.graph = graph;
this.graphTransposed = graphTransposed;
this.path = path;
this.nodeIdMap = nodeIdMap;
this.nodeTypesMap = nodeTypesMap;
}
/**
* Return a flyweight copy of the graph.
*/
@Override
public Graph copy() {
return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap);
}
@Override
public boolean randomAccess() {
return graph.randomAccess() && graphTransposed.randomAccess();
}
/**
* Return a transposed version of the graph.
*/
public Graph transpose() {
return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Return a symmetric version of the graph.
*/
public Graph symmetrize() {
ImmutableGraph symmetric = Transform.union(graph, graphTransposed);
return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
@Override
public long numNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
@Override
public long numArcs() {
return graph.numArcs();
}
/**
* Returns lazy iterator of successors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of successors of the node, specified as a
* <a href="http://webgraph.di.unimi.it/">WebGraph</a> LazyLongIterator
*/
@Override
public LazyLongIterator successors(long nodeId) {
return graph.successors(nodeId);
}
/**
* Returns lazy iterator of successors of a node while following a specific set of edge types.
*
* @param nodeId node specified as a long id
* @param allowedEdges the specification of which edges can be traversed
* @return lazy iterator of successors of the node, specified as a
* <a href="http://webgraph.di.unimi.it/">WebGraph</a> LazyLongIterator
*/
public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) {
if (allowedEdges.restrictedTo == null) {
// All edges are allowed, bypass edge check
return this.successors(nodeId);
} else {
LazyLongIterator allSuccessors = this.successors(nodeId);
Graph thisGraph = this;
return new LazyLongIterator() {
@Override
public long nextLong() {
long neighbor;
while ((neighbor = allSuccessors.nextLong()) != -1) {
if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) {
return neighbor;
}
}
return -1;
}
@Override
public long skip(final long n) {
long i;
for (i = 0; i < n && nextLong() != -1; i++)
;
return i;
}
};
}
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
@Override
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns lazy iterator of predecessors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of predecessors of the node, specified as a
* <a href="http://webgraph.di.unimi.it/">WebGraph</a> LazyLongIterator
*/
public LazyLongIterator predecessors(long nodeId) {
return this.transpose().successors(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return this.transpose().outdegree(nodeId);
}
/**
* Returns the underlying BVGraph.
*
* @return WebGraph BVGraph
*/
public ImmutableGraph getGraph() {
return this.graph;
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SWHID} node to long.
*
* @param swhid node specified as a {@link SWHID}
* @return internal long node id
* @see SWHID
*/
public long getNodeId(SWHID swhid) {
return nodeIdMap.getNodeId(swhid);
}
/**
* Converts long id node to {@link SWHID}.
*
* @param nodeId node specified as a long id
* @return external SWHID
* @see SWHID
*/
public SWHID getSWHID(long nodeId) {
return nodeIdMap.getSWHID(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
index c7370d2..e3f67a2 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
@@ -1,239 +1,239 @@
package org.softwareheritage.graph.experiments.topology;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
public class InOutDegree {
private InOutDegree() {
}
private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1;
private static final int TYPE_ALL = Node.Type.values().length;
private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT);
private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR);
private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV);
private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL);
private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP);
private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI);
public static long[] outdegreeTypes(final Graph graph, long node) {
long[] out = new long[NODE_ARRAY_SIZE];
var successors = graph.successors(node);
long neighbor;
while ((neighbor = successors.nextLong()) != -1) {
out[Node.Type.toInt(graph.getNodeType(neighbor))]++;
out[TYPE_ALL]++;
}
return out;
}
public static long[] indegreeTypes(final Graph graph, long node) {
return outdegreeTypes(graph.transpose(), node);
}
public static void writeDistribution(HashMap<Long, Long> distribution, String filename) throws IOException {
PrintWriter f = new PrintWriter(new FileWriter(filename));
TreeMap<Long, Long> sortedDistribution = new TreeMap<>(distribution);
for (Map.Entry<Long, Long> entry : sortedDistribution.entrySet()) {
f.println(entry.getKey() + " " + entry.getValue());
}
f.close();
}
public static void run(final Graph graph, String resultsDir) throws IOException {
// Per-type
var cnt_in_dir = new HashMap<Long, Long>();
var dir_in_dir = new HashMap<Long, Long>();
var dir_in_rev = new HashMap<Long, Long>();
var dir_in_all = new HashMap<Long, Long>();
var dir_out_all = new HashMap<Long, Long>();
var dir_out_dir = new HashMap<Long, Long>();
var dir_out_cnt = new HashMap<Long, Long>();
var dir_out_rev = new HashMap<Long, Long>();
var rev_in_dir = new HashMap<Long, Long>();
var rev_in_rel = new HashMap<Long, Long>();
var rev_in_rev = new HashMap<Long, Long>();
var rev_in_snp = new HashMap<Long, Long>();
var rev_in_all = new HashMap<Long, Long>();
var rev_out_rev = new HashMap<Long, Long>();
var rel_in_snp = new HashMap<Long, Long>();
var snp_in_ori = new HashMap<Long, Long>();
var snp_out_all = new HashMap<Long, Long>();
var snp_out_rel = new HashMap<Long, Long>();
var snp_out_rev = new HashMap<Long, Long>();
var ori_out_snp = new HashMap<Long, Long>();
// Aggregated per layer
var full_in = new HashMap<Long, Long>();
var full_out = new HashMap<Long, Long>();
var dircnt_in = new HashMap<Long, Long>();
var dircnt_out = new HashMap<Long, Long>();
var orisnp_in = new HashMap<Long, Long>();
var orisnp_out = new HashMap<Long, Long>();
var relrev_in = new HashMap<Long, Long>();
var relrev_out = new HashMap<Long, Long>();
var rev_in = rev_in_rev; // alias for single-type layer
var rev_out = rev_out_rev;
final ProgressLogger pl = new ProgressLogger();
pl.itemsName = "nodes";
pl.expectedUpdates = graph.numNodes();
pl.start("Scanning...");
long[] in;
long[] out;
for (long i = graph.numNodes(); i-- != 0;) {
long d_in = graph.indegree(i);
long d_out = graph.outdegree(i);
full_in.merge(d_in, 1L, Long::sum);
full_out.merge(d_out, 1L, Long::sum);
switch (graph.getNodeType(i)) {
case CNT:
cnt_in_dir.merge(d_in, 1L, Long::sum);
dircnt_in.merge(d_in, 1L, Long::sum);
dircnt_out.merge(0L, 1L, Long::sum);
break;
case DIR:
in = indegreeTypes(graph, i);
out = outdegreeTypes(graph, i);
dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum);
dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum);
dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
dircnt_in.merge(in[TYPE_DIR] + in[TYPE_CNT], 1L, Long::sum);
dircnt_out.merge(out[TYPE_DIR] + out[TYPE_CNT], 1L, Long::sum);
break;
case REV:
in = indegreeTypes(graph, i);
out = outdegreeTypes(graph, i);
rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum);
rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum);
rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
relrev_in.merge(in[TYPE_REL] + in[TYPE_REV], 1L, Long::sum);
relrev_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum);
break;
case REL:
rel_in_snp.merge(d_in, 1L, Long::sum);
relrev_in.merge(0L, 1L, Long::sum);
relrev_out.merge(d_out, 1L, Long::sum);
break;
case SNP:
out = outdegreeTypes(graph, i);
snp_in_ori.merge(d_in, 1L, Long::sum);
snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum);
snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
orisnp_in.merge(d_in, 1L, Long::sum);
orisnp_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum);
break;
case ORI:
ori_out_snp.merge(d_out, 1L, Long::sum);
orisnp_in.merge(0L, 1L, Long::sum);
orisnp_out.merge(d_out, 1L, Long::sum);
break;
- default:
+ default :
pl.logger().warn("Invalid node type at pos {}", i);
break;
}
pl.update();
}
pl.done();
(new File(resultsDir)).mkdir();
writeDistribution(full_in, resultsDir + "/full_in.txt");
writeDistribution(full_out, resultsDir + "/full_out.txt");
writeDistribution(dircnt_in, resultsDir + "/dir+cnt_in.txt");
writeDistribution(dircnt_out, resultsDir + "/dir+cnt_out.txt");
writeDistribution(relrev_in, resultsDir + "/rel+rev_in.txt");
writeDistribution(relrev_out, resultsDir + "/rel+rev_out.txt");
writeDistribution(orisnp_in, resultsDir + "/ori+snp_in.txt");
writeDistribution(orisnp_out, resultsDir + "/ori+snp_out.txt");
writeDistribution(rev_in, resultsDir + "/rev_in.txt");
writeDistribution(rev_out, resultsDir + "/rev_out.txt");
String resultsTypeDir = resultsDir + "/per_type";
(new File(resultsTypeDir)).mkdir();
writeDistribution(cnt_in_dir, resultsTypeDir + "/cnt_in_dir.txt");
writeDistribution(dir_in_dir, resultsTypeDir + "/dir_in_dir.txt");
writeDistribution(dir_in_rev, resultsTypeDir + "/dir_in_rev.txt");
writeDistribution(dir_in_all, resultsTypeDir + "/dir_in_all.txt");
writeDistribution(dir_out_all, resultsTypeDir + "/dir_out_all.txt");
writeDistribution(dir_out_dir, resultsTypeDir + "/dir_out_dir.txt");
writeDistribution(dir_out_cnt, resultsTypeDir + "/dir_out_cnt.txt");
writeDistribution(dir_out_rev, resultsTypeDir + "/dir_out_rev.txt");
writeDistribution(rev_in_dir, resultsTypeDir + "/rev_in_dir.txt");
writeDistribution(rev_in_rel, resultsTypeDir + "/rev_in_rel.txt");
writeDistribution(rev_in_rev, resultsTypeDir + "/rev_in_rev.txt");
writeDistribution(rev_in_snp, resultsTypeDir + "/rev_in_snp.txt");
writeDistribution(rev_in_all, resultsTypeDir + "/rev_in_all.txt");
writeDistribution(rev_out_rev, resultsTypeDir + "/rev_out_rev.txt");
writeDistribution(rel_in_snp, resultsTypeDir + "/rel_in_snp.txt");
writeDistribution(snp_in_ori, resultsTypeDir + "/snp_in_ori.txt");
writeDistribution(snp_out_all, resultsTypeDir + "/snp_out_all.txt");
writeDistribution(snp_out_rel, resultsTypeDir + "/snp_out_rel.txt");
writeDistribution(snp_out_rev, resultsTypeDir + "/snp_out_rev.txt");
writeDistribution(ori_out_snp, resultsTypeDir + "/ori_out_snp.txt");
}
static public void main(final String[] arg)
throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException,
NoSuchMethodException, JSAPException, IOException, ClassNotFoundException {
final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(),
"Computes in and out degrees of the given SWHGraph",
new Parameter[]{
new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the graph."),
new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
JSAP.NOT_GREEDY, "The directory of the resulting files."),});
final JSAPResult jsapResult = jsap.parse(arg);
if (jsap.messagePrinted())
System.exit(1);
final String basename = jsapResult.getString("basename");
final String resultsDir = jsapResult.userSpecified("resultsDir")
? jsapResult.getString("resultsDir")
: basename;
final ProgressLogger pl = new ProgressLogger();
Graph graph = new Graph(basename);
run(graph, resultsDir);
}
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Jun 3, 7:41 AM (6 d, 9 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3284624
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment