Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java
index ad44a54..92b4cfa 100644
--- a/java/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/java/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,257 +1,310 @@
package org.softwareheritage.graph;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.Transform;
+import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.maps.NodeIdMap;
import org.softwareheritage.graph.maps.NodeTypesMap;
import java.io.IOException;
/**
* Main class storing the compressed graph and node id mappings.
* <p>
* The compressed graph is stored using the <a href="http://webgraph.di.unimi.it/">WebGraph</a>
* ecosystem. Additional mappings are necessary because Software Heritage uses string based <a href=
* "https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html">persistent
* identifiers</a> (SWHID) while WebGraph uses integers internally. These two mappings (long id
* &harr; SWHID) are used for the input (users refer to the graph using SWHID) and the output
* (convert back to SWHID for users results). However, since graph traversal can be restricted
* depending on the node type (see {@link AllowedEdges}), a long id &rarr; node type map is stored
* as well to avoid a full SWHID lookup.
*
* @author The Software Heritage developers
* @see org.softwareheritage.graph.AllowedEdges
* @see org.softwareheritage.graph.maps.NodeIdMap
* @see org.softwareheritage.graph.maps.NodeTypesMap
*/
public class Graph extends ImmutableGraph {
/** File extension for the SWHID to long node id map */
public static final String SWHID_TO_NODE = ".swhid2node.bin";
/** File extension for the long node id to SWHID map */
public static final String NODE_TO_SWHID = ".node2swhid.bin";
/** File extension for the long node id to node type map */
public static final String NODE_TO_TYPE = ".node2type.map";
/** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */
ImmutableGraph graph;
/** Transposed compressed graph (used for backward traversals) */
ImmutableGraph graphTransposed;
/** Path and basename of the compressed graph */
String path;
/** Mapping long id &harr; SWHIDs */
NodeIdMap nodeIdMap;
/** Mapping long id &rarr; node types */
NodeTypesMap nodeTypesMap;
/**
* Constructor.
*
* @param path path and basename of the compressed graph to load
*/
+
public Graph(String path) throws IOException {
+ loadInternal(path, null, LoadMethod.MAPPED);
+ }
+
+ /**
+ * Loading mechanisms
+ */
+
+ enum LoadMethod {
+ MEMORY, MAPPED, OFFLINE,
+ }
+
+ protected Graph loadInternal(String path, ProgressLogger pl, LoadMethod method) throws IOException {
this.path = path;
- this.graph = ImmutableGraph.loadMapped(path);
- this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed");
+ if (method == LoadMethod.MEMORY) {
+ this.graph = ImmutableGraph.load(path, pl);
+ this.graphTransposed = ImmutableGraph.load(path + "-transposed", pl);
+ } else if (method == LoadMethod.MAPPED) {
+ this.graph = ImmutableGraph.loadMapped(path, pl);
+ this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed", pl);
+ } else if (method == LoadMethod.OFFLINE) {
+ this.graph = ImmutableGraph.loadOffline(path, pl);
+ this.graphTransposed = ImmutableGraph.loadOffline(path + "-transposed", pl);
+ }
this.nodeTypesMap = new NodeTypesMap(path);
this.nodeIdMap = new NodeIdMap(path, numNodes());
+ return this;
+ }
+
+ protected Graph() {
+ }
+
+ public Graph load(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, pl, LoadMethod.MEMORY);
}
+ public Graph loadMapped(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, pl, LoadMethod.MAPPED);
+ }
+
+ public Graph loadOffline(String Path, ProgressLogger pl) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
+ }
+
+ public Graph load(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.MEMORY);
+ }
+
+ public Graph loadMapped(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.MAPPED);
+ }
+
+ public Graph loadOffline(String Path) throws IOException {
+ return new Graph().loadInternal(path, null, LoadMethod.OFFLINE);
+ }
+
+ /**
+ * Constructor used for copy()
+ */
protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap,
NodeTypesMap nodeTypesMap) {
this.graph = graph;
this.graphTransposed = graphTransposed;
this.path = path;
this.nodeIdMap = nodeIdMap;
this.nodeTypesMap = nodeTypesMap;
}
/**
* Return a flyweight copy of the graph.
*/
@Override
public Graph copy() {
return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap);
}
@Override
public boolean randomAccess() {
return graph.randomAccess() && graphTransposed.randomAccess();
}
/**
* Return a transposed version of the graph.
*/
public Graph transpose() {
return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Return a symmetric version of the graph.
*/
public Graph symmetrize() {
ImmutableGraph symmetric = Transform.union(graph, graphTransposed);
return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap);
}
/**
* Cleans up graph resources after use.
*/
public void cleanUp() throws IOException {
nodeIdMap.close();
}
/**
* Returns number of nodes in the graph.
*
* @return number of nodes in the graph
*/
@Override
public long numNodes() {
return graph.numNodes();
}
/**
* Returns number of edges in the graph.
*
* @return number of edges in the graph
*/
@Override
public long numArcs() {
return graph.numArcs();
}
/**
* Returns lazy iterator of successors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of successors of the node, specified as a
* <a href="http://webgraph.di.unimi.it/">WebGraph</a> LazyLongIterator
*/
@Override
public LazyLongIterator successors(long nodeId) {
return graph.successors(nodeId);
}
/**
* Returns lazy iterator of successors of a node while following a specific set of edge types.
*
* @param nodeId node specified as a long id
* @param allowedEdges the specification of which edges can be traversed
* @return lazy iterator of successors of the node, specified as a
* <a href="http://webgraph.di.unimi.it/">WebGraph</a> LazyLongIterator
*/
public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) {
if (allowedEdges.restrictedTo == null) {
// All edges are allowed, bypass edge check
return this.successors(nodeId);
} else {
LazyLongIterator allSuccessors = this.successors(nodeId);
Graph thisGraph = this;
return new LazyLongIterator() {
@Override
public long nextLong() {
long neighbor;
while ((neighbor = allSuccessors.nextLong()) != -1) {
if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) {
return neighbor;
}
}
return -1;
}
@Override
public long skip(final long n) {
long i;
for (i = 0; i < n && nextLong() != -1; i++)
;
return i;
}
};
}
}
/**
* Returns the outdegree of a node.
*
* @param nodeId node specified as a long id
* @return outdegree of a node
*/
@Override
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
/**
* Returns lazy iterator of predecessors of a node.
*
* @param nodeId node specified as a long id
* @return lazy iterator of predecessors of the node, specified as a
* <a href="http://webgraph.di.unimi.it/">WebGraph</a> LazyLongIterator
*/
public LazyLongIterator predecessors(long nodeId) {
return this.transpose().successors(nodeId);
}
/**
* Returns the indegree of a node.
*
* @param nodeId node specified as a long id
* @return indegree of a node
*/
public long indegree(long nodeId) {
return this.transpose().outdegree(nodeId);
}
/**
* Returns the underlying BVGraph.
*
* @return WebGraph BVGraph
*/
public ImmutableGraph getGraph() {
return this.graph;
}
/**
* Returns the graph full path.
*
* @return graph full path
*/
public String getPath() {
return path;
}
/**
* Converts {@link SWHID} node to long.
*
* @param swhid node specified as a {@link SWHID}
* @return internal long node id
* @see SWHID
*/
public long getNodeId(SWHID swhid) {
return nodeIdMap.getNodeId(swhid);
}
/**
* Converts long id node to {@link SWHID}.
*
* @param nodeId node specified as a long id
* @return external SWHID
* @see SWHID
*/
public SWHID getSWHID(long nodeId) {
return nodeIdMap.getSWHID(nodeId);
}
/**
* Returns node type.
*
* @param nodeId node specified as a long id
* @return corresponding node type
* @see org.softwareheritage.graph.Node.Type
*/
public Node.Type getNodeType(long nodeId) {
return nodeTypesMap.getType(nodeId);
}
}
diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
index c7370d2..e3f67a2 100644
--- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
+++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java
@@ -1,239 +1,239 @@
package org.softwareheritage.graph.experiments.topology;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.Node;
public class InOutDegree {
private InOutDegree() {
}
private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1;
private static final int TYPE_ALL = Node.Type.values().length;
private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT);
private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR);
private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV);
private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL);
private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP);
private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI);
public static long[] outdegreeTypes(final Graph graph, long node) {
long[] out = new long[NODE_ARRAY_SIZE];
var successors = graph.successors(node);
long neighbor;
while ((neighbor = successors.nextLong()) != -1) {
out[Node.Type.toInt(graph.getNodeType(neighbor))]++;
out[TYPE_ALL]++;
}
return out;
}
public static long[] indegreeTypes(final Graph graph, long node) {
return outdegreeTypes(graph.transpose(), node);
}
public static void writeDistribution(HashMap<Long, Long> distribution, String filename) throws IOException {
PrintWriter f = new PrintWriter(new FileWriter(filename));
TreeMap<Long, Long> sortedDistribution = new TreeMap<>(distribution);
for (Map.Entry<Long, Long> entry : sortedDistribution.entrySet()) {
f.println(entry.getKey() + " " + entry.getValue());
}
f.close();
}
public static void run(final Graph graph, String resultsDir) throws IOException {
// Per-type
var cnt_in_dir = new HashMap<Long, Long>();
var dir_in_dir = new HashMap<Long, Long>();
var dir_in_rev = new HashMap<Long, Long>();
var dir_in_all = new HashMap<Long, Long>();
var dir_out_all = new HashMap<Long, Long>();
var dir_out_dir = new HashMap<Long, Long>();
var dir_out_cnt = new HashMap<Long, Long>();
var dir_out_rev = new HashMap<Long, Long>();
var rev_in_dir = new HashMap<Long, Long>();
var rev_in_rel = new HashMap<Long, Long>();
var rev_in_rev = new HashMap<Long, Long>();
var rev_in_snp = new HashMap<Long, Long>();
var rev_in_all = new HashMap<Long, Long>();
var rev_out_rev = new HashMap<Long, Long>();
var rel_in_snp = new HashMap<Long, Long>();
var snp_in_ori = new HashMap<Long, Long>();
var snp_out_all = new HashMap<Long, Long>();
var snp_out_rel = new HashMap<Long, Long>();
var snp_out_rev = new HashMap<Long, Long>();
var ori_out_snp = new HashMap<Long, Long>();
// Aggregated per layer
var full_in = new HashMap<Long, Long>();
var full_out = new HashMap<Long, Long>();
var dircnt_in = new HashMap<Long, Long>();
var dircnt_out = new HashMap<Long, Long>();
var orisnp_in = new HashMap<Long, Long>();
var orisnp_out = new HashMap<Long, Long>();
var relrev_in = new HashMap<Long, Long>();
var relrev_out = new HashMap<Long, Long>();
var rev_in = rev_in_rev; // alias for single-type layer
var rev_out = rev_out_rev;
final ProgressLogger pl = new ProgressLogger();
pl.itemsName = "nodes";
pl.expectedUpdates = graph.numNodes();
pl.start("Scanning...");
long[] in;
long[] out;
for (long i = graph.numNodes(); i-- != 0;) {
long d_in = graph.indegree(i);
long d_out = graph.outdegree(i);
full_in.merge(d_in, 1L, Long::sum);
full_out.merge(d_out, 1L, Long::sum);
switch (graph.getNodeType(i)) {
case CNT:
cnt_in_dir.merge(d_in, 1L, Long::sum);
dircnt_in.merge(d_in, 1L, Long::sum);
dircnt_out.merge(0L, 1L, Long::sum);
break;
case DIR:
in = indegreeTypes(graph, i);
out = outdegreeTypes(graph, i);
dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum);
dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum);
dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
dircnt_in.merge(in[TYPE_DIR] + in[TYPE_CNT], 1L, Long::sum);
dircnt_out.merge(out[TYPE_DIR] + out[TYPE_CNT], 1L, Long::sum);
break;
case REV:
in = indegreeTypes(graph, i);
out = outdegreeTypes(graph, i);
rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum);
rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum);
rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum);
rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum);
rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum);
rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
relrev_in.merge(in[TYPE_REL] + in[TYPE_REV], 1L, Long::sum);
relrev_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum);
break;
case REL:
rel_in_snp.merge(d_in, 1L, Long::sum);
relrev_in.merge(0L, 1L, Long::sum);
relrev_out.merge(d_out, 1L, Long::sum);
break;
case SNP:
out = outdegreeTypes(graph, i);
snp_in_ori.merge(d_in, 1L, Long::sum);
snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum);
snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum);
snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum);
orisnp_in.merge(d_in, 1L, Long::sum);
orisnp_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum);
break;
case ORI:
ori_out_snp.merge(d_out, 1L, Long::sum);
orisnp_in.merge(0L, 1L, Long::sum);
orisnp_out.merge(d_out, 1L, Long::sum);
break;
- default:
+ default :
pl.logger().warn("Invalid node type at pos {}", i);
break;
}
pl.update();
}
pl.done();
(new File(resultsDir)).mkdir();
writeDistribution(full_in, resultsDir + "/full_in.txt");
writeDistribution(full_out, resultsDir + "/full_out.txt");
writeDistribution(dircnt_in, resultsDir + "/dir+cnt_in.txt");
writeDistribution(dircnt_out, resultsDir + "/dir+cnt_out.txt");
writeDistribution(relrev_in, resultsDir + "/rel+rev_in.txt");
writeDistribution(relrev_out, resultsDir + "/rel+rev_out.txt");
writeDistribution(orisnp_in, resultsDir + "/ori+snp_in.txt");
writeDistribution(orisnp_out, resultsDir + "/ori+snp_out.txt");
writeDistribution(rev_in, resultsDir + "/rev_in.txt");
writeDistribution(rev_out, resultsDir + "/rev_out.txt");
String resultsTypeDir = resultsDir + "/per_type";
(new File(resultsTypeDir)).mkdir();
writeDistribution(cnt_in_dir, resultsTypeDir + "/cnt_in_dir.txt");
writeDistribution(dir_in_dir, resultsTypeDir + "/dir_in_dir.txt");
writeDistribution(dir_in_rev, resultsTypeDir + "/dir_in_rev.txt");
writeDistribution(dir_in_all, resultsTypeDir + "/dir_in_all.txt");
writeDistribution(dir_out_all, resultsTypeDir + "/dir_out_all.txt");
writeDistribution(dir_out_dir, resultsTypeDir + "/dir_out_dir.txt");
writeDistribution(dir_out_cnt, resultsTypeDir + "/dir_out_cnt.txt");
writeDistribution(dir_out_rev, resultsTypeDir + "/dir_out_rev.txt");
writeDistribution(rev_in_dir, resultsTypeDir + "/rev_in_dir.txt");
writeDistribution(rev_in_rel, resultsTypeDir + "/rev_in_rel.txt");
writeDistribution(rev_in_rev, resultsTypeDir + "/rev_in_rev.txt");
writeDistribution(rev_in_snp, resultsTypeDir + "/rev_in_snp.txt");
writeDistribution(rev_in_all, resultsTypeDir + "/rev_in_all.txt");
writeDistribution(rev_out_rev, resultsTypeDir + "/rev_out_rev.txt");
writeDistribution(rel_in_snp, resultsTypeDir + "/rel_in_snp.txt");
writeDistribution(snp_in_ori, resultsTypeDir + "/snp_in_ori.txt");
writeDistribution(snp_out_all, resultsTypeDir + "/snp_out_all.txt");
writeDistribution(snp_out_rel, resultsTypeDir + "/snp_out_rel.txt");
writeDistribution(snp_out_rev, resultsTypeDir + "/snp_out_rev.txt");
writeDistribution(ori_out_snp, resultsTypeDir + "/ori_out_snp.txt");
}
static public void main(final String[] arg)
throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException,
NoSuchMethodException, JSAPException, IOException, ClassNotFoundException {
final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(),
"Computes in and out degrees of the given SWHGraph",
new Parameter[]{
new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED,
JSAP.NOT_GREEDY, "The basename of the graph."),
new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
JSAP.NOT_GREEDY, "The directory of the resulting files."),});
final JSAPResult jsapResult = jsap.parse(arg);
if (jsap.messagePrinted())
System.exit(1);
final String basename = jsapResult.getString("basename");
final String resultsDir = jsapResult.userSpecified("resultsDir")
? jsapResult.getString("resultsDir")
: basename;
final ProgressLogger pl = new ProgressLogger();
Graph graph = new Graph(basename);
run(graph, resultsDir);
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jun 3, 7:41 AM (6 d, 3 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3284624

Event Timeline