diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java index ad44a54..92b4cfa 100644 --- a/java/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,257 +1,310 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.Transform; +import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.maps.NodeIdMap; import org.softwareheritage.graph.maps.NodeTypesMap; import java.io.IOException; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id * ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output * (convert back to SWHID for users results). However, since graph traversal can be restricted * depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored * as well to avoid a full SWHID lookup. * * @author The Software Heritage developers * @see org.softwareheritage.graph.AllowedEdges * @see org.softwareheritage.graph.maps.NodeIdMap * @see org.softwareheritage.graph.maps.NodeTypesMap */ public class Graph extends ImmutableGraph { /** File extension for the SWHID to long node id map */ public static final String SWHID_TO_NODE = ".swhid2node.bin"; /** File extension for the long node id to SWHID map */ public static final String NODE_TO_SWHID = ".node2swhid.bin"; /** File extension for the long node id to node type map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ ImmutableGraph graph; /** Transposed compressed graph (used for backward traversals) */ ImmutableGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWHIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. * * @param path path and basename of the compressed graph to load */ + public Graph(String path) throws IOException { + loadInternal(path, null, LoadMethod.MAPPED); + } + + /** + * Loading mechanisms + */ + + enum LoadMethod { + MEMORY, MAPPED, OFFLINE, + } + + protected Graph loadInternal(String path, ProgressLogger pl, LoadMethod method) throws IOException { this.path = path; - this.graph = ImmutableGraph.loadMapped(path); - this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed"); + if (method == LoadMethod.MEMORY) { + this.graph = ImmutableGraph.load(path, pl); + this.graphTransposed = ImmutableGraph.load(path + "-transposed", pl); + } else if (method == LoadMethod.MAPPED) { + this.graph = ImmutableGraph.loadMapped(path, pl); + this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed", pl); + } else if (method == LoadMethod.OFFLINE) { + this.graph = ImmutableGraph.loadOffline(path, pl); + this.graphTransposed = ImmutableGraph.loadOffline(path + "-transposed", pl); + } this.nodeTypesMap = new NodeTypesMap(path); this.nodeIdMap = new NodeIdMap(path, numNodes()); + return this; + } + + protected Graph() { + } + + public Graph load(String Path, ProgressLogger pl) throws IOException { + return new Graph().loadInternal(path, pl, LoadMethod.MEMORY); } + public Graph loadMapped(String Path, ProgressLogger pl) throws IOException { + return new Graph().loadInternal(path, pl, LoadMethod.MAPPED); + } + + public Graph loadOffline(String Path, ProgressLogger pl) throws IOException { + return new Graph().loadInternal(path, null, LoadMethod.OFFLINE); + } + + public Graph load(String Path) throws IOException { + return new Graph().loadInternal(path, null, LoadMethod.MEMORY); + } + + public Graph loadMapped(String Path) throws IOException { + return new Graph().loadInternal(path, null, LoadMethod.MAPPED); + } + + public Graph loadOffline(String Path) throws IOException { + return new Graph().loadInternal(path, null, LoadMethod.OFFLINE); + } + + /** + * Constructor used for copy() + */ protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) { this.graph = graph; this.graphTransposed = graphTransposed; this.path = path; this.nodeIdMap = nodeIdMap; this.nodeTypesMap = nodeTypesMap; } /** * Return a flyweight copy of the graph. */ @Override public Graph copy() { return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap); } @Override public boolean randomAccess() { return graph.randomAccess() && graphTransposed.randomAccess(); } /** * Return a transposed version of the graph. */ public Graph transpose() { return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap); } /** * Return a symmetric version of the graph. */ public Graph symmetrize() { ImmutableGraph symmetric = Transform.union(graph, graphTransposed); return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ @Override public long numNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ @Override public long numArcs() { return graph.numArcs(); } /** * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of successors of the node, specified as a * WebGraph LazyLongIterator */ @Override public LazyLongIterator successors(long nodeId) { return graph.successors(nodeId); } /** * Returns lazy iterator of successors of a node while following a specific set of edge types. * * @param nodeId node specified as a long id * @param allowedEdges the specification of which edges can be traversed * @return lazy iterator of successors of the node, specified as a * WebGraph LazyLongIterator */ public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) { if (allowedEdges.restrictedTo == null) { // All edges are allowed, bypass edge check return this.successors(nodeId); } else { LazyLongIterator allSuccessors = this.successors(nodeId); Graph thisGraph = this; return new LazyLongIterator() { @Override public long nextLong() { long neighbor; while ((neighbor = allSuccessors.nextLong()) != -1) { if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) { return neighbor; } } return -1; } @Override public long skip(final long n) { long i; for (i = 0; i < n && nextLong() != -1; i++) ; return i; } }; } } /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ @Override public long outdegree(long nodeId) { return graph.outdegree(nodeId); } /** * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of predecessors of the node, specified as a * WebGraph LazyLongIterator */ public LazyLongIterator predecessors(long nodeId) { return this.transpose().successors(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return this.transpose().outdegree(nodeId); } /** * Returns the underlying BVGraph. * * @return WebGraph BVGraph */ public ImmutableGraph getGraph() { return this.graph; } /** * Returns the graph full path. * * @return graph full path */ public String getPath() { return path; } /** * Converts {@link SWHID} node to long. * * @param swhid node specified as a {@link SWHID} * @return internal long node id * @see SWHID */ public long getNodeId(SWHID swhid) { return nodeIdMap.getNodeId(swhid); } /** * Converts long id node to {@link SWHID}. * * @param nodeId node specified as a long id * @return external SWHID * @see SWHID */ public SWHID getSWHID(long nodeId) { return nodeIdMap.getSWHID(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see org.softwareheritage.graph.Node.Type */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java index c7370d2..e3f67a2 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java @@ -1,239 +1,239 @@ package org.softwareheritage.graph.experiments.topology; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.lang.reflect.InvocationTargetException; import java.util.*; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; public class InOutDegree { private InOutDegree() { } private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1; private static final int TYPE_ALL = Node.Type.values().length; private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT); private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR); private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV); private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL); private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP); private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI); public static long[] outdegreeTypes(final Graph graph, long node) { long[] out = new long[NODE_ARRAY_SIZE]; var successors = graph.successors(node); long neighbor; while ((neighbor = successors.nextLong()) != -1) { out[Node.Type.toInt(graph.getNodeType(neighbor))]++; out[TYPE_ALL]++; } return out; } public static long[] indegreeTypes(final Graph graph, long node) { return outdegreeTypes(graph.transpose(), node); } public static void writeDistribution(HashMap distribution, String filename) throws IOException { PrintWriter f = new PrintWriter(new FileWriter(filename)); TreeMap sortedDistribution = new TreeMap<>(distribution); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } public static void run(final Graph graph, String resultsDir) throws IOException { // Per-type var cnt_in_dir = new HashMap(); var dir_in_dir = new HashMap(); var dir_in_rev = new HashMap(); var dir_in_all = new HashMap(); var dir_out_all = new HashMap(); var dir_out_dir = new HashMap(); var dir_out_cnt = new HashMap(); var dir_out_rev = new HashMap(); var rev_in_dir = new HashMap(); var rev_in_rel = new HashMap(); var rev_in_rev = new HashMap(); var rev_in_snp = new HashMap(); var rev_in_all = new HashMap(); var rev_out_rev = new HashMap(); var rel_in_snp = new HashMap(); var snp_in_ori = new HashMap(); var snp_out_all = new HashMap(); var snp_out_rel = new HashMap(); var snp_out_rev = new HashMap(); var ori_out_snp = new HashMap(); // Aggregated per layer var full_in = new HashMap(); var full_out = new HashMap(); var dircnt_in = new HashMap(); var dircnt_out = new HashMap(); var orisnp_in = new HashMap(); var orisnp_out = new HashMap(); var relrev_in = new HashMap(); var relrev_out = new HashMap(); var rev_in = rev_in_rev; // alias for single-type layer var rev_out = rev_out_rev; final ProgressLogger pl = new ProgressLogger(); pl.itemsName = "nodes"; pl.expectedUpdates = graph.numNodes(); pl.start("Scanning..."); long[] in; long[] out; for (long i = graph.numNodes(); i-- != 0;) { long d_in = graph.indegree(i); long d_out = graph.outdegree(i); full_in.merge(d_in, 1L, Long::sum); full_out.merge(d_out, 1L, Long::sum); switch (graph.getNodeType(i)) { case CNT: cnt_in_dir.merge(d_in, 1L, Long::sum); dircnt_in.merge(d_in, 1L, Long::sum); dircnt_out.merge(0L, 1L, Long::sum); break; case DIR: in = indegreeTypes(graph, i); out = outdegreeTypes(graph, i); dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum); dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum); dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum); dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum); dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum); dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum); dircnt_in.merge(in[TYPE_DIR] + in[TYPE_CNT], 1L, Long::sum); dircnt_out.merge(out[TYPE_DIR] + out[TYPE_CNT], 1L, Long::sum); break; case REV: in = indegreeTypes(graph, i); out = outdegreeTypes(graph, i); rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum); rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum); rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum); rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum); rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum); relrev_in.merge(in[TYPE_REL] + in[TYPE_REV], 1L, Long::sum); relrev_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum); break; case REL: rel_in_snp.merge(d_in, 1L, Long::sum); relrev_in.merge(0L, 1L, Long::sum); relrev_out.merge(d_out, 1L, Long::sum); break; case SNP: out = outdegreeTypes(graph, i); snp_in_ori.merge(d_in, 1L, Long::sum); snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum); snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum); snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum); orisnp_in.merge(d_in, 1L, Long::sum); orisnp_out.merge(out[TYPE_REL] + out[TYPE_REV], 1L, Long::sum); break; case ORI: ori_out_snp.merge(d_out, 1L, Long::sum); orisnp_in.merge(0L, 1L, Long::sum); orisnp_out.merge(d_out, 1L, Long::sum); break; - default: + default : pl.logger().warn("Invalid node type at pos {}", i); break; } pl.update(); } pl.done(); (new File(resultsDir)).mkdir(); writeDistribution(full_in, resultsDir + "/full_in.txt"); writeDistribution(full_out, resultsDir + "/full_out.txt"); writeDistribution(dircnt_in, resultsDir + "/dir+cnt_in.txt"); writeDistribution(dircnt_out, resultsDir + "/dir+cnt_out.txt"); writeDistribution(relrev_in, resultsDir + "/rel+rev_in.txt"); writeDistribution(relrev_out, resultsDir + "/rel+rev_out.txt"); writeDistribution(orisnp_in, resultsDir + "/ori+snp_in.txt"); writeDistribution(orisnp_out, resultsDir + "/ori+snp_out.txt"); writeDistribution(rev_in, resultsDir + "/rev_in.txt"); writeDistribution(rev_out, resultsDir + "/rev_out.txt"); String resultsTypeDir = resultsDir + "/per_type"; (new File(resultsTypeDir)).mkdir(); writeDistribution(cnt_in_dir, resultsTypeDir + "/cnt_in_dir.txt"); writeDistribution(dir_in_dir, resultsTypeDir + "/dir_in_dir.txt"); writeDistribution(dir_in_rev, resultsTypeDir + "/dir_in_rev.txt"); writeDistribution(dir_in_all, resultsTypeDir + "/dir_in_all.txt"); writeDistribution(dir_out_all, resultsTypeDir + "/dir_out_all.txt"); writeDistribution(dir_out_dir, resultsTypeDir + "/dir_out_dir.txt"); writeDistribution(dir_out_cnt, resultsTypeDir + "/dir_out_cnt.txt"); writeDistribution(dir_out_rev, resultsTypeDir + "/dir_out_rev.txt"); writeDistribution(rev_in_dir, resultsTypeDir + "/rev_in_dir.txt"); writeDistribution(rev_in_rel, resultsTypeDir + "/rev_in_rel.txt"); writeDistribution(rev_in_rev, resultsTypeDir + "/rev_in_rev.txt"); writeDistribution(rev_in_snp, resultsTypeDir + "/rev_in_snp.txt"); writeDistribution(rev_in_all, resultsTypeDir + "/rev_in_all.txt"); writeDistribution(rev_out_rev, resultsTypeDir + "/rev_out_rev.txt"); writeDistribution(rel_in_snp, resultsTypeDir + "/rel_in_snp.txt"); writeDistribution(snp_in_ori, resultsTypeDir + "/snp_in_ori.txt"); writeDistribution(snp_out_all, resultsTypeDir + "/snp_out_all.txt"); writeDistribution(snp_out_rel, resultsTypeDir + "/snp_out_rel.txt"); writeDistribution(snp_out_rev, resultsTypeDir + "/snp_out_rev.txt"); writeDistribution(ori_out_snp, resultsTypeDir + "/ori_out_snp.txt"); } static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, JSAPException, IOException, ClassNotFoundException { final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(), "Computes in and out degrees of the given SWHGraph", new Parameter[]{ new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."), new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The directory of the resulting files."),}); final JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) System.exit(1); final String basename = jsapResult.getString("basename"); final String resultsDir = jsapResult.userSpecified("resultsDir") ? jsapResult.getString("resultsDir") : basename; final ProgressLogger pl = new ProgressLogger(); Graph graph = new Graph(basename); run(graph, resultsDir); } }