Differential D4028 Diff 14692 java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
Changeset View
Changeset View
Standalone View
Standalone View
java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java
package org.softwareheritage.graph.experiments.topology; | package org.softwareheritage.graph.experiments.topology; | ||||
import com.google.common.primitives.Longs; | import com.google.common.primitives.Longs; | ||||
import com.martiansoftware.jsap.*; | import com.martiansoftware.jsap.*; | ||||
import it.unimi.dsi.big.webgraph.ImmutableGraph; | |||||
import it.unimi.dsi.big.webgraph.LazyLongIterator; | import it.unimi.dsi.big.webgraph.LazyLongIterator; | ||||
import it.unimi.dsi.big.webgraph.Transform; | |||||
import it.unimi.dsi.bits.LongArrayBitVector; | import it.unimi.dsi.bits.LongArrayBitVector; | ||||
import it.unimi.dsi.fastutil.Arrays; | import it.unimi.dsi.fastutil.Arrays; | ||||
import it.unimi.dsi.io.ByteDiskQueue; | import it.unimi.dsi.io.ByteDiskQueue; | ||||
import it.unimi.dsi.logging.ProgressLogger; | import it.unimi.dsi.logging.ProgressLogger; | ||||
import org.softwareheritage.graph.Graph; | import org.softwareheritage.graph.Graph; | ||||
import java.io.File; | import java.io.File; | ||||
import java.io.FileWriter; | import java.io.FileWriter; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.PrintWriter; | import java.io.PrintWriter; | ||||
import java.util.*; | import java.util.*; | ||||
public class ConnectedComponents { | public class ConnectedComponents { | ||||
private Graph graph; | private Graph graph; | ||||
private void load_graph(String graphBasename) throws IOException { | private void load_graph(String graphBasename) throws IOException { | ||||
System.err.println("Loading graph " + graphBasename + " ..."); | System.err.println("Loading graph " + graphBasename + " ..."); | ||||
this.graph = new Graph(graphBasename).symmetrize(); | this.graph = new Graph(graphBasename).symmetrize(); | ||||
System.err.println("Graph loaded."); | System.err.println("Graph loaded."); | ||||
} | } | ||||
private static JSAPResult parse_args(String[] args) { | private static JSAPResult parse_args(String[] args) { | ||||
JSAPResult config = null; | JSAPResult config = null; | ||||
try { | try { | ||||
SimpleJSAP jsap = new SimpleJSAP( | SimpleJSAP jsap = new SimpleJSAP(ConnectedComponents.class.getName(), "", | ||||
ConnectedComponents.class.getName(), | |||||
"", | |||||
new Parameter[] { | new Parameter[]{ | ||||
new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, | new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', | ||||
'g', "graph", "Basename of the compressed graph"), | "graph", "Basename of the compressed graph"), | ||||
new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, | new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', | ||||
'o', "outdir", "Directory where to put the results"), | "outdir", "Directory where to put the results"),}); | ||||
} | |||||
); | |||||
config = jsap.parse(args); | config = jsap.parse(args); | ||||
if (jsap.messagePrinted()) { | if (jsap.messagePrinted()) { | ||||
System.exit(1); | System.exit(1); | ||||
} | } | ||||
} catch (JSAPException e) { | } catch (JSAPException e) { | ||||
e.printStackTrace(); | e.printStackTrace(); | ||||
} | } | ||||
return config; | return config; | ||||
} | } | ||||
private HashMap<Long, Long> /*ArrayList<ArrayList<Long>>*/ compute(ProgressLogger pl) throws IOException { | private HashMap<Long, Long> /* ArrayList<ArrayList<Long>> */ compute(ProgressLogger pl) throws IOException { | ||||
final long n = graph.numNodes(); | final long n = graph.numNodes(); | ||||
// Allow enough memory to behave like in-memory queue | // Allow enough memory to behave like in-memory queue | ||||
int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); | int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); | ||||
// Use a disk based queue to store BFS frontier | // Use a disk based queue to store BFS frontier | ||||
final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue"); | final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue"); | ||||
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); | final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); | ||||
final byte[] byteBuf = new byte[Long.BYTES]; | final byte[] byteBuf = new byte[Long.BYTES]; | ||||
// WARNING: no 64-bit version of this data-structure, but it can support | // WARNING: no 64-bit version of this data-structure, but it can support | ||||
// indices up to 2^37 | // indices up to 2^37 | ||||
LongArrayBitVector visited = LongArrayBitVector.ofLength(n); | LongArrayBitVector visited = LongArrayBitVector.ofLength(n); | ||||
Show All 26 Lines | private HashMap<Long, Long> /* ArrayList<ArrayList<Long>> */ compute(ProgressLogger pl) throws IOException { | ||||
visited.set(succ); | visited.set(succ); | ||||
queue.enqueue(Longs.toByteArray(succ)); | queue.enqueue(Longs.toByteArray(succ)); | ||||
} | } | ||||
pl.update(); | pl.update(); | ||||
} | } | ||||
/* | /* | ||||
if (component.size() > 0) { | * if (component.size() > 0) { components.add(component); } | ||||
components.add(component); | |||||
} | |||||
*/ | */ | ||||
if (componentNodes > 0) | if (componentNodes > 0) | ||||
componentDistribution.merge(componentNodes, 1L, Long::sum); | componentDistribution.merge(componentNodes, 1L, Long::sum); | ||||
} | } | ||||
pl.done(); | pl.done(); | ||||
// return components; | // return components; | ||||
return componentDistribution; | return componentDistribution; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | public static void main(String[] args) { | ||||
try { | try { | ||||
connectedComponents.load_graph(graphPath); | connectedComponents.load_graph(graphPath); | ||||
} catch (IOException e) { | } catch (IOException e) { | ||||
System.out.println("Could not load graph: " + e); | System.out.println("Could not load graph: " + e); | ||||
System.exit(2); | System.exit(2); | ||||
} | } | ||||
ProgressLogger logger = new ProgressLogger(); | ProgressLogger logger = new ProgressLogger(); | ||||
//noinspection ResultOfMethodCallIgnored | // noinspection ResultOfMethodCallIgnored | ||||
new File(outdirPath).mkdirs(); | new File(outdirPath).mkdirs(); | ||||
try { | try { | ||||
// ArrayList<ArrayList<Long>> components = connectedComponents.compute(logger); | // ArrayList<ArrayList<Long>> components = connectedComponents.compute(logger); | ||||
// components.sort(Comparator.comparing(ArrayList<Long>::size).reversed()); | // components.sort(Comparator.comparing(ArrayList<Long>::size).reversed()); | ||||
// printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); | // printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); | ||||
// printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt")); | // printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt")); | ||||
// printAllComponents(components, new Formatter(outdirPath + "/all_components.txt")); | // printAllComponents(components, new Formatter(outdirPath + "/all_components.txt")); | ||||
Show All 15 Lines |