Changeset View
Changeset View
Standalone View
Standalone View
java/src/main/java/org/softwareheritage/graph/benchmark/ForkCC.java
package org.softwareheritage.graph.benchmark; | package org.softwareheritage.graph.benchmark; | ||||
import com.google.common.primitives.Longs; | import com.google.common.primitives.Longs; | ||||
import com.martiansoftware.jsap.*; | import com.martiansoftware.jsap.*; | ||||
import it.unimi.dsi.big.webgraph.ImmutableGraph; | |||||
import it.unimi.dsi.big.webgraph.LazyLongIterator; | import it.unimi.dsi.big.webgraph.LazyLongIterator; | ||||
import it.unimi.dsi.big.webgraph.Transform; | |||||
import it.unimi.dsi.bits.LongArrayBitVector; | import it.unimi.dsi.bits.LongArrayBitVector; | ||||
import it.unimi.dsi.fastutil.Arrays; | import it.unimi.dsi.fastutil.Arrays; | ||||
import it.unimi.dsi.io.ByteDiskQueue; | import it.unimi.dsi.io.ByteDiskQueue; | ||||
import it.unimi.dsi.logging.ProgressLogger; | import it.unimi.dsi.logging.ProgressLogger; | ||||
import org.softwareheritage.graph.Graph; | import org.softwareheritage.graph.Graph; | ||||
import org.softwareheritage.graph.Node; | import org.softwareheritage.graph.Node; | ||||
import java.io.File; | import java.io.File; | ||||
import java.io.FileNotFoundException; | import java.io.FileNotFoundException; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.util.*; | import java.util.*; | ||||
public class ForkCC { | public class ForkCC { | ||||
public Boolean includeRootDir; | public Boolean includeRootDir; | ||||
private Graph graph; | private Graph graph; | ||||
private Long emptySnapshot; | private Long emptySnapshot; | ||||
private LongArrayBitVector visited; | private LongArrayBitVector visited; | ||||
private LongArrayBitVector whitelist; | private LongArrayBitVector whitelist; | ||||
private void load_graph(String graphBasename) throws IOException { | private void load_graph(String graphBasename) throws IOException { | ||||
System.err.println("Loading graph " + graphBasename + " ..."); | System.err.println("Loading graph " + graphBasename + " ..."); | ||||
this.graph = new Graph(graphBasename); | this.graph = new Graph(graphBasename).symmetrize(); | ||||
System.err.println("Graph loaded."); | System.err.println("Graph loaded."); | ||||
this.emptySnapshot = null; | this.emptySnapshot = null; | ||||
this.whitelist = null; | this.whitelist = null; | ||||
this.visited = null; | this.visited = null; | ||||
this.includeRootDir = null; | this.includeRootDir = null; | ||||
} | } | ||||
private static JSAPResult parse_args(String[] args) { | private static JSAPResult parse_args(String[] args) { | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | private Boolean shouldVisit(Long node){ | ||||
if (this.nodeIsEmptySnapshot(node)) | if (this.nodeIsEmptySnapshot(node)) | ||||
return false; | return false; | ||||
if (visited.getBoolean(node)) | if (visited.getBoolean(node)) | ||||
return false; | return false; | ||||
return true; | return true; | ||||
} | } | ||||
private ArrayList<ArrayList<Long>> compute(final ImmutableGraph graph, ProgressLogger pl) throws IOException { | private ArrayList<ArrayList<Long>> compute(ProgressLogger pl) throws IOException { | ||||
final long n = graph.numNodes(); | final long n = graph.numNodes(); | ||||
// Allow enough memory to behave like in-memory queue | // Allow enough memory to behave like in-memory queue | ||||
int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); | int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); | ||||
// Use a disk based queue to store BFS frontier | // Use a disk based queue to store BFS frontier | ||||
final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue"); | final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue"); | ||||
final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); | final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); | ||||
final byte[] byteBuf = new byte[Long.BYTES]; | final byte[] byteBuf = new byte[Long.BYTES]; | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | private static void printLargestComponent(ArrayList<ArrayList<Long>> components) { | ||||
ArrayList<Long> component = components.get(indexLargest); | ArrayList<Long> component = components.get(indexLargest); | ||||
for (Long node : component) { | for (Long node : component) { | ||||
System.out.println(node); | System.out.println(node); | ||||
} | } | ||||
} | } | ||||
private void parseWhitelist(String path) { | private void parseWhitelist(String path) { | ||||
System.err.println("Loading whitelist " + path + " ..."); | System.err.println("Loading whitelist " + path + " ..."); | ||||
this.whitelist = LongArrayBitVector.ofLength(this.graph.getNbNodes()); | this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); | ||||
Scanner scanner = null; | Scanner scanner = null; | ||||
try { | try { | ||||
scanner = new Scanner(new File(path)); | scanner = new Scanner(new File(path)); | ||||
while(scanner.hasNextLong()) { | while(scanner.hasNextLong()) { | ||||
whitelist.set(scanner.nextLong()); | whitelist.set(scanner.nextLong()); | ||||
} | } | ||||
System.err.println("Whitelist loaded."); | System.err.println("Whitelist loaded."); | ||||
} catch (FileNotFoundException e) { | } catch (FileNotFoundException e) { | ||||
Show All 17 Lines | public static void main(String[] args) { | ||||
System.out.println("Could not load graph: " + e); | System.out.println("Could not load graph: " + e); | ||||
System.exit(2); | System.exit(2); | ||||
} | } | ||||
if (whitelistPath != null) { | if (whitelistPath != null) { | ||||
forkCc.parseWhitelist(whitelistPath); | forkCc.parseWhitelist(whitelistPath); | ||||
} | } | ||||
ImmutableGraph symmetric = Transform.union( | |||||
forkCc.graph.getBVGraph(false), | |||||
forkCc.graph.getBVGraph(true) | |||||
); | |||||
ProgressLogger logger = new ProgressLogger(); | ProgressLogger logger = new ProgressLogger(); | ||||
try { | try { | ||||
ArrayList<ArrayList<Long>> components = forkCc.compute(symmetric, logger); | ArrayList<ArrayList<Long>> components = forkCc.compute(logger); | ||||
printDistribution(components); | printDistribution(components); | ||||
// printLargestComponent(components); | // printLargestComponent(components); | ||||
} catch (IOException e) { | } catch (IOException e) { | ||||
e.printStackTrace(); | e.printStackTrace(); | ||||
} | } | ||||
logger.done(); | logger.done(); | ||||
} | } | ||||
} | } |