Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9123124
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
View Options
diff --git a/compression/compress_graph.sh b/compression/compress_graph.sh
index f0c4635..ca9751c 100755
--- a/compression/compress_graph.sh
+++ b/compression/compress_graph.sh
@@ -1,61 +1,65 @@
#!/bin/bash
# Require exactly two positional arguments (<input graph> <output dir>);
# otherwise report the expected usage and abort.
if (( $# != 2 )); then
    # Diagnostics belong on stderr so they never mix with captured stdout.
    echo "Expected two arguments: <input graph> <output dir>" >&2
    # An exit status must lie in 0-255; bash turns -1 into 255, so use that
    # value explicitly and keep the status callers observe.
    exit 255
fi
# Derive every path used below from the two positional arguments.
#   INPUT_GRAPH - prefix of the input files (.nodes.csv.gz / .edges.csv.gz)
#   OUTPUT_DIR  - directory receiving all generated files
INPUT_GRAPH=$1
OUTPUT_DIR=$2
# Quote the expansion and end option parsing with `--` so an input path with
# whitespace, glob characters or a leading dash is handled as one operand.
DATASET=$(basename -- "$INPUT_GRAPH")
# Common basename of the output files: <output dir>/<input basename>.
COMPR_GRAPH="$OUTPUT_DIR/$DATASET"
# Scratch directory handed to the tools through their --temp-dir option.
+TEMP_DIR="$OUTPUT_DIR/tmp"
# Run a Java program from the /app classpath under `/usr/bin/time -v`.
# Arguments: $@ - main class name followed by its own arguments.
# Returns:   the exit status of the /usr/bin/time invocation.
java_cmd () {
    # The quoted '*' keeps the shell from globbing, so java receives the
    # literal classpath entry /app/*.
    # "$@" forwards each argument as exactly one word; an unquoted $* would
    # re-split arguments on whitespace and glob-expand them.
    /usr/bin/time -v java \
        -Xmx1024G -server -XX:PretenureSizeThreshold=512M -XX:MaxNewSize=4G \
        -XX:+UseLargePages -XX:+UseTransparentHugePages -XX:+UseNUMA \
        -XX:+UseTLAB -XX:+ResizeTLAB \
        -cp /app/'*' "$@"
}
# Compute a node ordering through Layered Label Propagation.
# Globals: COMPR_GRAPH (read) - basename shared by the graph files; the
#          function reads "$COMPR_GRAPH-bv" and names "$COMPR_GRAPH-bv-sym"
#          and "$COMPR_GRAPH.order" as the other arguments.
# Every expansion is quoted so a basename containing whitespace or glob
# characters reaches java_cmd as a single argument.
llp_ordering () {
    # Create a symmetrized version of the graph
    # (output: .{graph,offsets,properties})
    java_cmd it.unimi.dsi.big.webgraph.Transform symmetrizeOffline \
        "$COMPR_GRAPH-bv" "$COMPR_GRAPH-bv-sym"
    java_cmd it.unimi.dsi.big.webgraph.BVGraph --list "$COMPR_GRAPH-bv-sym"
    # Find a better permutation through Layered LPA
    # WARNING: no 64-bit version of LLP
    java_cmd it.unimi.dsi.law.graph.LayeredLabelPropagation \
        --longs "$COMPR_GRAPH-bv-sym" "$COMPR_GRAPH.order"
}
# Produce the node ordering consumed by the later permutation step with one
# java_cmd call, passing "$COMPR_GRAPH-bv" and "$COMPR_GRAPH.order".
# The patch replaces the class BFS with BFSBig; the arguments are unchanged.
# NOTE(review): presumably a breadth-first visit order, with BFSBig being the
# big-graph variant of BFS - confirm against the LAW library documentation.
bfs_ordering () {
- java_cmd it.unimi.dsi.law.graph.BFS $COMPR_GRAPH-bv $COMPR_GRAPH.order
+ java_cmd it.unimi.dsi.law.graph.BFSBig $COMPR_GRAPH-bv $COMPR_GRAPH.order
}
# Main pipeline: each step runs one tool through java_cmd and works on files
# that share the $COMPR_GRAPH basename.
# Create the output directory and, with this patch, the scratch directory
# ($OUTPUT_DIR/tmp) before any tool is started.
mkdir -p $OUTPUT_DIR
+mkdir -p $TEMP_DIR
# Build a function (MPH) that maps node names to node numbers in lexicographic
# order (output: .mph)
# The patch additionally passes --temp-dir $TEMP_DIR to the tool.
-java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
- --zipped $COMPR_GRAPH.mph $INPUT_GRAPH.nodes.csv.gz
+java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
+ --zipped $COMPR_GRAPH.mph --temp-dir $TEMP_DIR \
+ $INPUT_GRAPH.nodes.csv.gz
# Build the graph in BVGraph format (output: .{graph,offsets,properties})
# The edge list $INPUT_GRAPH.edges.csv.gz is fed on stdin and the .mph file
# built above is passed via --function; the patch adds --temp-dir here too.
java_cmd it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph \
- --function $COMPR_GRAPH.mph \
+ --function $COMPR_GRAPH.mph --temp-dir $TEMP_DIR \
--zipped $COMPR_GRAPH-bv < $INPUT_GRAPH.edges.csv.gz
# Build the offset big-list file to load the graph faster (output: .obl)
java_cmd it.unimi.dsi.big.webgraph.BVGraph --list $COMPR_GRAPH-bv
# Find a better permutation
# (bfs_ordering is the ordering used here; llp_ordering is defined above but
# not called in this section.)
bfs_ordering
# Permute the graph accordingly
# The patch appends $BATCH_SIZE as a fourth mapOffline argument.
# NOTE(review): presumably the number of arcs handled per batch - confirm
# against the WebGraph Transform documentation.
+BATCH_SIZE=1000000000
java_cmd it.unimi.dsi.big.webgraph.Transform mapOffline \
- $COMPR_GRAPH-bv $COMPR_GRAPH $COMPR_GRAPH.order
+ $COMPR_GRAPH-bv $COMPR_GRAPH $COMPR_GRAPH.order $BATCH_SIZE
# Same --list step as for the -bv graph, now on the permuted graph.
java_cmd it.unimi.dsi.big.webgraph.BVGraph --list $COMPR_GRAPH
# Compute graph statistics (output: .{indegree,outdegree,stats})
java_cmd it.unimi.dsi.big.webgraph.Stats $COMPR_GRAPH
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jun 21, 5:05 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3282987
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment