Changeset View
Changeset View
Standalone View
Standalone View
dockerfiles/scripts/compress_graph.sh
Show First 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | |||||
fi | fi | ||||
# Name the compressed graph after the last path component of the input graph
# path and place it under the output directory.
dataset=$(basename -- "$graph_path")
compr_graph_path="${out_dir}/${dataset}"

# Make sure the output and temporary directories exist. `mkdir -p` succeeds
# when the directory is already there, so no separate existence test is needed.
mkdir -p -- "$out_dir"
mkdir -p -- "$tmp_dir"
#######################################
# Print a progress banner for one pipeline step.
# Arguments:
#   $1 - step name (e.g. "MPH")
#   $2 - step position (e.g. "1/6")
# Outputs:
#   Writes a blank line, "* swh-graph: <name> step... (<position>)" and
#   another blank line to stdout.
#######################################
step_info() {
  # printf with %s prints the arguments literally; `echo -e` would also
  # interpret backslash sequences found inside them and is not portable.
  printf '\n* swh-graph: %s step... (%s)\n\n' "$1" "$2"
}
#######################################
# Run a Java class under `/usr/bin/time -v`, with every entry of the library
# directory on the classpath.
# Globals:
#   lib_dir (read) - directory used to build the classpath
# Arguments:
#   $@ - main class name followed by its arguments
# Returns:
#   The exit status of the `/usr/bin/time` invocation.
#######################################
java_cmd () {
  # The "*" is kept literal (quoted) so that java, not the shell, expands the
  # classpath wildcard. "$@" forwards each argument as its own word, so
  # arguments containing spaces or glob characters reach java intact.
  /usr/bin/time -v java -cp "${lib_dir}/*" "$@"
}
# Run the compression pipeline. The steps are chained with `&&`, so the first
# failing command stops the chain and makes the whole group fail. Everything
# the steps print is captured in $stdout_file / $stderr_file. The group is
# used directly as the `if` condition so that its status cannot be clobbered
# by a statement inserted between the group and the check.
if {
  # Build a function (MPH) that maps node names to node numbers in
  # lexicographic order (output: .mph)
  step_info "MPH" "1/6" &&
  java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
    --zipped "${compr_graph_path}.mph" --temp-dir "$tmp_dir" \
    "${graph_path}.nodes.csv.gz" &&

  # Build the graph in BVGraph format (output: .{graph,offsets,properties})
  step_info "BV compress" "2/6" &&
  java_cmd it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph \
    --function "${compr_graph_path}.mph" --temp-dir "$tmp_dir" \
    --zipped "${compr_graph_path}-bv" < "${graph_path}.edges.csv.gz" &&
  # Build the offset big-list file to load the graph faster (output: .obl)
  java_cmd it.unimi.dsi.big.webgraph.BVGraph \
    --list "${compr_graph_path}-bv" &&

  # Find a better permutation using a BFS traversal order (output: .order)
  step_info "BFS" "3/6" &&
  java_cmd it.unimi.dsi.law.big.graph.BFS \
    "${compr_graph_path}-bv" "${compr_graph_path}.order" &&

  # Permute the graph accordingly
  step_info "Permute" "4/6" &&
  java_cmd it.unimi.dsi.big.webgraph.Transform mapOffline \
    "${compr_graph_path}-bv" "$compr_graph_path" \
    "${compr_graph_path}.order" "$batch_size" "$tmp_dir" &&
  java_cmd it.unimi.dsi.big.webgraph.BVGraph \
    --list "$compr_graph_path" &&

  # Compute graph statistics (output: .{indegree,outdegree,stats})
  step_info "Stats" "5/6" &&
  java_cmd it.unimi.dsi.big.webgraph.Stats "$compr_graph_path" &&

  # Create transposed graph (to allow backward traversal)
  step_info "Transpose" "6/6" &&
  java_cmd it.unimi.dsi.big.webgraph.Transform transposeOffline \
    "$compr_graph_path" "${compr_graph_path}-transposed" \
    "$batch_size" "$tmp_dir" &&
  java_cmd it.unimi.dsi.big.webgraph.BVGraph \
    --list "${compr_graph_path}-transposed"
} > "$stdout_file" 2> "$stderr_file"; then
  echo "Graph compression done."
else
  echo "Graph compression failed: see $stderr_file for more info."
  exit 1
fi