Changeset View
Changeset View
Standalone View
Standalone View
dockerfiles/scripts/compress_graph.sh
#!/bin/bash | #!/bin/bash | ||||
usage() { | usage() { | ||||
echo "Usage: --input <graph path> --output <out dir> --lib <graph lib dir>" | echo "Usage: compress_graph.sh --lib <LIB_DIR> --input <GRAPH_BASEPATH>" | ||||
echo " options:" | echo "Options:" | ||||
echo " -t, --tmp <temporary dir> (default to /tmp/)" | echo " -o, --outdir <OUT_DIR> (Default: GRAPH_DIR/compressed)" | ||||
echo " --stdout <stdout file> (default to ./stdout)" | echo " -t, --tmp <TMP_DIR> (Default: OUT_DIR/tmp)" | ||||
echo " --stderr <stderr file> (default to ./stderr)" | echo " --stdout <STDOUT_LOG> (Default: OUT_DIR/stdout)" | ||||
echo " --batch-size <batch size> (default to 10^6): WebGraph internals" | echo " --stderr <STDERR_LOG> (Default: OUT_DIR/stderr)" | ||||
echo " --batch-size <BATCH_SIZE> (Default: 10^6): WebGraph internals" | |||||
exit 1 | exit 1 | ||||
} | } | ||||
graph_path="" | graph_path="" | ||||
out_dir="" | out_dir="" | ||||
lib_dir="" | lib_dir="" | ||||
tmp_dir="/tmp/" | stdout_file="" | ||||
stdout_file="stdout" | stderr_file="" | ||||
stderr_file="stderr" | |||||
batch_size=1000000 | batch_size=1000000 | ||||
while (( "$#" )); do | while (( "$#" )); do | ||||
case "$1" in | case "$1" in | ||||
-i|--input) shift; graph_path=$1;; | -i|--input) shift; graph_path=$1 ;; | ||||
-o|--output) shift; out_dir=$1;; | -o|--outdir) shift; out_dir=$1 ;; | ||||
-l|--lib) shift; lib_dir=$1;; | -l|--lib) shift; lib_dir=$1 ;; | ||||
-t|--tmp) shift; tmp_dir=$1;; | -t|--tmp) shift; tmp_dir=$1 ;; | ||||
--stdout) shift; stdout_file=$1;; | --stdout) shift; stdout_file=$1 ;; | ||||
--stderr) shift; stderr_file=$1;; | --stderr) shift; stderr_file=$1 ;; | ||||
--batch-size) shift; batch_size=$1;; | --batch-size) shift; batch_size=$1 ;; | ||||
*) usage;; | *) usage ;; | ||||
esac | esac | ||||
shift | shift | ||||
done | done | ||||
if [[ -z $graph_path || -z $out_dir || -z $lib_dir ]]; then | if [[ -z "$graph_path" || ! -d "$lib_dir" ]]; then | ||||
usage | usage | ||||
fi | fi | ||||
if [ -z "$out_dir" ] ; then | |||||
if [[ -f "$stdout_file" || -f "$stderr_file" ]]; then | out_dir="$(dirname $graph_path)/compressed" | ||||
echo "Cannot overwrite previous compression stdout/stderr files" | fi | ||||
exit 1 | if [ -z "$tmp_dir" ] ; then | ||||
tmp_dir="${out_dir}/tmp" | |||||
fi | |||||
if [ -z "$stdout_file" ] ; then | |||||
stdout_file="${out_dir}/stdout" | |||||
fi | |||||
if [ -z "$stderr_file" ] ; then | |||||
stderr_file="${out_dir}/stderr" | |||||
fi | fi | ||||
dataset=$(basename $graph_path) | dataset=$(basename $graph_path) | ||||
compr_graph_path="$out_dir/$dataset" | compr_graph_path="${out_dir}/${dataset}" | ||||
mkdir -p $out_dir | test -d "$out_dir" || mkdir -p "$out_dir" | ||||
mkdir -p $tmp_dir | test -d "$tmp_dir" || mkdir -p "$tmp_dir" | ||||
java_cmd () { | java_cmd () { | ||||
/usr/bin/time -v java -cp $lib_dir/'*' $* | java -cp $lib_dir/'*' $* | ||||
} | } | ||||
{ | { | ||||
# Build a function (MPH) that maps node names to node numbers in | # Build a function (MPH) that maps node names to node numbers in | ||||
# lexicographic order (output: .mph) | # lexicographic order (output: .mph) | ||||
java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \ | java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \ | ||||
--zipped $compr_graph_path.mph --temp-dir $tmp_dir \ | --zipped $compr_graph_path.mph --temp-dir $tmp_dir \ | ||||
$graph_path.nodes.csv.gz && | $graph_path.nodes.csv.gz && | ||||
Show All 21 Lines | java_cmd () { | ||||
java_cmd it.unimi.dsi.big.webgraph.Stats $compr_graph_path && | java_cmd it.unimi.dsi.big.webgraph.Stats $compr_graph_path && | ||||
# Create transposed graph (to allow backward traversal) | # Create transposed graph (to allow backward traversal) | ||||
java_cmd it.unimi.dsi.big.webgraph.Transform transposeOffline \ | java_cmd it.unimi.dsi.big.webgraph.Transform transposeOffline \ | ||||
$compr_graph_path $compr_graph_path-transposed \ | $compr_graph_path $compr_graph_path-transposed \ | ||||
$batch_size $tmp_dir && | $batch_size $tmp_dir && | ||||
java_cmd it.unimi.dsi.big.webgraph.BVGraph \ | java_cmd it.unimi.dsi.big.webgraph.BVGraph \ | ||||
--list $compr_graph_path-transposed | --list $compr_graph_path-transposed | ||||
} >> $stdout_file 2>> $stderr_file | } > $stdout_file 2> $stderr_file | ||||
if [[ $? -eq 0 ]]; then | if [[ $? -eq 0 ]]; then | ||||
echo "Graph compression done." | echo "Graph compression done." | ||||
else | else | ||||
echo "Graph compression failed: see $stderr_file for more info." | echo "Graph compression failed: see $stderr_file for more info." | ||||
exit 1 | exit 1 | ||||
fi | fi |