diff --git a/dockerfiles/Dockerfile b/dockerfiles/Dockerfile
index ca74bb8..c455fd9 100644
--- a/dockerfiles/Dockerfile
+++ b/dockerfiles/Dockerfile
@@ -1,18 +1,19 @@
FROM maven:3.6.0-jdk-11
WORKDIR /graph-lib
# Download the webgraph binary distribution and copy its jar to the top of
# /graph-lib. `curl -f` makes the build fail on an HTTP error instead of
# saving the error page as the archive, and `-L` follows redirects. The
# tarball is removed in the same layer that fetched it so it does not stay
# in the image.
# NOTE(review): the archives come over plain HTTP with no checksum
# verification - consider pinning a sha256 once a trusted value is known.
RUN curl -fLO http://webgraph.di.unimi.it/webgraph-big-3.5.0-bin.tar.gz \
    && tar xzf webgraph-big-3.5.0-bin.tar.gz \
    && cp webgraph-big-3.5.0/webgraph-big-3.5.0.jar . \
    && rm webgraph-big-3.5.0-bin.tar.gz
# Download the webgraph dependencies and unpack them into /graph-lib
RUN curl -fLO http://webgraph.di.unimi.it/webgraph-big-deps.tar.gz \
    && tar xzf webgraph-big-deps.tar.gz \
    && rm webgraph-big-deps.tar.gz
# Monitoring: installs the `time` package. `apt-get update` and
# `apt-get install` share one layer so a cached, stale package index is never
# reused, and the index is deleted in that same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends time \
    && rm -rf /var/lib/apt/lists/*
# Relative COPY destinations below resolve against this directory
WORKDIR /graph
# Copy the configuration directory from the build context to /graph/configuration
+COPY configuration configuration/
# Copy the scripts directory from the build context to /graph/scripts
COPY scripts scripts/
diff --git a/dockerfiles/configuration/logback.xml b/dockerfiles/configuration/logback.xml
new file mode 100644
index 0000000..76a4c7a
--- /dev/null
+++ b/dockerfiles/configuration/logback.xml
@@ -0,0 +1,11 @@
+
+
+
+ %d %r %p [%t] %logger{1} - %m%n
+
+
+
+
+
+
+
diff --git a/dockerfiles/scripts/compress_graph.sh b/dockerfiles/scripts/compress_graph.sh
index 8ac2756..5b79cb3 100755
--- a/dockerfiles/scripts/compress_graph.sh
+++ b/dockerfiles/scripts/compress_graph.sh
@@ -1,86 +1,87 @@
#!/bin/bash
# Print the command-line synopsis on stdout, then terminate the script with
# exit status 1.
usage() {
    printf '%s\n' \
        "Usage: --input --output --lib " \
        " options:" \
        " -t, --tmp (default to /tmp/)"
    exit 1
}
# Settings filled in from the command line; only the temporary directory has
# a default value.
graph_path=""
out_dir=""
lib_path=""
tmp_dir="/tmp/"

# Consume the arguments pairwise. Every recognised option must be followed by
# a value: an option given as the very last argument prints the usage text
# instead of silently storing an empty string (which, for --tmp, would wipe
# the default). Anything unrecognised also ends in usage.
while (( $# )); do
    case "$1" in
        -i|--input)  (( $# > 1 )) || usage; shift; graph_path=$1;;
        -o|--output) (( $# > 1 )) || usage; shift; out_dir=$1;;
        -l|--lib)    (( $# > 1 )) || usage; shift; lib_path=$1;;
        -t|--tmp)    (( $# > 1 )) || usage; shift; tmp_dir=$1;;
        *) usage;;
    esac
    shift
done

# --input, --output and --lib are mandatory; --tmp may be omitted but must
# not be set to an empty string.
if [[ -z $graph_path || -z $out_dir || -z $lib_path || -z $tmp_dir ]]; then
    usage
fi
# Name the compressed graph after the last path component of the input graph.
# All expansions are quoted so that paths containing whitespace or glob
# characters are handled as a single word.
dataset=$(basename -- "$graph_path")
compr_graph_path="$out_dir/$dataset"
stdout_file="$out_dir/stdout"
stderr_file="$out_dir/stderr"

# Make sure the output and temporary directories exist
mkdir -p -- "$out_dir"
mkdir -p -- "$tmp_dir"

# Refuse to run if log files already exist in the output directory
if [[ -f "$stdout_file" || -f "$stderr_file" ]]; then
    echo "Cannot overwrite compression stdout/stderr files"
    exit 1
fi
# Run a Java main class under `/usr/bin/time -v` with the JVM options below.
#
# Arguments: the main class name followed by its own arguments. They are
# forwarded with "$@" so each argument reaches the JVM exactly as the caller
# passed it, without being re-split on whitespace.
# Reads: $lib_path - directory used for the class path. The asterisk is
# quoted on purpose so the literal `<dir>/*` reaches java rather than being
# expanded by the shell.
# NOTE: the logback configuration path is relative, so it is resolved against
# the directory the script is started from.
java_cmd () {
    /usr/bin/time -v java \
        -server -Xmx1024G -XX:PretenureSizeThreshold=512M \
        -XX:MaxNewSize=4G -XX:+UseLargePages -XX:+UseNUMA \
        -XX:+UseTransparentHugePages -XX:+UseTLAB -XX:+ResizeTLAB \
+        -Dlogback.configurationFile=configuration/logback.xml \
        -cp "$lib_path"/'*' "$@"
}
# Batch size handed to the offline Transform steps below
batch_size=1000000000

# Compression pipeline. The stdout and stderr of every step are appended to
# the log files in the output directory. The steps are chained with `&&` so
# the pipeline stops at the first failing step instead of running the
# remaining ones and reporting success.
{
    # Build a function (MPH) that maps node names to node numbers in
    # lexicographic order (output: .mph)
    java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
        --zipped "$compr_graph_path.mph" --temp-dir "$tmp_dir" \
        "$graph_path.nodes.csv.gz" &&

    # Build the graph in BVGraph format (output: .{graph,offsets,properties})
    java_cmd it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph \
        --function "$compr_graph_path.mph" --temp-dir "$tmp_dir" \
        --zipped "$compr_graph_path-bv" < "$graph_path.edges.csv.gz" &&

    # Build the offset big-list file to load the graph faster (output: .obl)
    java_cmd it.unimi.dsi.big.webgraph.BVGraph \
        --list "$compr_graph_path-bv" &&

    # Find a better permutation using a BFS traversal order (output: .order)
    java_cmd it.unimi.dsi.law.graph.BFSBig \
        "$compr_graph_path-bv" "$compr_graph_path.order" &&

    # Permute the graph accordingly
    java_cmd it.unimi.dsi.big.webgraph.Transform mapOffline \
        "$compr_graph_path-bv" "$compr_graph_path" \
        "$compr_graph_path.order" "$batch_size" &&
    java_cmd it.unimi.dsi.big.webgraph.BVGraph \
        --list "$compr_graph_path" &&

    # Compute graph statistics (output: .{indegree,outdegree,stats})
    java_cmd it.unimi.dsi.big.webgraph.Stats "$compr_graph_path" &&

    # Create transposed graph (to allow backward traversal)
    java_cmd it.unimi.dsi.big.webgraph.Transform transposeOffline \
        "$compr_graph_path" "$compr_graph_path-transposed" "$batch_size" &&
    java_cmd it.unimi.dsi.big.webgraph.BVGraph \
        --list "$compr_graph_path-transposed"
} >> "$stdout_file" 2>> "$stderr_file" || {
    # A step returned a non-zero status: report it and propagate the failure
    echo "Graph compression failed, see $stderr_file"
    exit 1
}

echo "Graph compression done."