Switch the graph compression pipeline from webgraph to webgraph-big.

compression/Dockerfile: download webgraph-big-3.5.0 and webgraph-big-deps
instead of webgraph-3.6.1 and webgraph-deps.
compression/compress_graph.sh: call the it.unimi.dsi.big.webgraph classes
instead of it.unimi.dsi.webgraph; LayeredLabelPropagation (from LAW) is
kept, gains --longs, and is flagged with a warning comment.

diff --git a/compression/Dockerfile b/compression/Dockerfile
index 8a9040f..4329744 100644
--- a/compression/Dockerfile
+++ b/compression/Dockerfile
@@ -1,19 +1,19 @@
 FROM maven:3.6.0-jdk-11
 WORKDIR /app
 
 # Download webgraph binary
-RUN curl -O http://webgraph.di.unimi.it/webgraph-3.6.1-bin.tar.gz
-RUN tar xvfz webgraph-3.6.1-bin.tar.gz
-RUN cp webgraph-3.6.1/webgraph-3.6.1.jar .
+RUN curl -O http://webgraph.di.unimi.it/webgraph-big-3.5.0-bin.tar.gz
+RUN tar xvfz webgraph-big-3.5.0-bin.tar.gz
+RUN cp webgraph-big-3.5.0/webgraph-big-3.5.0.jar .
 
 # Download webgraph dependencies
-RUN curl -O http://webgraph.di.unimi.it/webgraph-deps.tar.gz
-RUN tar xvfz webgraph-deps.tar.gz
+RUN curl -O http://webgraph.di.unimi.it/webgraph-big-deps.tar.gz
+RUN tar xvfz webgraph-big-deps.tar.gz
 
 # Download LAW (for LLP ordering)
 RUN curl -O http://law.di.unimi.it/software/download/law-2.5-bin.tar.gz
 RUN tar xvfz law-2.5-bin.tar.gz
 RUN cp law-2.5/law-2.5.jar .
 
 WORKDIR /graph
 COPY compress_graph.sh .
diff --git a/compression/compress_graph.sh b/compression/compress_graph.sh
index ac1e42b..211aaab 100755
--- a/compression/compress_graph.sh
+++ b/compression/compress_graph.sh
@@ -1,56 +1,57 @@
 #!/bin/bash
 
 if [ "$#" -ne 2 ]; then
     echo "Expected two arguments: <input graph> <output dir>"
     exit -1
 fi
 
 INPUT_GRAPH=$1
 OUTPUT_DIR=$2
 DATASET=$(basename $INPUT_GRAPH)
 COMPR_GRAPH="$OUTPUT_DIR/$DATASET"
 
 java_cmd () {
     /usr/bin/time -v java -Xmx1024G -cp /app/'*' $*
 }
 
 llp_ordering () {
     # Create a symmetrized version of the graph
     # (output: .{graph,offsets,properties})
-    java_cmd it.unimi.dsi.webgraph.Transform symmetrizeOffline \
+    java_cmd it.unimi.dsi.big.webgraph.Transform symmetrizeOffline \
         $COMPR_GRAPH-bv $COMPR_GRAPH-bv-sym
-    java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH-bv-sym
+    java_cmd it.unimi.dsi.big.webgraph.BVGraph --list $COMPR_GRAPH-bv-sym
     # Find a better permutation through Layered LPA
+    # WARNING: no 64-bit version of LLP
     java_cmd it.unimi.dsi.law.graph.LayeredLabelPropagation \
-        $COMPR_GRAPH-bv-sym $COMPR_GRAPH.order
+        --longs $COMPR_GRAPH-bv-sym $COMPR_GRAPH.order
 }
 
 bfs_ordering () {
     java_cmd it.unimi.dsi.law.graph.BFS $COMPR_GRAPH-bv $COMPR_GRAPH.order
 }
 
 mkdir -p $OUTPUT_DIR
 
 # Build a function (MPH) that maps node names to node numbers in lexicographic
 # order (output: .mph)
 java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
     --zipped $COMPR_GRAPH.mph $INPUT_GRAPH.nodes.csv.gz
 
 # Build the graph in BVGraph format (output: .{graph,offsets,properties})
-java_cmd it.unimi.dsi.webgraph.ScatteredArcsASCIIGraph \
+java_cmd it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph \
     --function $COMPR_GRAPH.mph \
     --zipped $COMPR_GRAPH-bv < $INPUT_GRAPH.edges.csv.gz
 
 # Build the offset big-list file to load the graph faster (output: .obl)
-java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH-bv
+java_cmd it.unimi.dsi.big.webgraph.BVGraph --list $COMPR_GRAPH-bv
 
 # Find a better permutation
 bfs_ordering
 
 # Permute the graph accordingly
-java_cmd it.unimi.dsi.webgraph.Transform mapOffline \
+java_cmd it.unimi.dsi.big.webgraph.Transform mapOffline \
     $COMPR_GRAPH-bv $COMPR_GRAPH $COMPR_GRAPH.order
-java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH
+java_cmd it.unimi.dsi.big.webgraph.BVGraph --list $COMPR_GRAPH
 
 # Compute graph statistics (output: .{indegree,outdegree,stats})
-java_cmd it.unimi.dsi.webgraph.Stats $COMPR_GRAPH
+java_cmd it.unimi.dsi.big.webgraph.Stats $COMPR_GRAPH