Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/compression/Dockerfile b/compression/Dockerfile
new file mode 100644
index 0000000..8a9040f
--- /dev/null
+++ b/compression/Dockerfile
@@ -0,0 +1,19 @@
+FROM maven:3.6.0-jdk-11
+WORKDIR /app
+
+# Download webgraph binary
+RUN curl -O http://webgraph.di.unimi.it/webgraph-3.6.1-bin.tar.gz
+RUN tar xvfz webgraph-3.6.1-bin.tar.gz
+RUN cp webgraph-3.6.1/webgraph-3.6.1.jar .
+
+# Download webgraph dependencies
+RUN curl -O http://webgraph.di.unimi.it/webgraph-deps.tar.gz
+RUN tar xvfz webgraph-deps.tar.gz
+
+# Download LAW (for LLP ordering)
+RUN curl -O http://law.di.unimi.it/software/download/law-2.5-bin.tar.gz
+RUN tar xvfz law-2.5-bin.tar.gz
+RUN cp law-2.5/law-2.5.jar .
+
+WORKDIR /graph
+COPY compress_graph.sh .
diff --git a/compression/compress_graph.sh b/compression/compress_graph.sh
new file mode 100755
index 0000000..ac1e42b
--- /dev/null
+++ b/compression/compress_graph.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+if [ "$#" -ne 2 ]; then
+ echo "Expected two arguments: <input graph> <output dir>"
+ exit -1
+fi
+
+INPUT_GRAPH=$1
+OUTPUT_DIR=$2
+DATASET=$(basename $INPUT_GRAPH)
+COMPR_GRAPH="$OUTPUT_DIR/$DATASET"
+
+java_cmd () {
+ /usr/bin/time -v java -Xmx1024G -cp /app/'*' $*
+}
+
+llp_ordering () {
+ # Create a symmetrized version of the graph
+ # (output: .{graph,offsets,properties})
+ java_cmd it.unimi.dsi.webgraph.Transform symmetrizeOffline \
+ $COMPR_GRAPH-bv $COMPR_GRAPH-bv-sym
+ java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH-bv-sym
+
+ # Find a better permutation through Layered LPA
+ java_cmd it.unimi.dsi.law.graph.LayeredLabelPropagation \
+ $COMPR_GRAPH-bv-sym $COMPR_GRAPH.order
+}
+
+bfs_ordering () {
+ java_cmd it.unimi.dsi.law.graph.BFS $COMPR_GRAPH-bv $COMPR_GRAPH.order
+}
+
+mkdir -p $OUTPUT_DIR
+
+# Build a function (MPH) that maps node names to node numbers in lexicographic
+# order (output: .mph)
+java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
+ --zipped $COMPR_GRAPH.mph $INPUT_GRAPH.nodes.csv.gz
+
+# Build the graph in BVGraph format (output: .{graph,offsets,properties})
+java_cmd it.unimi.dsi.webgraph.ScatteredArcsASCIIGraph \
+ --function $COMPR_GRAPH.mph \
+ --zipped $COMPR_GRAPH-bv < $INPUT_GRAPH.edges.csv.gz
+# Build the offset big-list file to load the graph faster (output: .obl)
+java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH-bv
+
+# Find a better permutation
+bfs_ordering
+
+# Permute the graph accordingly
+java_cmd it.unimi.dsi.webgraph.Transform mapOffline \
+ $COMPR_GRAPH-bv $COMPR_GRAPH $COMPR_GRAPH.order
+java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH
+
+# Compute graph statistics (output: .{indegree,outdegree,stats})
+java_cmd it.unimi.dsi.webgraph.Stats $COMPR_GRAPH

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jun 21, 5:06 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214543

Event Timeline