Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9123136
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
View Options
diff --git a/compression/Dockerfile b/compression/Dockerfile
new file mode 100644
index 0000000..8a9040f
--- /dev/null
+++ b/compression/Dockerfile
@@ -0,0 +1,19 @@
+FROM maven:3.6.0-jdk-11
+WORKDIR /app
+
+# Download webgraph binary
+RUN curl -O http://webgraph.di.unimi.it/webgraph-3.6.1-bin.tar.gz
+RUN tar xvfz webgraph-3.6.1-bin.tar.gz
+RUN cp webgraph-3.6.1/webgraph-3.6.1.jar .
+
+# Download webgraph dependencies
+RUN curl -O http://webgraph.di.unimi.it/webgraph-deps.tar.gz
+RUN tar xvfz webgraph-deps.tar.gz
+
+# Download LAW (for LLP ordering)
+RUN curl -O http://law.di.unimi.it/software/download/law-2.5-bin.tar.gz
+RUN tar xvfz law-2.5-bin.tar.gz
+RUN cp law-2.5/law-2.5.jar .
+
+WORKDIR /graph
+COPY compress_graph.sh .
diff --git a/compression/compress_graph.sh b/compression/compress_graph.sh
new file mode 100755
index 0000000..ac1e42b
--- /dev/null
+++ b/compression/compress_graph.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+if [ "$#" -ne 2 ]; then
+ echo "Expected two arguments: <input graph> <output dir>"
+ exit -1
+fi
+
+INPUT_GRAPH=$1
+OUTPUT_DIR=$2
+DATASET=$(basename $INPUT_GRAPH)
+COMPR_GRAPH="$OUTPUT_DIR/$DATASET"
+
+java_cmd () {
+ /usr/bin/time -v java -Xmx1024G -cp /app/'*' $*
+}
+
+llp_ordering () {
+ # Create a symmetrized version of the graph
+ # (output: .{graph,offsets,properties})
+ java_cmd it.unimi.dsi.webgraph.Transform symmetrizeOffline \
+ $COMPR_GRAPH-bv $COMPR_GRAPH-bv-sym
+ java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH-bv-sym
+
+ # Find a better permutation through Layered LPA
+ java_cmd it.unimi.dsi.law.graph.LayeredLabelPropagation \
+ $COMPR_GRAPH-bv-sym $COMPR_GRAPH.order
+}
+
+bfs_ordering () {
+ java_cmd it.unimi.dsi.law.graph.BFS $COMPR_GRAPH-bv $COMPR_GRAPH.order
+}
+
+mkdir -p $OUTPUT_DIR
+
+# Build a function (MPH) that maps node names to node numbers in lexicographic
+# order (output: .mph)
+java_cmd it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
+ --zipped $COMPR_GRAPH.mph $INPUT_GRAPH.nodes.csv.gz
+
+# Build the graph in BVGraph format (output: .{graph,offsets,properties})
+java_cmd it.unimi.dsi.webgraph.ScatteredArcsASCIIGraph \
+ --function $COMPR_GRAPH.mph \
+ --zipped $COMPR_GRAPH-bv < $INPUT_GRAPH.edges.csv.gz
+# Build the offset big-list file to load the graph faster (output: .obl)
+java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH-bv
+
+# Find a better permutation
+bfs_ordering
+
+# Permute the graph accordingly
+java_cmd it.unimi.dsi.webgraph.Transform mapOffline \
+ $COMPR_GRAPH-bv $COMPR_GRAPH $COMPR_GRAPH.order
+java_cmd it.unimi.dsi.webgraph.BVGraph --list $COMPR_GRAPH
+
+# Compute graph statistics (output: .{indegree,outdegree,stats})
+java_cmd it.unimi.dsi.webgraph.Stats $COMPR_GRAPH
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jun 21, 5:06 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214543
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment