#!/bin/bash
#
# Build the label MPH, the permuted front-coded label list and the labelled
# graph for the "python3k" dataset using the swh-graph jar.
#
# Inputs expected in the current directory:
#   python3k.edgeslabels.csv.zst      zstd-compressed edge/label list
#   tmp/python3k.sortedlabels.txt     sorted unique labels (see the disabled
#                                     sort step below)
#   compress/                         output directory, also passed as the
#                                     graph basename (compress/python3k)

# -e: abort on the first failing command.
# -u: treat unset variables as errors.
# pipefail: make a pipeline fail when any stage fails, so that a broken
#           zstdcat cannot silently feed truncated input to java.
set -euo pipefail

mkdir -p tmp

SWH_GRAPH_JAR=~/swh-graph-0.2.9.jar

# JVM flags for the heavy steps. Kept as a single string and deliberately
# expanded unquoted further down so that it splits into individual flags.
# NOTE(review): the logback configuration path is hard-coded to one user's
# home directory -- adjust when running under another account.
export JAVA_OPTS="-Xmx700G -server -Djava.io.tmpdir=tmp -Dlogback.configurationFile=/home/seirl/logback.xml -XX:PretenureSizeThreshold=512M -XX:MaxNewSize=4G -XX:+UseLargePages -XX:+UseTransparentHugePages -XX:+UseNUMA -XX:+UseTLAB -XX:+ResizeTLAB"

# Sorted label list consumed by the MPH, MPHTranslate and FCL steps.
readonly SORTED_LABELS=tmp/python3k.sortedlabels.txt

echo
echo "### Sort the list of labels"
# This step is currently disabled; the sorted label file must already exist.
# zstdcat python3k.edgeslabels.csv.zst |
#     cut -d' ' -f3 |
#     sort -S100G -u > tmp/python3k.sortedlabels.txt

# Fail early with a clear message instead of a bare redirection error.
if [[ ! -r "$SORTED_LABELS" ]]; then
    printf 'error: %s is missing or unreadable; generate it first (see the disabled sort step)\n' \
        "$SORTED_LABELS" >&2
    exit 1
fi

echo
echo "### Generate label MPH"
# shellcheck disable=SC2086  # JAVA_OPTS must word-split into separate flags
java $JAVA_OPTS -cp "$SWH_GRAPH_JAR" \
    it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
    --temp-dir tmp compress/python3k-labels.mph \
    < "$SORTED_LABELS"

echo
echo "### Generate reverse MPH index"

# NOTE(review): the three steps below run java without $JAVA_OPTS, unlike the
# MPH and LabelMapBuilder steps -- confirm this is intentional.

echo "#### 1. Reverse permutation"
java -cp "$SWH_GRAPH_JAR" \
    org.softwareheritage.graph.utils.MPHTranslate \
    compress/python3k-labels.mph < "$SORTED_LABELS" \
    > tmp/python3k-labels-perm.txt

echo "#### 2. Sorted FCL"
java -cp "$SWH_GRAPH_JAR" \
    it.unimi.dsi.util.FrontCodedStringList tmp/python3k-labels-sorted.fcl \
    < "$SORTED_LABELS"

echo "#### 3. Permuted FCL"
java -cp "$SWH_GRAPH_JAR" \
    it.unimi.dsi.util.PermutedFrontCodedStringList \
    -i -t tmp/python3k-labels-sorted.fcl tmp/python3k-labels-perm.txt \
    compress/python3k-labels.fcl

echo "#### 4. Cleanup"
# Cleanup is currently disabled; intermediate files are left in tmp/.
# rm tmp/python3k-labels-perm.txt
# rm tmp/python3k-labels-sorted.fcl
# rm tmp/python3k.sortedlabels.txt

echo
echo "### Write labelled graph"
# With pipefail set above, a zstdcat failure aborts the script here.
# shellcheck disable=SC2086  # JAVA_OPTS must word-split into separate flags
zstdcat python3k.edgeslabels.csv.zst |
    java $JAVA_OPTS -cp "$SWH_GRAPH_JAR" \
        org.softwareheritage.graph.maps.LabelMapBuilder \
        --tmp tmp --graph compress/python3k \
        -d tmp/intermediate-sort-repr.txt