Page MenuHomeSoftware Heritage
Paste P768

For haltode :-)
ActivePublic

Authored by seirl on Sep 15 2020, 2:28 PM.
#!/bin/bash
#
# Build the edge-label artifacts for the "python3k" graph using the classes
# shipped in the swh-graph jar.
#
# Run from the directory that holds the inputs. Files read:
#   tmp/python3k.sortedlabels.txt   sorted, de-duplicated label list. The step
#                                   that produces it is commented out below,
#                                   so it must already exist.
#   python3k.edgeslabels.csv.zst    zstd-compressed labelled edge list.
# Files written:
#   compress/python3k-labels.mph    output of the "Generate label MPH" step
#   compress/python3k-labels.fcl    output of the "Permuted FCL" step
#   tmp/...                         intermediates (cleanup is commented out)
# NOTE(review): nothing here creates compress/; presumably it already exists
# alongside the compress/python3k graph passed to LabelMapBuilder -- confirm.

# -e: stop at the first failing command.
# -u: treat a reference to an unset variable as an error.
# pipefail: without it, a failing zstdcat in the final pipeline would be
#           masked by java's exit status and the script would report success.
set -euo pipefail

mkdir -p tmp

SWH_GRAPH_JAR=~/swh-graph-0.2.9.jar

# JVM options, one per element so they can be passed to java without relying
# on unquoted word-splitting.
java_opts=(
  -Xmx700G
  -server
  -Djava.io.tmpdir=tmp
  -Dlogback.configurationFile=/home/seirl/logback.xml
  -XX:PretenureSizeThreshold=512M
  -XX:MaxNewSize=4G
  -XX:+UseLargePages
  -XX:+UseTransparentHugePages
  -XX:+UseNUMA
  -XX:+UseTLAB
  -XX:+ResizeTLAB
)
# Still exported as a single space-separated string (same value as before) in
# case anything spawned from here reads JAVA_OPTS from the environment.
export JAVA_OPTS="${java_opts[*]}"

echo
echo "### Sort the list of labels"
# Disabled: tmp/python3k.sortedlabels.txt is expected to be present already.
# zstdcat python3k.edgeslabels.csv.zst |
# cut -d' ' -f3 |
# sort -S100G -u > tmp/python3k.sortedlabels.txt

echo
echo "### Generate label MPH"
java "${java_opts[@]}" -cp "$SWH_GRAPH_JAR" \
  it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
  --temp-dir tmp compress/python3k-labels.mph \
  < tmp/python3k.sortedlabels.txt

echo
echo "### Generate reverse MPH index"
# NOTE(review): the three java calls in this section run without the JVM
# options above (default heap), exactly as in the original -- confirm that
# this is intended.
echo "#### 1. Reverse permutation"
java -cp "$SWH_GRAPH_JAR" \
  org.softwareheritage.graph.utils.MPHTranslate \
  compress/python3k-labels.mph < tmp/python3k.sortedlabels.txt \
  > tmp/python3k-labels-perm.txt

echo "#### 2. Sorted FCL"
java -cp "$SWH_GRAPH_JAR" \
  it.unimi.dsi.util.FrontCodedStringList tmp/python3k-labels-sorted.fcl \
  < tmp/python3k.sortedlabels.txt

echo "#### 3. Permuted FCL"
java -cp "$SWH_GRAPH_JAR" \
  it.unimi.dsi.util.PermutedFrontCodedStringList \
  -i -t tmp/python3k-labels-sorted.fcl tmp/python3k-labels-perm.txt \
  compress/python3k-labels.fcl

echo "#### 4. Cleanup"
# Intermediates are deliberately kept for now; uncomment to remove them.
# rm tmp/python3k-labels-perm.txt
# rm tmp/python3k-labels-sorted.fcl
# rm tmp/python3k.sortedlabels.txt

echo
echo "### Write labelled graph"
# Stream the decompressed edge list into LabelMapBuilder. With pipefail set,
# a decompression error aborts the script instead of being silently ignored.
zstdcat python3k.edgeslabels.csv.zst |
  java "${java_opts[@]}" -cp "$SWH_GRAPH_JAR" \
    org.softwareheritage.graph.maps.LabelMapBuilder \
    --tmp tmp --graph compress/python3k \
    -d tmp/intermediate-sort-repr.txt