Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Paste
P768
For haltode :-)
Active
Public
Actions
Authored by
seirl
on Sep 15 2020, 2:28 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
None
Subscribers
None
#!/bin/bash
#
# Build the label indexes and the labelled graph for the "python3k" dataset.
#
# Steps performed:
#   1. (disabled) sort the edge labels into tmp/python3k.sortedlabels.txt
#   2. generate a minimal perfect hash (MPH) over the sorted labels
#   3. generate the reverse MPH index (permutation + front-coded lists)
#   4. write the labelled graph from python3k.edgeslabels.csv.zst
#
# Inputs read from the current directory:
#   python3k.edgeslabels.csv.zst
#   tmp/python3k.sortedlabels.txt  (must already exist: the sort step that
#                                   produces it is commented out below)
# Outputs are written under compress/ and tmp/.

# -e: stop at the first failing command; -u: reject unset variables;
# pipefail: a failing zstdcat in the final pipeline is no longer masked by
# the exit status of the java process that follows it.
set -euo pipefail

mkdir -p tmp

SWH_GRAPH_JAR=~/swh-graph-0.2.9.jar
export JAVA_OPTS="-Xmx700G -server -Djava.io.tmpdir=tmp -Dlogback.configurationFile=/home/seirl/logback.xml -XX:PretenureSizeThreshold=512M -XX:MaxNewSize=4G -XX:+UseLargePages -XX:+UseTransparentHugePages -XX:+UseNUMA -XX:+UseTLAB -XX:+ResizeTLAB"

# Split the option string into words once, so it can be passed as separate
# arguments without relying on unquoted expansion (which would also glob).
read -r -a java_opts <<<"$JAVA_OPTS"

echo
echo "### Sort the list of labels"
# Disabled: tmp/python3k.sortedlabels.txt is expected to exist already.
# zstdcat python3k.edgeslabels.csv.zst |
# cut -d' ' -f3 |
# sort -S100G -u > tmp/python3k.sortedlabels.txt

echo
echo "### Generate label MPH"
java "${java_opts[@]}" -cp "$SWH_GRAPH_JAR" \
  it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction \
  --temp-dir tmp compress/python3k-labels.mph \
  < tmp/python3k.sortedlabels.txt

echo
echo "### Generate reverse MPH index"

# NOTE(review): the three invocations below do not pass the JVM options
# explicitly, exactly as in the original script -- confirm this is intended.
echo "#### 1. Reverse permutation"
java -cp "$SWH_GRAPH_JAR" \
  org.softwareheritage.graph.utils.MPHTranslate \
  compress/python3k-labels.mph < tmp/python3k.sortedlabels.txt \
  > tmp/python3k-labels-perm.txt

echo "#### 2. Sorted FCL"
java -cp "$SWH_GRAPH_JAR" \
  it.unimi.dsi.util.FrontCodedStringList tmp/python3k-labels-sorted.fcl \
  < tmp/python3k.sortedlabels.txt

echo "#### 3. Permuted FCL"
java -cp "$SWH_GRAPH_JAR" \
  it.unimi.dsi.util.PermutedFrontCodedStringList \
  -i -t tmp/python3k-labels-sorted.fcl tmp/python3k-labels-perm.txt \
  compress/python3k-labels.fcl

echo "#### 4. Cleanup"
# Disabled: intermediate files are kept.
# rm tmp/python3k-labels-perm.txt
# rm tmp/python3k-labels-sorted.fcl
# rm tmp/python3k.sortedlabels.txt

echo
echo "### Write labelled graph"
zstdcat python3k.edgeslabels.csv.zst \
  | java "${java_opts[@]}" -cp "$SWH_GRAPH_JAR" \
    org.softwareheritage.graph.maps.LabelMapBuilder \
    --tmp tmp --graph compress/python3k \
    -d tmp/intermediate-sort-repr.txt
Event Timeline
seirl
created this paste.
Sep 15 2020, 2:28 PM
2020-09-15 14:28:39 (UTC+2)
Log In to Comment