diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ComposePermutations.java b/java/src/main/java/org/softwareheritage/graph/utils/ComposePermutations.java new file mode 100644 --- /dev/null +++ b/java/src/main/java/org/softwareheritage/graph/utils/ComposePermutations.java @@ -0,0 +1,51 @@ +package org.softwareheritage.graph.utils; + +import com.martiansoftware.jsap.*; +import it.unimi.dsi.Util; +import it.unimi.dsi.fastutil.io.BinIO; + +import java.io.File; +import java.io.IOException; + +/** + * CLI program used to compose two on-disk permutations. + * + * It takes two on-disk permutations as parameters, p1 and p2, and writes on disk (p1 o p2) at the + * given location. This is useful for multi-step compression (e.g. Unordered -> BFS -> LLP), as it + * can be used to merge all the intermediate permutations. + */ +public class ComposePermutations { + private static JSAPResult parse_args(String[] args) { + JSAPResult config = null; + try { + SimpleJSAP jsap = new SimpleJSAP(ComposePermutations.class.getName(), "", new Parameter[]{ + new UnflaggedOption("firstPermutation", JSAP.STRING_PARSER, JSAP.REQUIRED, "The first permutation"), + new UnflaggedOption("secondPermutation", JSAP.STRING_PARSER, JSAP.REQUIRED, + "The second permutation"), + new UnflaggedOption("outputPermutation", JSAP.STRING_PARSER, JSAP.REQUIRED, + "The output permutation"),}); + + config = jsap.parse(args); + if (jsap.messagePrinted()) { + System.exit(1); + } + } catch (JSAPException e) { + e.printStackTrace(); + } + return config; + } + + public static void main(String[] args) throws IOException, ClassNotFoundException { + JSAPResult config = parse_args(args); + String firstPermFilename = config.getString("firstPermutation"); + String secondPermFilename = config.getString("secondPermutation"); + String outputPermFilename = config.getString("outputPermutation"); + + long[][] firstPerm = BinIO.loadLongsBig(new File(firstPermFilename)); + long[][] secondPerm = BinIO.loadLongsBig(new File(secondPermFilename)); + + long[][] outputPerm = Util.composePermutationsInPlace(firstPerm, secondPerm); + + BinIO.storeLongs(outputPerm, outputPermFilename); + } +} diff --git a/swh/graph/cli.py b/swh/graph/cli.py --- a/swh/graph/cli.py +++ b/swh/graph/cli.py @@ -382,9 +382,10 @@ Output: a directory containing a WebGraph compressed graph - Compression steps are: (1) mph, (2) bv, (3) bv_obl, (4) bfs, (5) permute, - (6) permute_obl, (7) stats, (8) transpose, (9) transpose_obl, (10) maps, - (11) clean_tmp. Compression steps can be selected by name or number using + Compression steps are: (1) mph, (2) bv, (3) bfs, (4) permute_bfs, + (5) transpose_bfs, (6) simplify, (7) llp, (8) permute_llp, (9) obl, (10) + compose_orders, (11) stats, (12) transpose, (13) transpose_obl, (14) maps, + (15) clean_tmp. Compression steps can be selected by name or number using --steps, separating them with commas; step ranges (e.g., 3-9, 6-, etc.) are also supported. diff --git a/swh/graph/config.py b/swh/graph/config.py --- a/swh/graph/config.py +++ b/swh/graph/config.py @@ -42,6 +42,8 @@ # Use 0.1% of the RAM as a batch size: # ~1 billion for big servers, ~10 million for small desktop machines conf["batch_size"] = int(psutil.virtual_memory().total / 1000) + if "llp_gammas" not in conf: + conf["llp_gammas"] = "-0,-1,-2,-3,-4" if "max_ram" not in conf: conf["max_ram"] = str(psutil.virtual_memory().total) if "java_tool_options" not in conf: diff --git a/swh/graph/tests/dataset/output/example-transposed.graph b/swh/graph/tests/dataset/output/example-transposed.graph --- a/swh/graph/tests/dataset/output/example-transposed.graph +++ b/swh/graph/tests/dataset/output/example-transposed.graph @@ -1 +1 @@ -[):¤+Åãuâñ6ü¾Mjk¥Òé5Öº \ No newline at end of file +zÏ.—hÑ®ëÄ×I®–‰tõÄëµì€{‹ÅÐ \ No newline at end of file diff --git a/swh/graph/tests/dataset/output/example-transposed.obl b/swh/graph/tests/dataset/output/example-transposed.obl index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@