diff --git a/docs/compression.rst b/docs/compression.rst --- a/docs/compression.rst +++ b/docs/compression.rst @@ -131,7 +131,7 @@ .. figure:: images/compression_steps.png :align: center :alt: Compression steps - :target: _images/compression_steps.png + :scale: 20% Compression steps @@ -587,8 +587,23 @@ ``graph-transposed-labelled.{properties,labels,labeloffsets}``. +22. EDGE_LABELS_OBL +------------------- + +Cache the label offsets of the forward labelled graph to make loading faster. +The resulting label offset big list is stored in the +``graph-labelled.labelobl`` file. + + +23. EDGE_LABELS_TRANSPOSE_OBL +----------------------------- + +Same as EDGE_LABELS_OBL, but for the transposed labelled graph. +The resulting label offset big list is stored in the +``graph-transposed-labelled.labelobl`` file. + -22. CLEAN_TMP +24. CLEAN_TMP ------------- This step reclaims space by deleting the temporary directory, as well as all diff --git a/docs/images/compression_steps.dot b/docs/images/compression_steps.dot --- a/docs/images/compression_steps.dot +++ b/docs/images/compression_steps.dot @@ -31,6 +31,8 @@ labels_fcl [label="graph.labels.fcl"]; graph_labelled [label="graph-labelled.*"]; graph_transposed_labelled [label="graph-transposed-labelled.*"]; + graph_labelled_obl [label="graph-labelled.labelobl"]; + graph_transposed_labelled_obl [label="graph-transposed-labelled.labelobl"]; subgraph { node [shape=box, fontname="Courier New"]; @@ -55,6 +57,8 @@ MPH_LABELS; FCL_LABELS; EDGE_LABELS; + EDGE_LABELS_OBL; + EDGE_LABELS_TRANSPOSE_OBL; } @@ -102,4 +106,6 @@ graph_order -> EDGE_LABELS; EDGE_LABELS -> graph_labelled; EDGE_LABELS -> graph_transposed_labelled; + graph_labelled -> EDGE_LABELS_OBL -> graph_labelled_obl; + graph_transposed_labelled -> EDGE_LABELS_TRANSPOSE_OBL -> graph_transposed_labelled_obl; } diff --git a/swh/graph/cli.py b/swh/graph/cli.py --- a/swh/graph/cli.py +++ b/swh/graph/cli.py @@ -176,7 +176,7 @@ (10) obl, (11) compose_orders, (12) stats, (13) transpose, 
(14) transpose_obl, (15) maps, (16) extract_persons, (17) mph_persons, (18) node_properties, (19) mph_labels, (20) fcl_labels, (21) edge_labels, (22) - clean_tmp. + edge_labels_obl, (23) edge_labels_transpose_obl, (24) clean_tmp. Compression steps can be selected by name or number using --steps, separating them with commas; step ranges (e.g., 3-9, 6-, etc.) are also supported. diff --git a/swh/graph/webgraph.py b/swh/graph/webgraph.py --- a/swh/graph/webgraph.py +++ b/swh/graph/webgraph.py @@ -42,7 +42,9 @@ MPH_LABELS = 19 FCL_LABELS = 20 EDGE_LABELS = 21 - CLEAN_TMP = 22 + EDGE_LABELS_OBL = 22 + EDGE_LABELS_TRANSPOSE_OBL = 23 + CLEAN_TMP = 24 def __str__(self): return self.name @@ -231,6 +233,18 @@ "{in_dir}", "{out_dir}/{graph_name}", ], + CompressionStep.EDGE_LABELS_OBL: [ + "{java}", + "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph", + "--list", + "{out_dir}/{graph_name}-labelled", + ], + CompressionStep.EDGE_LABELS_TRANSPOSE_OBL: [ + "{java}", + "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph", + "--list", + "{out_dir}/{graph_name}-transposed-labelled", + ], CompressionStep.CLEAN_TMP: [ "rm", "-rf",