Page MenuHomeSoftware Heritage

D8059.id29079.diff
No OneTemporary

D8059.id29079.diff

diff --git a/docs/compression.rst b/docs/compression.rst
--- a/docs/compression.rst
+++ b/docs/compression.rst
@@ -131,7 +131,7 @@
.. figure:: images/compression_steps.png
:align: center
:alt: Compression steps
- :target: _images/compression_steps.png
+ :scale: 20%
Compression steps
@@ -587,8 +587,23 @@
``graph-transposed-labelled.{properties,labels,labeloffsets}``.
+22. EDGE_LABELS_OBL
+-------------------
+
+Cache the label offsets of the forward labelled graph to make loading faster.
+The resulting label offset big list is stored in the
+``graph-labelled.labelobl`` file.
+
+
+23. EDGE_LABELS_TRANSPOSE_OBL
+-----------------------------
+
+Same as EDGE_LABELS_OBL, but for the transposed labelled graph.
+The resulting label offset big list is stored in the
+``graph-transposed-labelled.labelobl`` file.
+
-22. CLEAN_TMP
+24. CLEAN_TMP
-------------
This step reclaims space by deleting the temporary directory, as well as all
diff --git a/docs/images/compression_steps.dot b/docs/images/compression_steps.dot
--- a/docs/images/compression_steps.dot
+++ b/docs/images/compression_steps.dot
@@ -31,6 +31,8 @@
labels_fcl [label="graph.labels.fcl"];
graph_labelled [label="graph-labelled.*"];
graph_transposed_labelled [label="graph-transposed-labelled.*"];
+ graph_labelled_obl [label="graph-labelled.labelobl"];
+ graph_transposed_labelled_obl [label="graph-transposed-labelled.labelobl"];
subgraph {
node [shape=box, fontname="Courier New"];
@@ -55,6 +57,8 @@
MPH_LABELS;
FCL_LABELS;
EDGE_LABELS;
+ EDGE_LABELS_OBL;
+ EDGE_LABELS_TRANSPOSE_OBL;
}
@@ -102,4 +106,6 @@
graph_order -> EDGE_LABELS;
EDGE_LABELS -> graph_labelled;
EDGE_LABELS -> graph_transposed_labelled;
+ graph_labelled -> EDGE_LABELS_OBL -> graph_labelled_obl;
+ graph_transposed_labelled -> EDGE_LABELS_TRANSPOSE_OBL -> graph_transposed_labelled_obl;
}
diff --git a/swh/graph/cli.py b/swh/graph/cli.py
--- a/swh/graph/cli.py
+++ b/swh/graph/cli.py
@@ -176,7 +176,7 @@
(10) obl, (11) compose_orders, (12) stats, (13) transpose, (14)
transpose_obl, (15) maps, (16) extract_persons, (17) mph_persons, (18)
node_properties, (19) mph_labels, (20) fcl_labels, (21) edge_labels, (22)
- clean_tmp.
+ edge_labels_obl, (23) edge_labels_transpose_obl, (24) clean_tmp.
Compression steps can be selected by name or number using --steps,
separating them with commas; step ranges (e.g., 3-9, 6-, etc.) are also
supported.
diff --git a/swh/graph/webgraph.py b/swh/graph/webgraph.py
--- a/swh/graph/webgraph.py
+++ b/swh/graph/webgraph.py
@@ -42,7 +42,9 @@
MPH_LABELS = 19
FCL_LABELS = 20
EDGE_LABELS = 21
- CLEAN_TMP = 22
+ EDGE_LABELS_OBL = 22
+ EDGE_LABELS_TRANSPOSE_OBL = 23
+ CLEAN_TMP = 24
def __str__(self):
return self.name
@@ -231,6 +233,18 @@
"{in_dir}",
"{out_dir}/{graph_name}",
],
+ CompressionStep.EDGE_LABELS_OBL: [
+ "{java}",
+ "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph",
+ "--list",
+ "{out_dir}/{graph_name}-labelled",
+ ],
+ CompressionStep.EDGE_LABELS_TRANSPOSE_OBL: [
+ "{java}",
+ "it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph",
+ "--list",
+ "{out_dir}/{graph_name}-transposed-labelled",
+ ],
CompressionStep.CLEAN_TMP: [
"rm",
"-rf",

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 1:31 AM (19 h, 46 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220802

Event Timeline