diff --git a/swh/graph/luigi/__init__.py b/swh/graph/luigi/__init__.py
index a82ff9b..bdd0b6c 100644
--- a/swh/graph/luigi/__init__.py
+++ b/swh/graph/luigi/__init__.py
@@ -1,75 +1,75 @@
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""
Luigi tasks
===========
This package contains `Luigi <https://luigi.readthedocs.io/>`_ tasks.
These come in two kinds:
* in :mod:`swh.graph.luigi.compressed_graph`: an alternative to the 'swh graph compress'
CLI that can be composed with other tasks, such as swh-dataset's
* in other submodules: tasks driving the creation of specific datasets that are
generated using the compressed graph
The overall directory structure is::
base_dir/
<date>[_<flavor>]/
edges/
...
orc/
...
compressed/
graph.graph
graph.mph
...
meta/
export.json
compression.json
datasets/
contribution_graph.csv.zst
topology/
topological_order_dfs.csv.zst
And optionally::
sensitive_base_dir/
<date>[_<flavor>]/
persons_sha256_to_name.csv.zst
datasets/
contribution_graph.deanonymized.csv.zst
"""
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
from typing import List
import luigi
-from . import compressed_graph, origin_contributors
+from . import compressed_graph
-class RunAll(luigi.Task):
+class RunExportCompressUpload(luigi.Task):
-"""Runs dataset export, graph compression, and generates datasets using the graph."""
+"""Runs dataset export, graph compression, and uploads the compressed graph to S3."""
def requires(self) -> List[luigi.Task]:
+ """Returns instances of :class:`swh.dataset.luigi.RunExportAll`
+ and :class:`swh.graph.luigi.compressed_graph.UploadGraphToS3`, which
+ recursively depend on the whole export and compression pipeline.
+ """
from swh.dataset.luigi import RunExportAll
- # Technically RunExportAll and DeanonymizeOriginContributors together depend
- # on everything else, but it's best to be explicit
return [
RunExportAll(),
- compressed_graph.LocalGraph(),
- origin_contributors.ListOriginContributors(),
- origin_contributors.DeanonymizeOriginContributors(),
+ compressed_graph.UploadGraphToS3(),
]
def complete(self) -> bool:
# Dependencies perform their own completeness check, and this task
# does no work itself
return False