# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""
Luigi tasks
===========

This package contains `Luigi <https://luigi.readthedocs.io/>`_ tasks.
These come in two kinds:

* in :mod:`swh.graph.luigi.compressed_graph`: an alternative to the
  'swh graph compress' CLI that can be composed with other tasks,
  such as swh-dataset's
* in other submodules: tasks driving the creation of specific datasets that are
  generated using the compressed graph


The overall directory structure is::

    base_dir/
        <date>[_<flavor>]/
            edges/
                ...
            orc/
                ...
            compressed/
                graph.graph
                graph.mph
                ...
                meta/
                    export.json
                    compression.json
            datasets/
                contribution_graph.csv.zst
            topology/
                topological_order_dfs.csv.zst

And optionally::

    sensitive_base_dir/
        <date>[_<flavor>]/
            persons_sha256_to_name.csv.zst
            datasets/
                contribution_graph.deanonymized.csv.zst
"""

# WARNING: do not import unnecessary things here to keep cli startup time under
# control
from typing import List

import luigi

from . import compressed_graph


class RunExportCompressUpload(luigi.Task):
    """Runs dataset export and graph compression, then uploads the compressed
    graph to S3.

    This is a pure aggregation task: it performs no work of its own and only
    pulls in the tasks returned by :meth:`requires`.
    """

    def requires(self) -> List[luigi.Task]:
        """Returns instances of :class:`swh.dataset.luigi.RunExportAll`
        and :class:`swh.graph.luigi.compressed_graph.UploadGraphToS3`, which
        recursively depend on the whole export and compression pipeline.
        """
        # Imported lazily, in line with the module-level warning about
        # keeping imports (and therefore CLI startup time) to a minimum.
        from swh.dataset.luigi import RunExportAll

        return [
            RunExportAll(),
            compressed_graph.UploadGraphToS3(),
        ]

    def complete(self) -> bool:
        """Always returns ``False``.

        Dependencies perform their own completeness check, and this task
        does no work itself, so it never reports itself as complete.
        """
        return False