diff --git a/swh/fetcher/googlecode/checker_producer.py b/swh/fetcher/googlecode/checker_producer.py index c8491f4..5a00136 100644 --- a/swh/fetcher/googlecode/checker_producer.py +++ b/swh/fetcher/googlecode/checker_producer.py @@ -1,33 +1,33 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import sys -task_name = 'swh.fetcher.googlecode.tasks.SWHGoogleArchiveCheckerTask' +task_name = 'swh.fetcher.googlecode.tasks.SWHGoogleArchiveDispatchCheckerTask' @click.command() @click.option('--archive-path', help="Archive path to check") @click.option('--temp-dir', help="Temporary folder to make check computations on archive.") def produce(archive_path, temp_dir): from swh.scheduler.celery_backend.config import app from swh.fetcher.googlecode import tasks # noqa task = app.tasks[task_name] if archive_path: # for debug purpose, one archive task.delay(archive_path, temp_dir) else: # otherwise, we deal in archive_path batch for archive_path in sys.stdin: archive_path = archive_path.rstrip() print(archive_path) task.delay(archive_path, temp_dir) if __name__ == '__main__': produce() diff --git a/swh/fetcher/googlecode/tasks.py b/swh/fetcher/googlecode/tasks.py index 4cc42c4..26c63cc 100644 --- a/swh/fetcher/googlecode/tasks.py +++ b/swh/fetcher/googlecode/tasks.py @@ -1,81 +1,79 @@ # Copyright (C) 2015 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.task import Task from .fetcher import SWHGoogleArchiveFetcher from .checker import SWHGoogleArchiveChecker, SWHGoogleArchiveDispatchChecker class SWHGoogleArchiveFetcherTask(Task): """Main task to fetch and check archive source 
from google code archive server. The checks are made on: - size - md5 from the '.json' file associated with the archive fetched. """ task_queue = 'swh_fetcher_googlecode_fetch_archive' def run(self, archive_gs, destination_rootpath): SWHGoogleArchiveFetcher().process(archive_gs, destination_rootpath) -# FIXME: It's a dispatch (keep the old name for production reason for -# now) -class SWHGoogleArchiveCheckerTask(Task): +class SWHGoogleArchiveDispatchCheckerTask(Task): """Main task to check fetched archive files from google code archive server. Check the length of the archives. If archive's length is not ok, refetch it. When done, depending on its size, dispatch: - large: to SWHGoogleHugeArchiveCheckerTask - small: to SWHGoogleSmallArchiveCheckerTask - uncompress the archive on a temporary folder - integrity check according to repo's nature (git, hg, svn) """ task_queue = 'swh_fetcher_googlecode_check_archive' def run(self, path, root_temp_dir): SWHGoogleArchiveDispatchChecker().process(path, root_temp_dir) -class AbstractSWHGoogleArchiveCheckerTask(Task): +class SWHGoogleArchiveCheckerTask(Task): """Main task to check fetched archive files from google code archive server. The checks are more thorough, that is: - uncompress the archive on a temporary folder - integrity check according to repo's nature (git, hg, svn) Intended to be inherited (cf. SWHGoogleSmallArchiveCheckerTask, SWHGoogleMediumArchiveCheckerTask, SWHGoogleHugeArchiveCheckerTask) """ def run(self, archive_path, repo_type, root_temp_dir): """Process a repo archive archive_path of type repo_type. The archive is uncompressed in root_temp_dir. 
""" SWHGoogleArchiveChecker().process( archive_path, repo_type, root_temp_dir) -class SWHGoogleSmallArchiveCheckerTask(AbstractSWHGoogleArchiveCheckerTask): +class SWHGoogleSmallArchiveCheckerTask(SWHGoogleArchiveCheckerTask): task_queue = 'swh_fetcher_googlecode_check_small_archive' -class SWHGoogleMediumArchiveCheckerTask(AbstractSWHGoogleArchiveCheckerTask): +class SWHGoogleMediumArchiveCheckerTask(SWHGoogleArchiveCheckerTask): task_queue = 'swh_fetcher_googlecode_check_medium_archive' -class SWHGoogleHugeArchiveCheckerTask(AbstractSWHGoogleArchiveCheckerTask): +class SWHGoogleHugeArchiveCheckerTask(SWHGoogleArchiveCheckerTask): task_queue = 'swh_fetcher_googlecode_check_huge_archive'