# Copyright (C) 2015 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import logging

from swh.cloner.git.storage import db, models
from swh.cloner.git.worker import tasks


def just_print_repos(repository_scheme, repos):
    """Only print what should be posted in queue (dry-run handler).

    Args:
        repository_scheme: not used; accepted so that this function has
            the same signature as post_to_task_queue.
        repos: iterable of (repository id, repository name) tuples.

    Returns:
        None

    """
    print('This is a dry run, will only display the repositories to load...')
    for _repo_id, repo_name in repos:
        print('load repo %s into queue' % repo_name)


def post_to_task_queue(repository_scheme, repos):
    """Post one clone task per repository to the task queue.

    For each repository, the clone url is built by interpolating the
    repository name into repository_scheme, then a message holding the
    repository id, url and name is handed to
    tasks.orchestrate_clone_with_measure.delay.

    Args:
        repository_scheme: %-style format string; the repository name is
            interpolated into it to build the repository url.
        repos: iterable of (repository id, repository name) tuples.

    Returns:
        None

    """
    for repo_id, repo_name in repos:
        # Lazy %-args: the message is only formatted if the record is emitted.
        logging.info('load repo %s into queue', repo_name)
        repo_url = repository_scheme % repo_name
        model_data = {
            'repo_id': repo_id,
            'repo_url': repo_url,
            'repo_name': repo_name,
        }
        tasks.orchestrate_clone_with_measure.delay(model_data)


# Dispatch on the dry_run flag: only True selects the printing handler; any
# other value makes produce() fall back to post_to_task_queue.
_run_fn = {True: just_print_repos}


def produce(conf):
    """Produce a list of repositories to clone in the queue.

    Args:
        conf: a dictionary of setup
            - db_url: the setup string to access the db
            - dry_run: optional flag setup by cli to avoid actually
              producing real messages; defaults to False when absent
            - repository_scheme: the uri to use for cloning repositories

    Raises:
        KeyError: if 'db_url' or 'repository_scheme' is missing from conf.

    """
    db_url = conf['db_url']
    # dry_run is documented as optional, so a missing key must not raise.
    dry_run = conf.get('dry_run', False)
    repository_scheme = conf['repository_scheme']

    run_fn = _run_fn.get(dry_run, post_to_task_queue)

    with db.connect(db_url) as db_conn:
        repos = models.load_repos(db_conn)
        # NOTE(review): repos is presumably produced lazily from db_conn, so
        # it is consumed while the connection is still open -- confirm
        # against models.load_repos.
        run_fn(repository_scheme, repos)