diff --git a/PKG-INFO b/PKG-INFO index be85781..3bc48c4 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.svn -Version: 0.0.29 +Version: 0.0.30 Summary: Software Heritage Loader SVN Home-page: https://forge.softwareheritage.org/diffusion/DLDSVN Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/setup.py b/setup.py index dfb3f24..3033aa7 100644 --- a/setup.py +++ b/setup.py @@ -1,28 +1,28 @@ -from setuptools import setup +from setuptools import setup, find_packages def parse_requirements(): requirements = [] for reqf in ('requirements.txt', 'requirements-swh.txt'): with open(reqf) as f: for line in f.readlines(): line = line.strip() if not line or line.startswith('#'): continue requirements.append(line) return requirements setup( name='swh.loader.svn', description='Software Heritage Loader SVN', author='Software Heritage developers', author_email='swh-devel@inria.fr', url='https://forge.softwareheritage.org/diffusion/DLDSVN', - packages=['swh.loader.svn', 'swh.loader.svn.tests'], # packages's modules + packages=find_packages(), # packages's modules scripts=[], # scripts to package install_requires=parse_requirements(), setup_requires=['vcversioner'], vcversioner={}, include_package_data=True, ) diff --git a/swh.loader.svn.egg-info/PKG-INFO b/swh.loader.svn.egg-info/PKG-INFO index be85781..3bc48c4 100644 --- a/swh.loader.svn.egg-info/PKG-INFO +++ b/swh.loader.svn.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.svn -Version: 0.0.29 +Version: 0.0.30 Summary: Software Heritage Loader SVN Home-page: https://forge.softwareheritage.org/diffusion/DLDSVN Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.loader.svn.egg-info/SOURCES.txt b/swh.loader.svn.egg-info/SOURCES.txt index 0862885..c492284 100644 --- a/swh.loader.svn.egg-info/SOURCES.txt +++ b/swh.loader.svn.egg-info/SOURCES.txt @@ -1,51 +1,53 @@ .gitignore AUTHORS LICENSE MANIFEST.in Makefile README README-dev.org errors.org requirements-swh.txt requirements.txt setup.py svn-lib-client-analysis.org version.txt bin/init-svn-repository.sh bin/swh-hashtree bin/swh-svn debian/changelog debian/compat debian/control debian/copyright debian/rules debian/source/format docs/.gitignore docs/Makefile docs/comparison-git-svn-swh-svn.org docs/conf.py docs/index.rst docs/swh-loader-svn.txt docs/_static/.placeholder docs/_templates/.placeholder install/install-pysvn.sh install/install-subvertpy.sh resources/svn.ini +swh/__init__.py swh.loader.svn.egg-info/PKG-INFO swh.loader.svn.egg-info/SOURCES.txt swh.loader.svn.egg-info/dependency_links.txt swh.loader.svn.egg-info/requires.txt swh.loader.svn.egg-info/top_level.txt +swh/loader/__init__.py swh/loader/svn/__init__.py swh/loader/svn/converters.py swh/loader/svn/loader.py swh/loader/svn/producer.py swh/loader/svn/ra.py swh/loader/svn/svn.py swh/loader/svn/tasks.py swh/loader/svn/utils.py swh/loader/svn/tests/test_base.py swh/loader/svn/tests/test_converters.py swh/loader/svn/tests/test_loader.org swh/loader/svn/tests/test_loader.py swh/loader/svn/tests/test_utils.py \ No newline at end of file diff --git a/swh/__init__.py b/swh/__init__.py new file mode 100644 index 0000000..69e3be5 --- /dev/null +++ b/swh/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py new file mode 100644 index 0000000..69e3be5 --- /dev/null +++ b/swh/loader/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/swh/loader/svn/producer.py b/swh/loader/svn/producer.py index 7c0c16b..e4c8aa5 100644 --- a/swh/loader/svn/producer.py +++ b/swh/loader/svn/producer.py @@ -1,108 +1,203 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click +import datetime import sys +from swh.core import utils from swh.scheduler.utils import get_task +from swh.scheduler.backend import SchedulerBackend -def _produce_svn_to_load( - svn_url, origin_url, - destination_path=None, visit_date=None, synchroneous=False, - task_name='swh.loader.svn.tasks.LoadSWHSvnRepositoryTsk'): - """Produce svn urls on the message queue. +def _produce_svn_to_load(svn_url, origin_url, + destination_path=None, visit_date=None): + """Yield svn url(s) parameters for producers. - Those urls can either be read from stdin or directly passed as argument. + Those urls can either be read from stdin or directly passed as + argument. Either the svn_url is passed and only 1 svn url is + sent. Either no svn_url is provided and those urls are read from + stdin and yielded as parameters for producers down the line. + + Args: + svn_url (str / None): Potential svn url to load + origin_url (str / None): Potential associated origin url + destination_path (str): Destination path + visit_date (date): Forcing the visit date + + Yields + tuple svn_url, origin_url, visit_date, destination_path + + """ + if svn_url: + yield svn_url, origin_url, visit_date, destination_path + + # input from stdin, so we ignore most of the function's input + for line in sys.stdin: + line = line.rstrip() + data = line.split(' ') + svn_url = data[0] + if len(data) > 1: + origin_url = data[1] + else: + origin_url = None + + if svn_url: + yield svn_url, origin_url, visit_date, destination_path + + +def _produce_archive_to_mount_and_load(archive_path, visit_date): + """Yield svn dump(s) parameters for producers. + + Those dumps can either be read from stdin or directly passed as + argument. Either the archive_url is passed and only 1 dump is + sent. Either no archive_path is provided and those dumps are read + from stdin and yielded as parameters for producers down the line. + + Args: + svn_url (str / None): Potential svn url to load + origin_url (str / None): Potential associated origin url + destination_path (str): Destination path + visit_date (date): Forcing the visit date + + Yields + tuple archive_path, origin_url, visit_date """ - task = get_task(task_name) - if not synchroneous and svn_url: - task.delay(svn_url=svn_url, - origin_url=origin_url, - visit_date=visit_date, - destination_path=destination_path) - elif synchroneous and svn_url: # for debug purpose - task(svn_url=svn_url, - origin_url=origin_url, - visit_date=visit_date, - destination_path=destination_path) - else: # input from stdin, so we ignore most of the function's input - for line in sys.stdin: - line = line.rstrip() - data = line.split(' ') - svn_url = data[0] - if len(data) > 1: - origin_url = data[1] - else: - origin_url = None - - if svn_url: - print(svn_url, origin_url) - task.delay(svn_url=svn_url, - origin_url=origin_url, - visit_date=visit_date, - destination_path=destination_path) - - -def _produce_archive_to_mount_and_load( - archive_path, - visit_date, - task_name='swh.loader.svn.tasks.MountAndLoadSvnRepositoryTsk'): - task = get_task(task_name) if archive_path: - task.delay(archive_path) - else: - for line in sys.stdin: - line = line.rstrip() - data = line.split(' ') - archive_path = data[0] - if len(data) > 1: - origin_url = data[1] - else: - origin_url = None + yield archive_path, None, visit_date - if archive_path: - print(archive_path, origin_url) - task.delay(archive_path, origin_url, visit_date=visit_date) + for line in sys.stdin: + line = line.rstrip() + data = line.split(' ') + archive_path = data[0] + if len(data) > 1: + origin_url = data[1] + else: + origin_url = None + + if archive_path: + yield archive_path, origin_url, visit_date @click.group() def cli(): pass @cli.command('svn', help='Default svn urls producer') @click.option('--url', help="svn repository's mirror url.") @click.option('--origin-url', default=None, help='svn repository\'s original remote url ' '(if different than --svn-url).') @click.option('--destination-path', help="(optional) svn checkout destination.") @click.option('--visit-date', help="(optional) visit date to override") @click.option('--synchroneous', is_flag=True, help="To execute directly the svn loading.") -def produce_svn_to_load(url, origin_url, - destination_path, visit_date, synchroneous): - _produce_svn_to_load(svn_url=url, - origin_url=origin_url, - visit_date=visit_date, - destination_path=destination_path, - synchroneous=synchroneous) +@click.option('--dry-run/--no-dry-run', default=False, is_flag=True, + help="Dry run flag") +def produce_svn_to_load(url, origin_url, destination_path, visit_date, + synchroneous, dry_run): + """Produce svn urls to celery queue + + """ + task = get_task('swh.loader.svn.tasks.LoadSWHSvnRepositoryTsk') + + if synchroneous: + fn = task + else: + fn = task.delay + + for args in _produce_svn_to_load(svn_url=url, + origin_url=origin_url, + visit_date=visit_date, + destination_path=destination_path): + print(args) + if dry_run: + continue + + svn_url, origin_url, visit_date, destination_path = args + fn(svn_url=svn_url, + origin_url=origin_url, + visit_date=visit_date, + destination_path=destination_path) @cli.command('svn-archive', help='Default svndump archive producer') @click.option('--visit-date', help="(optional) visit date to override") @click.option('--path', help="Archive's Path to load and mount") -def produce_archive_to_mount_and_load(path, visit_date): - _produce_archive_to_mount_and_load(path, visit_date) +@click.option('--synchroneous', + is_flag=True, + help="To execute directly the svn loading.") +@click.option('--dry-run/--no-dry-run', default=False, is_flag=True, + help="Dry run flag") +def produce_archive_to_mount_and_load(path, visit_date, synchroneous, dry_run): + """Produce svn dumps to celery queue + + """ + task = get_task('swh.loader.svn.tasks.MountAndLoadSvnRepositoryTsk') + + if synchroneous: + fn = task + else: + fn = task.delay + + for args in _produce_archive_to_mount_and_load(path, visit_date): + print(args) + if dry_run: + continue + + archive_path, origin_url, visit_date = args + + fn(archive_path, origin_url, visit_date) + + +@cli.command('schedule-svn-archive', + help='Default svndump archive mounting and loading scheduling') +@click.option('--visit-date', + help="(optional) visit date to override") +@click.option('--path', help="Archive's Path to load and mount") +@click.option('--dry-run/--no-dry-run', default=False, is_flag=True, + help="Dry run flag") +def schedule_archive_to_mount_and_load(path, visit_date, dry_run): + """Produce svn dumps to scheduler backend + + """ + scheduler = SchedulerBackend() + + def make_scheduler_task(path, origin_url, visit_date): + return { + 'policy': 'oneshot', + 'type': 'swh-loader-mount-dump-and-load-svn-repository', + 'next_run': datetime.datetime.now(tz=datetime.timezone.utc), + 'arguments': { + 'args': [ + path, + ], + 'kwargs': { + 'origin_url': origin_url, + 'visit_date': visit_date, + }, + } + } + + for tasks in utils.grouper( + _produce_archive_to_mount_and_load(path, visit_date), + n=1000): + tasks = [make_scheduler_task(*t) for t in tasks] + print('[%s, ...]' % tasks[0]) + if dry_run: + continue + + scheduler.create_tasks(tasks) if __name__ == '__main__': cli() diff --git a/version.txt b/version.txt index bdf1c75..c0a83b3 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.29-0-g466c1e0 \ No newline at end of file +v0.0.30-0-g8cedf74 \ No newline at end of file