diff --git a/PKG-INFO b/PKG-INFO index ef9a14d..5df92a7 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.svn -Version: 0.0.21 +Version: 0.0.22 Summary: Software Heritage Loader SVN Home-page: https://forge.softwareheritage.org/diffusion/DLDSVN Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh.loader.svn.egg-info/PKG-INFO b/swh.loader.svn.egg-info/PKG-INFO index ef9a14d..5df92a7 100644 --- a/swh.loader.svn.egg-info/PKG-INFO +++ b/swh.loader.svn.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 1.0 Name: swh.loader.svn -Version: 0.0.21 +Version: 0.0.22 Summary: Software Heritage Loader SVN Home-page: https://forge.softwareheritage.org/diffusion/DLDSVN Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN diff --git a/swh/loader/svn/producer.py b/swh/loader/svn/producer.py index 778262e..0c9ccea 100644 --- a/swh/loader/svn/producer.py +++ b/swh/loader/svn/producer.py @@ -1,107 +1,115 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import sys def get_task(task_name): """Retrieve task object in the application by its fully qualified name. """ from swh.scheduler.celery_backend.config import app for module in app.conf.CELERY_IMPORTS: __import__(module) return app.tasks[task_name] def _produce_svn_to_load( svn_url, origin_url, - destination_path=None, synchroneous=False, + destination_path=None, visit_date=None, synchroneous=False, task_name='swh.loader.svn.tasks.LoadSWHSvnRepositoryTsk'): """Produce svn urls on the message queue. Those urls can either be read from stdin or directly passed as argument. """ task = get_task(task_name) if not synchroneous and svn_url: task.delay(svn_url=svn_url, origin_url=origin_url, + visit_date=visit_date, destination_path=destination_path) elif synchroneous and svn_url: # for debug purpose task(svn_url=svn_url, origin_url=origin_url, + visit_date=visit_date, destination_path=destination_path) else: # input from stdin, so we ignore most of the function's input for line in sys.stdin: line = line.rstrip() data = line.split(' ') svn_url = data[0] if len(data) > 1: origin_url = data[1] else: origin_url = None if svn_url: print(svn_url, origin_url) task.delay(svn_url=svn_url, origin_url=origin_url, destination_path=destination_path) def _produce_archive_to_mount_and_load( archive_path, + visit_date, task_name='swh.loader.svn.tasks.MountAndLoadSvnRepositoryTsk'): task = get_task(task_name) if archive_path: task.delay(archive_path) else: for line in sys.stdin: line = line.rstrip() data = line.split(' ') archive_path = data[0] if len(data) > 1: origin_url = data[1] else: origin_url = None if archive_path: print(archive_path, origin_url) - task.delay(archive_path, origin_url) + task.delay(archive_path, origin_url, visit_date=visit_date) @click.group() def cli(): pass @cli.command('svn', help='Default svn urls producer') @click.option('--url', help="svn repository's mirror url.") @click.option('--origin-url', default=None, help='svn repository\'s original remote url ' '(if different than --svn-url).') @click.option('--destination-path', help="(optional) svn checkout destination.") +@click.option('--visit-date', + help="(optional) visit date to override") @click.option('--synchroneous', is_flag=True, help="To execute directly the svn loading.") -def produce_svn_to_load(url, origin_url, destination_path, synchroneous): +def produce_svn_to_load(url, origin_url, + destination_path, visit_date, synchroneous): _produce_svn_to_load(svn_url=url, origin_url=origin_url, destination_path=destination_path, synchroneous=synchroneous) @cli.command('svn-archive', help='Default svndump archive producer') +@click.option('--visit-date', + help="(optional) visit date to override") @click.option('--path', help="Archive's Path to load and mount") -def produce_archive_to_mount_and_load(path): - _produce_archive_to_mount_and_load(path) +def produce_archive_to_mount_and_load(path, visit_date): + _produce_archive_to_mount_and_load(path, visit_date) if __name__ == '__main__': cli() diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py index 711cdd8..7193b15 100644 --- a/swh/loader/svn/tasks.py +++ b/swh/loader/svn/tasks.py @@ -1,66 +1,63 @@ # Copyright (C) 2015-2016 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import shutil -from datetime import datetime -from os import stat from os.path import basename from swh.scheduler.task import Task from .loader import SWHSvnLoader from . import utils class LoadSWHSvnRepositoryTsk(Task): """Import one svn repository to Software Heritage. """ task_queue = 'swh_loader_svn' def run(self, *args, **kwargs): """Import a svn repository with swh policy. Args: args: ordered arguments (expected None) kwargs: Dictionary with the following expected keys: - svn_url: (mandatory) svn's repository url - destination_path: (mandatory) root directory to locally retrieve svn's data - swh_revision: (optional) extra SWH revision hex to start from. cf. swh.loader.svn.SvnLoader.process docstring """ SWHSvnLoader().load(*args, **kwargs) class MountAndLoadSvnRepositoryTsk(Task): task_queue = 'swh_loader_svn_mount_and_load' - def run(self, archive_path, origin_url=None): + def run(self, archive_path, origin_url=None, visit_date=None): """1. Mount an svn dump from archive as a local svn repository. 2. Load it through the svn loader. 3. Clean up mounted svn repository archive. """ temp_dir = None try: self.log.info('Archive to mount and load %s' % archive_path) temp_dir, repo_path = utils.init_svn_repo_from_archive_dump( archive_path) self.log.debug('Mounted svn repository to %s' % repo_path) - mtime = stat(archive_path).st_mtime SWHSvnLoader().load(svn_url='file://%s' % repo_path, origin_url=origin_url, - visit_date=datetime.utcfromtimestamp(mtime), + visit_date=visit_date, destination_path=None) except Exception as e: raise e finally: if temp_dir: self.log.debug('Clean up temp directory %s for project %s' % ( temp_dir, basename(repo_path))) shutil.rmtree(temp_dir) diff --git a/version.txt b/version.txt index e3513da..fccfd54 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.21-0-g03d0a18 \ No newline at end of file +v0.0.22-0-gb62f9ee \ No newline at end of file