diff --git a/swh/loader/mercurial/loader.py b/swh/loader/mercurial/loader.py --- a/swh/loader/mercurial/loader.py +++ b/swh/loader/mercurial/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -75,8 +75,12 @@ visit_type = 'hg' - def __init__(self, logging_class='swh.loader.mercurial.Bundle20Loader'): + def __init__(self, url, visit_date=None, directory=None, + logging_class='swh.loader.mercurial.Bundle20Loader'): super().__init__(logging_class=logging_class) + self.origin_url = url + self.visit_date = visit_date + self.directory = directory self.content_max_size_limit = self.config['content_size_limit'] self.bundle_filename = self.config['bundle_filename'] self.reduce_effort_flag = self.config['reduce_effort'] @@ -130,14 +134,14 @@ return b - def prepare_origin_visit(self, *, origin_url, visit_date, **kwargs): - self.origin_url = origin_url - self.origin = {'url': self.origin_url, 'type': self.visit_type} + def prepare_origin_visit(self, *args, **kwargs): + self.origin = {'url': self.origin_url} + visit_date = self.visit_date if isinstance(visit_date, str): # visit_date can be string or datetime visit_date = parser.parse(visit_date) self.visit_date = visit_date self.last_visit = self.storage.origin_visit_get_latest( - self.origin['url'], require_snapshot=True) + self.origin_url, require_snapshot=True) @staticmethod def clone_with_timeout(log, origin, destination, timeout): @@ -176,7 +180,7 @@ return result - def prepare(self, *, origin_url, visit_date, directory=None): + def prepare(self, *args, **kwargs): """Prepare the necessary steps to load an actual remote or local repository. @@ -197,6 +201,8 @@ self.releases = {} self.node_2_rev = {} + directory = self.directory + if not directory: # remote repository self.working_directory = mkdtemp( prefix=TEMPORARY_DIR_PREFIX_PATTERN, @@ -206,9 +212,9 @@ self.hgdir = self.working_directory self.log.debug('Cloning %s to %s with timeout %s seconds', - self.origin['url'], self.hgdir, self.clone_timeout) + self.origin_url, self.hgdir, self.clone_timeout) - self.clone_with_timeout(self.log, self.origin['url'], self.hgdir, + self.clone_with_timeout(self.log, self.origin_url, self.hgdir, self.clone_timeout) else: # local repository @@ -339,7 +345,7 @@ content, log=self.log, max_content_size=self.content_max_size_limit, - origin_url=self.origin['url'] + origin_url=self.origin_url ) def load_directories(self): @@ -575,23 +581,24 @@ """Mercurial loader for repository wrapped within archives. """ - def __init__(self): + def __init__(self, url, visit_date=None, archive_path=None): super().__init__( + url, visit_date=visit_date, logging_class='swh.loader.mercurial.HgArchiveBundle20Loader') self.temp_dir = None + self.archive_path = archive_path - def prepare(self, *, origin_url, archive_path, visit_date): - self.temp_dir = tmp_extract(archive=archive_path, + def prepare(self, *args, **kwargs): + self.temp_dir = tmp_extract(archive=self.archive_path, dir=self.temp_directory, prefix=TEMPORARY_DIR_PREFIX_PATTERN, suffix='.dump-%s' % os.getpid(), log=self.log, - source=origin_url) + source=self.origin_url) repo_name = os.listdir(self.temp_dir)[0] - directory = os.path.join(self.temp_dir, repo_name) - super().prepare(origin_url=origin_url, - visit_date=visit_date, directory=directory) + self.directory = os.path.join(self.temp_dir, repo_name) + super().prepare(*args, **kwargs) def cleanup(self): if self.temp_dir and os.path.exists(self.temp_dir): diff --git a/swh/loader/mercurial/tasks.py b/swh/loader/mercurial/tasks.py --- a/swh/loader/mercurial/tasks.py +++ b/swh/loader/mercurial/tasks.py @@ -3,13 +3,13 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from celery import current_app as app +from celery import shared_task from .loader import HgBundle20Loader, HgArchiveBundle20Loader -@app.task(name=__name__ + '.LoadMercurial') -def load_mercurial(origin_url, directory=None, visit_date=None): +@shared_task(name=__name__ + '.LoadMercurial') +def load_mercurial(*, url, directory=None, visit_date=None): """Mercurial repository loading Import a mercurial tarball into swh. @@ -17,19 +17,17 @@ Args: see :func:`DepositLoader.load`. """ - loader = HgBundle20Loader() - return loader.load(origin_url=origin_url, - directory=directory, - visit_date=visit_date) + loader = HgBundle20Loader( + url, directory=directory, visit_date=visit_date) + return loader.load() -@app.task(name=__name__ + '.LoadArchiveMercurial') -def load_archive_mercurial(origin_url, archive_path, visit_date=None): +@shared_task(name=__name__ + '.LoadArchiveMercurial') +def load_archive_mercurial(*, url, archive_path=None, visit_date=None): """Import a mercurial tarball into swh. Args: see :func:`DepositLoader.load`. """ - loader = HgArchiveBundle20Loader() - return loader.load(origin_url=origin_url, - archive_path=archive_path, - visit_date=visit_date) + loader = HgArchiveBundle20Loader( + url, archive_path=archive_path, visit_date=visit_date) + return loader.load() diff --git a/swh/loader/mercurial/tests/common.py b/swh/loader/mercurial/tests/common.py --- a/swh/loader/mercurial/tests/common.py +++ b/swh/loader/mercurial/tests/common.py @@ -46,8 +46,8 @@ objects. """ - def __init__(self): - super().__init__() + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.origin_id = 1 self.visit = 1 diff --git a/swh/loader/mercurial/tests/test_loader.py b/swh/loader/mercurial/tests/test_loader.py --- a/swh/loader/mercurial/tests/test_loader.py +++ b/swh/loader/mercurial/tests/test_loader.py @@ -23,33 +23,35 @@ """Mixin base loader test to prepare the mercurial repository to uncompress, load and test the results. - This sets up - """ - def setUp(self, loader=HgLoaderMemoryStorage, - archive_name='the-sandbox.tgz', filename='the-sandbox', + def setUp(self, archive_name='the-sandbox.tgz', filename='the-sandbox', uncompress_archive=True): super().setUp(archive_name=archive_name, filename=filename, prefix_tmp_folder_name='swh.loader.mercurial.', start_path=os.path.dirname(__file__), uncompress_archive=uncompress_archive) - self.loader = loader() - self.storage = self.loader.storage class WithoutReleaseLoaderTest(BaseHgLoaderTest): """Load a mercurial repository without release """ + def setUp(self, *args, **kwargs): + super().setUp(*args, **kwargs) + self.loader = HgLoaderMemoryStorage( + url=self.repo_url, + visit_date='2016-05-03 15:16:32+00', + directory=self.destination_path) + self.storage = self.loader.storage + def test_load(self): """Load a repository with multiple branches results in 1 snapshot + Another visit with no change in between result in uneventful visit + """ # when - self.loader.load( - origin_url=self.repo_url, - visit_date='2016-05-03 15:16:32+00', - directory=self.destination_path) + self.loader.load() # then self.assertCountContents(2) @@ -150,22 +152,9 @@ self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') - def test_load_status(self): - # first visit of the mercurial repository - self.loader.load( - origin_url=self.repo_url, - visit_date='2016-05-03 15:16:32+00', - directory=self.destination_path) - - self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) - self.assertEqual(self.loader.visit_status(), 'full') - # second visit with no changes in the mercurial repository # since the first one - self.loader.load( - origin_url=self.repo_url, - visit_date='2016-05-04 14:12:21+00', - directory=self.destination_path) + self.loader.load() self.assertEqual(self.loader.load_status(), {'status': 'uneventful'}) self.assertEqual(self.loader.visit_status(), 'full') @@ -224,17 +213,19 @@ """ def setUp(self): super().setUp(archive_name='hello.tgz', filename='hello') + self.loader = HgLoaderMemoryStorage( + url=self.repo_url, + visit_date='2016-05-03 15:16:32+00', + directory=self.destination_path + ) + self.storage = self.loader.storage def test_load(self): """Load a repository with tags results in 1 snapshot """ # when - self.loader.load( - origin_url=self.repo_url, - visit_date='2016-05-03 15:16:32+00', - directory=self.destination_path) - + self.loader.load() self.assert_data_ok() @@ -243,20 +234,21 @@ """ def setUp(self): - super().setUp(loader=HgArchiveLoaderMemoryStorage, - archive_name='hello.tgz', filename='hello', + super().setUp(archive_name='hello.tgz', filename='hello', uncompress_archive=False) + self.loader = HgArchiveLoaderMemoryStorage( + url=self.repo_url, + visit_date='2016-05-03 15:16:32+00', + archive_path=self.destination_path + ) + self.storage = self.loader.storage def test_load(self): """Load a mercurial repository archive with tags results in 1 snapshot """ # when - self.loader.load( - origin_url=self.repo_url, - visit_date='2016-05-03 15:16:32+00', - archive_path=self.destination_path) - + self.loader.load() self.assert_data_ok() @patch('swh.loader.mercurial.archive_extract.patoolib') @@ -264,10 +256,7 @@ mock_patoo.side_effect = ValueError # when - r = self.loader.load( - origin_url=self.repo_url, - visit_date='2016-05-03 15:16:32+00', - archive_path=self.destination_path) + r = self.loader.load() self.assertEqual(r, {'status': 'failed'}) self.assertCountContents(0) @@ -284,13 +273,16 @@ """ def setUp(self): super().setUp(archive_name='transplant.tgz', filename='transplant') + self.loader = HgLoaderMemoryStorage( + url=self.repo_url, + visit_date='2019-05-23 12:06:00+00', + directory=self.destination_path + ) + self.storage = self.loader.storage def test_load(self): # load hg repository - self.loader.load( - origin_url=self.repo_url, - visit_date='2019-05-23 12:06:00+00', - directory=self.destination_path) + self.loader.load() # collect swh revisions origin_url = self.storage.origin_get([ diff --git a/swh/loader/mercurial/tests/test_tasks.py b/swh/loader/mercurial/tests/test_tasks.py --- a/swh/loader/mercurial/tests/test_tasks.py +++ b/swh/loader/mercurial/tests/test_tasks.py @@ -12,15 +12,15 @@ res = swh_app.send_task( 'swh.loader.mercurial.tasks.LoadMercurial', - kwargs=dict(origin_url='origin_url', directory='/some/repo', - visit_date='now')) + (), dict(url='origin_url', + directory='/some/repo', + visit_date='now')) assert res res.wait() assert res.successful() assert res.result == {'status': 'eventful'} - mock_loader.assert_called_once_with( - origin_url='origin_url', visit_date='now', directory='/some/repo') + mock_loader.assert_called_once_with() @patch('swh.loader.mercurial.loader.HgArchiveBundle20Loader.load') @@ -29,13 +29,12 @@ res = swh_app.send_task( 'swh.loader.mercurial.tasks.LoadArchiveMercurial', - ('another_url', '/some/tar.tgz', 'now')) + (), dict(url='another_url', + archive_path='/some/tar.tgz', + visit_date='now')) assert res res.wait() assert res.successful() assert res.result == {'status': 'uneventful'} - mock_loader.assert_called_once_with( - origin_url='another_url', - archive_path='/some/tar.tgz', - visit_date='now') + mock_loader.assert_called_once_with()