diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -10,3 +10,6 @@ [mypy-subvertpy.*] ignore_missing_imports = True + +[mypy-pytest.*] +ignore_missing_imports = True diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,3 @@ pytest +pytest-mock swh.core[http] >= 0.0.61 diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -50,6 +50,10 @@ extras_require={'testing': parse_requirements('test')}, vcversioner={}, include_package_data=True, + entry_points=''' + [swh.workers] + loader.svn=swh.loader.svn:register + ''', classifiers=[ "Programming Language :: Python :: 3", "Intended Audience :: Developers", diff --git a/swh/loader/svn/__init__.py b/swh/loader/svn/__init__.py --- a/swh/loader/svn/__init__.py +++ b/swh/loader/svn/__init__.py @@ -0,0 +1,14 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from typing import Any, Dict + + +def register() -> Dict[str, Any]: + from swh.loader.svn.loader import SvnLoader + return { + 'task_modules': ['%s.tasks' % __name__], + 'loader': SvnLoader, + } diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -78,9 +78,14 @@ visit_type = 'svn' - def __init__(self): + def __init__(self, url, origin_url=None, visit_date=None, + destination_path=None, swh_revision=None, + start_from_scratch=False): super().__init__(logging_class='swh.loader.svn.SvnLoader') - self.origin_url = None + # technical svn uri to act on svn repository + self.svn_url = url + # origin url as unique identifier for origin in swh archive + self.origin_url = origin_url if origin_url else self.svn_url self.debug = self.config['debug'] self.last_seen_revision = None self.temp_directory = self.config['temp_directory'] @@ -100,6 +105,10 @@ self._last_revision = None self._visit_status = 'full' self._load_status = 'uneventful' + self.visit_date = visit_date + self.destination_path = destination_path + self.start_from_scratch = start_from_scratch + self.swh_revision = swh_revision def pre_cleanup(self): """Cleanup potential dangling files from prior runs (e.g. OOM killed @@ -444,27 +453,22 @@ yield _contents, _directories, swh_revision - def prepare_origin_visit(self, *, svn_url, visit_date=None, - origin_url=None, **kwargs): + def prepare_origin_visit(self, *args, **kwargs): self.origin = { - 'url': origin_url if origin_url else svn_url, - 'type': self.visit_type, + 'url': self.origin_url if self.origin_url else self.svn_url, } - self.visit_date = visit_date - def prepare(self, *, svn_url, destination_path=None, - swh_revision=None, start_from_scratch=False, **kwargs): - self.start_from_scratch = start_from_scratch - if swh_revision: - self.last_known_swh_revision = swh_revision + def prepare(self, *args, **kwargs): + if self.swh_revision: + self.last_known_swh_revision = self.swh_revision else: self.last_known_swh_revision = None self.latest_snapshot = self.swh_latest_snapshot_revision( self.origin_url, self.last_known_swh_revision) - if destination_path: - local_dirname = destination_path + if self.destination_path: + local_dirname = self.destination_path else: local_dirname = tempfile.mkdtemp( suffix='-%s' % os.getpid(), @@ -472,7 +476,7 @@ dir=self.temp_directory) self.svnrepo = self.get_svn_repo( - svn_url, local_dirname, self.origin_url) + self.svn_url, local_dirname, self.origin_url) try: revision_start, revision_end, revision_parents = self.start_from( self.last_known_swh_revision, self.start_from_scratch) @@ -584,26 +588,28 @@ an svn repository and load said repository. """ - def __init__(self, archive_path): - super().__init__() + def __init__(self, url, archive_path, + origin_url=None, destination_path=None, + swh_revision=None, start_from_scratch=None, + visit_date=None): + super().__init__(url, + origin_url=origin_url, + destination_path=destination_path, + swh_revision=swh_revision, + start_from_scratch=start_from_scratch, + visit_date=visit_date) self.archive_path = archive_path self.temp_dir = None self.repo_path = None - def prepare(self, *, svn_url, destination_path=None, - swh_revision=None, start_from_scratch=False, **kwargs): + def prepare(self, *args, **kwargs): self.log.info('Archive to mount and load %s' % self.archive_path) self.temp_dir, self.repo_path = init_svn_repo_from_archive_dump( self.archive_path, prefix=TEMPORARY_DIR_PREFIX_PATTERN, suffix='-%s' % os.getpid(), root_dir=self.temp_directory) - if not svn_url: - svn_url = 'file://%s' % self.repo_path - super().prepare(svn_url=svn_url, destination_path=destination_path, - swh_revision=swh_revision, - start_from_scratch=start_from_scratch, - **kwargs) + super().prepare(*args, **kwargs) def cleanup(self): super().cleanup() @@ -620,8 +626,13 @@ Create a subversion repository dump using the svnrdump utility, mount it locally and load the repository from it. """ - def __init__(self): - super().__init__() + def __init__(self, url, origin_url=None, destination_path=None, + swh_revision=None, start_from_scratch=False, visit_date=None): + super().__init__(url, origin_url=origin_url, + destination_path=destination_path, + swh_revision=swh_revision, + start_from_scratch=start_from_scratch, + visit_date=visit_date) self.temp_dir = tempfile.mkdtemp(dir=self.temp_directory) self.repo_path = None self.truncated_dump = False @@ -635,7 +646,7 @@ last_loaded_svn_rev = -1 try: origin = \ - self.storage.origin_get({'type': 'svn', 'url': svn_url}) + self.storage.origin_get({'url': svn_url}) last_swh_rev = \ self.swh_latest_snapshot_revision(origin['url'])['revision'] last_swh_rev_headers = \ @@ -723,15 +734,14 @@ raise Exception('An error occurred when running svnrdump and ' 'no exploitable dump file has been generated.') - def prepare(self, *, svn_url, destination_path=None, - swh_revision=None, start_from_scratch=False, **kwargs): + def prepare(self, *args, **kwargs): # First, check if previous revisions have been loaded for the # subversion origin and get the number of the last one - last_loaded_svn_rev = self.get_last_loaded_svn_rev(svn_url) + last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url) # Then try to generate a dump file containing relevant svn revisions # to load, an exception will be thrown if something wrong happened - dump_path = self.dump_svn_revisions(svn_url, last_loaded_svn_rev) + dump_path = self.dump_svn_revisions(self.svn_url, last_loaded_svn_rev) # Finally, mount the dump and load the repository self.log.debug('Mounting dump file with "svnadmin load".') @@ -740,11 +750,8 @@ prefix=TEMPORARY_DIR_PREFIX_PATTERN, suffix='-%s' % os.getpid(), root_dir=self.temp_dir) - super().prepare(svn_url='file://%s' % self.repo_path, - destination_path=destination_path, - swh_revision=swh_revision, - start_from_scratch=start_from_scratch, - **kwargs) + self.svn_url = 'file://%s' % self.repo_path + super().prepare(*args, **kwargs) def cleanup(self): super().cleanup() diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py --- a/swh/loader/svn/tasks.py +++ b/swh/loader/svn/tasks.py @@ -1,72 +1,77 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from celery import current_app as app +from celery import shared_task from .loader import ( SvnLoader, SvnLoaderFromDumpArchive, SvnLoaderFromRemoteDump ) -@app.task(name=__name__ + '.LoadSvnRepository') -def load_svn(svn_url, +@shared_task(name=__name__ + '.LoadSvnRepository') +def load_svn(url=None, + origin_url=None, destination_path=None, swh_revision=None, - origin_url=None, visit_date=None, - start_from_scratch=None): + start_from_scratch=False): """Import a svn repository Args: args: ordered arguments (expected None) kwargs: Dictionary with the following expected keys: - - svn_url (str): (mandatory) svn's repository url - - destination_path (str): (mandatory) root directory to - locally retrieve svn's data + - url (str): (mandatory) svn's repository url - origin_url (str): Optional original url override + - destination_path (str): (optional) root directory to + locally retrieve svn's data - swh_revision (dict): (optional) extra revision hex to start from. see swh.loader.svn.SvnLoader.process docstring """ - return SvnLoader().load( - svn_url=svn_url, - destination_path=destination_path, - origin_url=origin_url, - swh_revision=swh_revision, - visit_date=visit_date, - start_from_scratch=start_from_scratch) + loader = SvnLoader(url, + origin_url=origin_url, + destination_path=destination_path, + swh_revision=swh_revision, + visit_date=visit_date, + start_from_scratch=start_from_scratch) + return loader.load() -@app.task(name=__name__ + '.MountAndLoadSvnRepository') -def mount_load_svn(archive_path, origin_url=None, visit_date=None, - start_from_scratch=False): +@shared_task(name=__name__ + '.MountAndLoadSvnRepository') +def load_svn_from_archive(url=None, + archive_path=None, + visit_date=None, + start_from_scratch=False): """1. Mount an svn dump from archive as a local svn repository 2. Load it through the svn loader 3. Clean up mounted svn repository archive """ - return SvnLoaderFromDumpArchive(archive_path).load( - svn_url=None, - origin_url=origin_url, - visit_date=visit_date, + loader = SvnLoaderFromDumpArchive( + url, archive_path=archive_path, + visit_date=visit_date, start_from_scratch=start_from_scratch) + return loader.load() -@app.task(name=__name__ + '.DumpMountAndLoadSvnRepository') -def dump_mount_load_svn(svn_url, origin_url=None, visit_date=None, - start_from_scratch=False): - """1. Mount an svn dump from archive as a local svn repository. +@shared_task(name=__name__ + '.DumpMountAndLoadSvnRepository') +def load_svn_from_remote_dump(url=None, + origin_url=None, + visit_date=None, + start_from_scratch=False): + """1. Mount a remote svn dump as a local svn repository. 2. Load it through the svn loader. 3. Clean up mounted svn repository archive. """ - return SvnLoaderFromRemoteDump().load( - svn_url=svn_url, + loader = SvnLoaderFromRemoteDump( + url, origin_url=origin_url, visit_date=visit_date, start_from_scratch=start_from_scratch) + return loader.load() diff --git a/swh/loader/svn/tests/conftest.py b/swh/loader/svn/tests/conftest.py new file mode 100644 --- /dev/null +++ b/swh/loader/svn/tests/conftest.py @@ -0,0 +1,57 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +import pytest +import yaml + +from typing import Any, Dict + +from swh.scheduler.tests.conftest import swh_app # noqa + + +@pytest.fixture +def swh_loader_config() -> Dict[str, Any]: + return { + 'storage': { + 'cls': 'memory', + }, + 'check_revision': {'limit': 100, 'status': False}, + 'content_packet_block_size_bytes': 104857600, + 'content_packet_size': 10000, + 'content_packet_size_bytes': 1073741824, + 'content_size_limit': 104857600, + 'debug': False, + 'directory_packet_size': 2500, + 'log_db': 'dbname=softwareheritage-log', + 'occurrence_packet_size': 1000, + 'release_packet_size': 1000, + 'revision_packet_size': 10, + 'save_data': False, + 'save_data_path': '', + 'send_contents': True, + 'send_directories': True, + 'send_occurrences': True, + 'send_releases': True, + 'send_revisions': True, + 'send_snapshot': True, + 'temp_directory': '/tmp', + } + + +@pytest.fixture +def swh_config(swh_loader_config, monkeypatch, tmp_path): + conffile = os.path.join(str(tmp_path), 'loader.yml') + with open(conffile, 'w') as f: + f.write(yaml.dump(swh_loader_config)) + monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile) + return conffile + + +@pytest.fixture(scope='session') +def celery_includes(): + return [ + 'swh.loader.svn.tasks', + ] diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -60,12 +60,14 @@ Load a new svn repository using the swh policy (so no update). """ - def __init__(self, last_snp_rev={}): - super().__init__() + def __init__(self, url, last_snp_rev={}, destination_path=None, + start_from_scratch=False, swh_revision=None): + super().__init__(url, destination_path=destination_path, + start_from_scratch=start_from_scratch, + swh_revision=swh_revision) self.origin = { 'id': 1, - 'url': '/dev/null', - 'type': 'svn', + 'url': url, } self.visit = { 'origin': 1, @@ -95,12 +97,34 @@ """ def setUp(self, archive_name='pkg-gourmet.tgz', filename='pkg-gourmet', - loader=None): + loader=None, snapshot=None, type='default', + start_from_scratch=False, swh_revision=None): super().setUp(archive_name=archive_name, filename=filename, prefix_tmp_folder_name='swh.loader.svn.', start_path=os.path.dirname(__file__)) self.svn_mirror_url = self.repo_url - self.loader = loader or SvnLoaderTest() + if type == 'default': + loader_test_class = SvnLoaderTest + else: + loader_test_class = SvnLoaderTestFromRemoteDump + + if loader: + self.loader = loader + elif snapshot: + self.loader = loader_test_class( + self.svn_mirror_url, + destination_path=self.destination_path, + start_from_scratch=start_from_scratch, + swh_revision=swh_revision, + last_snp_rev=snapshot, + ) + else: + self.loader = loader_test_class( + self.svn_mirror_url, + destination_path=self.destination_path, + start_from_scratch=start_from_scratch, + swh_revision=swh_revision + ) self.storage = self.loader.storage @@ -113,9 +137,7 @@ """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then self.assertCountRevisions(6) @@ -172,16 +194,14 @@ """ def setUp(self): - super().setUp(loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) + super().setUp(snapshot=_LAST_SNP_REV) def test_load(self): """Load a repository without new changes results in same snapshot """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then @@ -212,15 +232,14 @@ hashutil.hash_to_bytes('badbadbadbadf708f7466dddf547567b65f6c39d') # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=last_snp_rev)) + snapshot=last_snp_rev) def test_load(self): """Load known repository with history altered should do nothing """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -245,15 +264,14 @@ def setUp(self): # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) + snapshot=_LAST_SNP_REV) def test_process_repository(self): """Process updated repository should yield new objects """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -292,16 +310,15 @@ def setUp(self): # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) + snapshot=_LAST_SNP_REV, + start_from_scratch=True) def test_load(self): """Load an existing repository from scratch yields same swh objects """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - start_from_scratch=True) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -360,16 +377,14 @@ } } super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=last_snp_rev)) + snapshot=last_snp_rev) def test_load(self): """Load from partial previous visit result in new changes """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -405,13 +420,6 @@ """ def setUp(self): - super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) - - def test_load(self): - """Load known and partial repository should start from last visit - - """ previous_unfinished_revision = { 'id': hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41'), @@ -427,12 +435,17 @@ ] } } + super().setUp(archive_name='pkg-gourmet-with-updates.tgz', + snapshot=_LAST_SNP_REV, + swh_revision=previous_unfinished_revision) + + def test_load(self): + """Load known and partial repository should start from last visit + + """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -488,13 +501,6 @@ } } } - super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=last_snp_rev)) - - def test_load(self): - """Load repository should yield revisions starting from last visit - - """ previous_unfinished_revision = { 'id': hashutil.hash_to_bytes( '4876cb10aec6f708f7466dddf547567b65f6c39c'), @@ -510,11 +516,16 @@ ] } } + super().setUp(archive_name='pkg-gourmet-with-updates.tgz', + snapshot=last_snp_rev, + swh_revision=previous_unfinished_revision) + + def test_load(self): + """Load repository should yield revisions starting from last visit + + """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -558,8 +569,7 @@ """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() expected_revisions = { '7da4975c363101b819756d33459f30a866d01b1b': 'f63637223ee0f7d4951ffd2d4d9547a4882c5d8b' # noqa @@ -589,8 +599,7 @@ """Load repo with mixed CRLF/LF endings (svn:eol-style:native) is ok """ - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() expected_revisions = { '9c6962eeb9164a636c374be700672355e34a98a7': '16aa6b6271f3456d4643999d234cf39fe3d0cc5a' # noqa @@ -612,19 +621,17 @@ """ def setUp(self): - super().setUp(archive_name='pkg-gourmet-with-external-id.tgz') + previous_unfinished_revision = None + super().setUp(archive_name='pkg-gourmet-with-external-id.tgz', + swh_revision=previous_unfinished_revision) def test_load(self): """Repository with svn:externals property, will stop raising an error """ - previous_unfinished_revision = None # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then repositories holds 21 revisions, but the last commit # one holds an 'svn:externals' property which will make the @@ -673,20 +680,18 @@ """ def setUp(self): + previous_unfinished_revision = None super().setUp( - archive_name='pkg-gourmet-with-edge-case-links-and-files.tgz') + archive_name='pkg-gourmet-with-edge-case-links-and-files.tgz', + swh_revision=previous_unfinished_revision) def test_load(self): """File/Link removed prior to folder with same name creation is ok """ - previous_unfinished_revision = None # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then repositories holds 14 revisions, but the last commit self.assertCountRevisions(19) @@ -738,9 +743,7 @@ """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then repositories holds 14 revisions, but the last commit self.assertCountRevisions(21) @@ -790,27 +793,26 @@ and the base svn loader are the same. """ def setUp(self): - super().setUp(archive_name='pkg-gourmet.tgz', - loader=SvnLoaderTestFromRemoteDump()) + _LOADER_TEST_CONFIG['debug'] = True # to avoid cleanup in between load + super().setUp(archive_name='pkg-gourmet.tgz', type='remote') def test_load(self): """ Compare results of remote dump loader and base loader """ dump_loader = self.loader - dump_loader.load(svn_url=self.svn_mirror_url) + dump_loader.load() self.assertCountContents(19) self.assertCountDirectories(17) self.assertCountRevisions(6) self.assertCountSnapshots(1) - base_loader = SvnLoaderTest() - base_loader.load(svn_url=self.svn_mirror_url) + base_loader = SvnLoaderTest(self.svn_mirror_url) + base_loader.load() dump_storage_stat = dump_loader.storage.stat_counters() base_storage_stat = base_loader.storage.stat_counters() - self.assertEqual(dump_storage_stat, base_storage_stat) @@ -828,9 +830,7 @@ """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() self.assertCountRevisions(7, '7 svn commits') self.assertCountReleases(0) diff --git a/swh/loader/svn/tests/test_task.py b/swh/loader/svn/tests/test_task.py new file mode 100644 --- /dev/null +++ b/swh/loader/svn/tests/test_task.py @@ -0,0 +1,50 @@ +# Copyright (C) 2019 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def test_svn_loader(mocker, swh_app, celery_session_worker, swh_config): + mock_loader = mocker.patch('swh.loader.svn.loader.SvnLoader.load') + mock_loader.return_value = {'status': 'eventful'} + + res = swh_app.send_task( + 'swh.loader.svn.tasks.LoadSvnRepository', + (), dict(url='some-technical-url', origin_url='origin-url')) + assert res + res.wait() + assert res.successful() + + assert res.result == {'status': 'eventful'} + + +def test_svn_loader_from_dump( + mocker, swh_app, celery_session_worker, swh_config): + mock_loader = mocker.patch( + 'swh.loader.svn.loader.SvnLoaderFromDumpArchive.load') + mock_loader.return_value = {'status': 'eventful'} + + res = swh_app.send_task( + 'swh.loader.svn.tasks.MountAndLoadSvnRepository', + (), dict(url='some-url', archive_path='some-path')) + assert res + res.wait() + assert res.successful() + + assert res.result == {'status': 'eventful'} + + +def test_svn_loader_from_remote_dump( + mocker, swh_app, celery_session_worker, swh_config): + mock_loader = mocker.patch( + 'swh.loader.svn.loader.SvnLoaderFromRemoteDump.load') + mock_loader.return_value = {'status': 'eventful'} + + res = swh_app.send_task( + 'swh.loader.svn.tasks.DumpMountAndLoadSvnRepository', + (), dict(url='some-remote-dump-url', origin_url='origin-url')) + assert res + res.wait() + assert res.successful() + + assert res.result == {'status': 'eventful'}