diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -78,9 +78,12 @@ visit_type = 'svn' - def __init__(self): + def __init__(self, url, svn_url=None, visit_date=None, + destination_path=None, swh_revision=None, + start_from_scratch=False): super().__init__(logging_class='swh.loader.svn.SvnLoader') - self.origin_url = None + self.origin_url = url + self.svn_url = svn_url if svn_url else self.origin_url self.debug = self.config['debug'] self.last_seen_revision = None self.temp_directory = self.config['temp_directory'] @@ -100,6 +103,10 @@ self._last_revision = None self._visit_status = 'full' self._load_status = 'uneventful' + self.visit_date = visit_date + self.destination_path = destination_path + self.start_from_scratch = start_from_scratch + self.swh_revision = swh_revision def pre_cleanup(self): """Cleanup potential dangling files from prior runs (e.g. OOM killed @@ -444,27 +451,22 @@ yield _contents, _directories, swh_revision - def prepare_origin_visit(self, *, svn_url, visit_date=None, - origin_url=None, **kwargs): + def prepare_origin_visit(self, *args, **kwargs): self.origin = { - 'url': origin_url if origin_url else svn_url, - 'type': self.visit_type, + 'url': self.origin_url if self.origin_url else self.svn_url, } - self.visit_date = visit_date - def prepare(self, *, svn_url, destination_path=None, - swh_revision=None, start_from_scratch=False, **kwargs): - self.start_from_scratch = start_from_scratch - if swh_revision: - self.last_known_swh_revision = swh_revision + def prepare(self, *args, **kwargs): + if self.swh_revision: + self.last_known_swh_revision = self.swh_revision else: self.last_known_swh_revision = None self.latest_snapshot = self.swh_latest_snapshot_revision( self.origin_url, self.last_known_swh_revision) - if destination_path: - local_dirname = destination_path + if self.destination_path: + local_dirname = self.destination_path else: local_dirname = tempfile.mkdtemp( suffix='-%s' % os.getpid(), @@ -472,7 +474,7 @@ dir=self.temp_directory) self.svnrepo = self.get_svn_repo( - svn_url, local_dirname, self.origin_url) + self.svn_url, local_dirname, self.origin_url) try: revision_start, revision_end, revision_parents = self.start_from( self.last_known_swh_revision, self.start_from_scratch) @@ -584,26 +586,26 @@ an svn repository and load said repository. """ - def __init__(self, archive_path): - super().__init__() + def __init__(self, url, archive_path, + svn_url=None, destination_path=None, + swh_revision=None, start_from_scratch=None): + super().__init__(url, svn_url=svn_url, + destination_path=destination_path, + swh_revision=swh_revision, + start_from_scratch=start_from_scratch) + self.svn_url = svn_url self.archive_path = archive_path self.temp_dir = None self.repo_path = None - def prepare(self, *, svn_url, destination_path=None, - swh_revision=None, start_from_scratch=False, **kwargs): + def prepare(self, *args, **kwargs): self.log.info('Archive to mount and load %s' % self.archive_path) self.temp_dir, self.repo_path = init_svn_repo_from_archive_dump( self.archive_path, prefix=TEMPORARY_DIR_PREFIX_PATTERN, suffix='-%s' % os.getpid(), root_dir=self.temp_directory) - if not svn_url: - svn_url = 'file://%s' % self.repo_path - super().prepare(svn_url=svn_url, destination_path=destination_path, - swh_revision=swh_revision, - start_from_scratch=start_from_scratch, - **kwargs) + super().prepare(*args, **kwargs) def cleanup(self): super().cleanup() @@ -620,8 +622,12 @@ Create a subversion repository dump using the svnrdump utility, mount it locally and load the repository from it. """ - def __init__(self): - super().__init__() + def __init__(self, url, svn_url=None, destination_path=None, + swh_revision=None, start_from_scratch=False): + super().__init__(url, svn_url=svn_url, + destination_path=destination_path, + swh_revision=swh_revision, + start_from_scratch=start_from_scratch) self.temp_dir = tempfile.mkdtemp(dir=self.temp_directory) self.repo_path = None self.truncated_dump = False @@ -635,7 +641,7 @@ last_loaded_svn_rev = -1 try: origin = \ - self.storage.origin_get({'type': 'svn', 'url': svn_url}) + self.storage.origin_get({'url': svn_url}) last_swh_rev = \ self.swh_latest_snapshot_revision(origin['url'])['revision'] last_swh_rev_headers = \ @@ -723,15 +729,14 @@ raise Exception('An error occurred when running svnrdump and ' 'no exploitable dump file has been generated.') - def prepare(self, *, svn_url, destination_path=None, - swh_revision=None, start_from_scratch=False, **kwargs): + def prepare(self, *args, **kwargs): # First, check if previous revisions have been loaded for the # subversion origin and get the number of the last one - last_loaded_svn_rev = self.get_last_loaded_svn_rev(svn_url) + last_loaded_svn_rev = self.get_last_loaded_svn_rev(self.svn_url) # Then try to generate a dump file containing relevant svn revisions # to load, an exception will be thrown if something wrong happened - dump_path = self.dump_svn_revisions(svn_url, last_loaded_svn_rev) + dump_path = self.dump_svn_revisions(self.svn_url, last_loaded_svn_rev) # Finally, mount the dump and load the repository self.log.debug('Mounting dump file with "svnadmin load".') @@ -740,11 +745,8 @@ prefix=TEMPORARY_DIR_PREFIX_PATTERN, suffix='-%s' % os.getpid(), root_dir=self.temp_dir) - super().prepare(svn_url='file://%s' % self.repo_path, - destination_path=destination_path, - swh_revision=swh_revision, - start_from_scratch=start_from_scratch, - **kwargs) + self.svn_url = 'file://%s' % self.repo_path + super().prepare(*args, **kwargs) def cleanup(self): super().cleanup() diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py --- a/swh/loader/svn/tasks.py +++ b/swh/loader/svn/tasks.py @@ -32,10 +32,9 @@ docstring """ - return SvnLoader().load( + return SvnLoader(origin_url).load( svn_url=svn_url, destination_path=destination_path, - origin_url=origin_url, swh_revision=swh_revision, visit_date=visit_date, start_from_scratch=start_from_scratch) @@ -49,9 +48,8 @@ 3. Clean up mounted svn repository archive """ - return SvnLoaderFromDumpArchive(archive_path).load( + return SvnLoaderFromDumpArchive(origin_url, archive_path).load( svn_url=None, - origin_url=origin_url, visit_date=visit_date, archive_path=archive_path, start_from_scratch=start_from_scratch) @@ -65,8 +63,7 @@ 3. Clean up mounted svn repository archive. """ - return SvnLoaderFromRemoteDump().load( + return SvnLoaderFromRemoteDump(origin_url).load( svn_url=svn_url, - origin_url=origin_url, visit_date=visit_date, start_from_scratch=start_from_scratch) diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -60,12 +60,14 @@ Load a new svn repository using the swh policy (so no update). """ - def __init__(self, last_snp_rev={}): - super().__init__() + def __init__(self, url, last_snp_rev={}, destination_path=None, + start_from_scratch=False, swh_revision=None): + super().__init__(url, destination_path=destination_path, + start_from_scratch=start_from_scratch, + swh_revision=swh_revision) self.origin = { 'id': 1, - 'url': '/dev/null', - 'type': 'svn', + 'url': url, } self.visit = { 'origin': 1, @@ -95,12 +97,34 @@ """ def setUp(self, archive_name='pkg-gourmet.tgz', filename='pkg-gourmet', - loader=None): + loader=None, snapshot=None, type='default', + start_from_scratch=False, swh_revision=None): super().setUp(archive_name=archive_name, filename=filename, prefix_tmp_folder_name='swh.loader.svn.', start_path=os.path.dirname(__file__)) self.svn_mirror_url = self.repo_url - self.loader = loader or SvnLoaderTest() + if type == 'default': + loader_test_class = SvnLoaderTest + else: + loader_test_class = SvnLoaderTestFromRemoteDump + + if loader: + self.loader = loader + elif snapshot: + self.loader = loader_test_class( + self.svn_mirror_url, + destination_path=self.destination_path, + start_from_scratch=start_from_scratch, + swh_revision=swh_revision, + last_snp_rev=snapshot, + ) + else: + self.loader = loader_test_class( + self.svn_mirror_url, + destination_path=self.destination_path, + start_from_scratch=start_from_scratch, + swh_revision=swh_revision + ) self.storage = self.loader.storage @@ -113,9 +137,7 @@ """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then self.assertCountRevisions(6) @@ -172,16 +194,14 @@ """ def setUp(self): - super().setUp(loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) + super().setUp(snapshot=_LAST_SNP_REV) def test_load(self): """Load a repository without new changes results in same snapshot """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then @@ -212,15 +232,14 @@ hashutil.hash_to_bytes('badbadbadbadf708f7466dddf547567b65f6c39d') # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=last_snp_rev)) + snapshot=last_snp_rev) def test_load(self): """Load known repository with history altered should do nothing """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -245,15 +264,14 @@ def setUp(self): # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) + snapshot=_LAST_SNP_REV) def test_process_repository(self): """Process updated repository should yield new objects """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -292,16 +310,15 @@ def setUp(self): # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) + snapshot=_LAST_SNP_REV, + start_from_scratch=True) def test_load(self): """Load an existing repository from scratch yields same swh objects """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - start_from_scratch=True) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -360,16 +377,14 @@ } } super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=last_snp_rev)) + snapshot=last_snp_rev) def test_load(self): """Load from partial previous visit result in new changes """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -405,13 +420,6 @@ """ def setUp(self): - super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=_LAST_SNP_REV)) - - def test_load(self): - """Load known and partial repository should start from last visit - - """ previous_unfinished_revision = { 'id': hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41'), @@ -427,12 +435,17 @@ ] } } + super().setUp(archive_name='pkg-gourmet-with-updates.tgz', + snapshot=_LAST_SNP_REV, + swh_revision=previous_unfinished_revision) + + def test_load(self): + """Load known and partial repository should start from last visit + + """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -488,13 +501,6 @@ } } } - super().setUp(archive_name='pkg-gourmet-with-updates.tgz', - loader=SvnLoaderTest(last_snp_rev=last_snp_rev)) - - def test_load(self): - """Load repository should yield revisions starting from last visit - - """ previous_unfinished_revision = { 'id': hashutil.hash_to_bytes( '4876cb10aec6f708f7466dddf547567b65f6c39c'), @@ -510,11 +516,16 @@ ] } } + super().setUp(archive_name='pkg-gourmet-with-updates.tgz', + snapshot=last_snp_rev, + swh_revision=previous_unfinished_revision) + + def test_load(self): + """Load repository should yield revisions starting from last visit + + """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then # we got the previous run's last revision (rev 6) @@ -558,8 +569,7 @@ """ # when - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() expected_revisions = { '7da4975c363101b819756d33459f30a866d01b1b': 'f63637223ee0f7d4951ffd2d4d9547a4882c5d8b' # noqa @@ -589,8 +599,7 @@ """Load repo with mixed CRLF/LF endings (svn:eol-style:native) is ok """ - self.loader.load(svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() expected_revisions = { '9c6962eeb9164a636c374be700672355e34a98a7': '16aa6b6271f3456d4643999d234cf39fe3d0cc5a' # noqa @@ -612,19 +621,17 @@ """ def setUp(self): - super().setUp(archive_name='pkg-gourmet-with-external-id.tgz') + previous_unfinished_revision = None + super().setUp(archive_name='pkg-gourmet-with-external-id.tgz', + swh_revision=previous_unfinished_revision) def test_load(self): """Repository with svn:externals property, will stop raising an error """ - previous_unfinished_revision = None # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then repositories holds 21 revisions, but the last commit # one holds an 'svn:externals' property which will make the @@ -673,20 +680,18 @@ """ def setUp(self): + previous_unfinished_revision = None super().setUp( - archive_name='pkg-gourmet-with-edge-case-links-and-files.tgz') + archive_name='pkg-gourmet-with-edge-case-links-and-files.tgz', + swh_revision=previous_unfinished_revision) def test_load(self): """File/Link removed prior to folder with same name creation is ok """ - previous_unfinished_revision = None # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path, - swh_revision=previous_unfinished_revision) + self.loader.load() # then repositories holds 14 revisions, but the last commit self.assertCountRevisions(19) @@ -738,9 +743,7 @@ """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() # then repositories holds 14 revisions, but the last commit self.assertCountRevisions(21) @@ -790,27 +793,26 @@ and the base svn loader are the same. """ def setUp(self): - super().setUp(archive_name='pkg-gourmet.tgz', - loader=SvnLoaderTestFromRemoteDump()) + _LOADER_TEST_CONFIG['debug'] = True # to avoid cleanup in between load + super().setUp(archive_name='pkg-gourmet.tgz', type='remote') def test_load(self): """ Compare results of remote dump loader and base loader """ dump_loader = self.loader - dump_loader.load(svn_url=self.svn_mirror_url) + dump_loader.load() self.assertCountContents(19) self.assertCountDirectories(17) self.assertCountRevisions(6) self.assertCountSnapshots(1) - base_loader = SvnLoaderTest() - base_loader.load(svn_url=self.svn_mirror_url) + base_loader = SvnLoaderTest(self.svn_mirror_url) + base_loader.load() dump_storage_stat = dump_loader.storage.stat_counters() base_storage_stat = base_loader.storage.stat_counters() - self.assertEqual(dump_storage_stat, base_storage_stat) @@ -828,9 +830,7 @@ """ # when - self.loader.load( - svn_url=self.svn_mirror_url, - destination_path=self.destination_path) + self.loader.load() self.assertCountRevisions(7, '7 svn commits') self.assertCountReleases(0)