diff --git a/swh/loader/mercurial/archive_extract.py b/swh/loader/mercurial/archive_extract.py --- a/swh/loader/mercurial/archive_extract.py +++ b/swh/loader/mercurial/archive_extract.py @@ -28,6 +28,9 @@ """ logstr = 'From %s - ' % source if log and source else '' + if dir and not os.path.exists(dir): + os.makedirs(dir, exist_ok=True) + archive_base = os.path.basename(archive) if archive_base[0] == '.': package = '.' + archive_base.split('.')[1] diff --git a/swh/loader/mercurial/loader.py b/swh/loader/mercurial/loader.py --- a/swh/loader/mercurial/loader.py +++ b/swh/loader/mercurial/loader.py @@ -58,7 +58,7 @@ ADDITIONAL_CONFIG = { 'bundle_filename': ('str', 'HG20_none_bundle'), - 'reduce_effort': ('bool', True), # default: Try to be smart about time + 'reduce_effort': ('bool', False), 'temp_directory': ('str', '/tmp'), 'cache1_size': ('int', 800*1024*1024), 'cache2_size': ('int', 800*1024*1024), diff --git a/swh/loader/mercurial/tests/common.py b/swh/loader/mercurial/tests/common.py new file mode 100644 --- /dev/null +++ b/swh/loader/mercurial/tests/common.py @@ -0,0 +1,63 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.loader.mercurial.loader import ( + HgBundle20Loader, HgArchiveBundle20Loader +) + + +_LOADER_TEST_CONFIG = { + 'bundle_filename': 'HG20_none_bundle', + 'cache1_size': 838860800, + 'cache2_size': 838860800, + 'content_packet_size': 100000, + 'content_packet_size_bytes': 1073741824, + 'content_size_limit': 104857600, + 'directory_packet_size': 25000, + 'log_db': 'dbname=softwareheritage-log', + 'occurrence_packet_size': 100000, + 'reduce_effort': False, + 'release_packet_size': 100000, + 'revision_packet_size': 100000, + 'save_data': False, + 'save_data_path': '', + 'send_contents': True, + 'send_directories': True, + 
'send_occurrences': True, + 'send_releases': True, + 'send_revisions': True, + 'send_snapshot': True, + 'storage': {'args': {}, 'cls': 'memory'}, + 'temp_directory': '/tmp/swh.loader.mercurial' +} + + +class BaseHgLoaderMemoryStorage: + """The base mercurial loader to test. + + Mixin behavior changed to: + - use an in-memory storage + - not use the default configuration loading mechanism + + At the end of the tests, you can make sure you have the right + objects. + + """ + def __init__(self): + super().__init__() + self.origin_id = 1 + self.visit = 1 + + def parse_config_file(self, *args, **kwargs): + return _LOADER_TEST_CONFIG + + +class HgLoaderMemoryStorage(BaseHgLoaderMemoryStorage, HgBundle20Loader): + pass + + +class HgArchiveLoaderMemoryStorage(BaseHgLoaderMemoryStorage, + HgArchiveBundle20Loader): + pass diff --git a/swh/loader/mercurial/tests/test_converters.py b/swh/loader/mercurial/tests/test_converters.py new file mode 100644 --- /dev/null +++ b/swh/loader/mercurial/tests/test_converters.py @@ -0,0 +1,76 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest + +from swh.loader.mercurial import converters + + +class TestParseAuthorConverters(unittest.TestCase): + def test_parse_author_no_email(self): + self.assertIsNone(converters.parse_author(None)) + + def test_parse_author_no_bracket(self): + actual_author = converters.parse_author(b'someone') + + self.assertEqual(actual_author, { + 'name': None, + 'email': None, + 'fullname': b'someone' + }) + + def test_parse_author_2(self): + actual_author = converters.parse_author(b'something <wicked') + + self.assertEqual(actual_author, { + 'name': b'something', + 'email': None, + 'fullname': b'something <wicked' + }) + + def test_parse_author_3(self): + actual_author = converters.parse_author(b'something >wicked') + + self.assertEqual(actual_author, { + 'name': None, + 'email': None, + 'fullname': b'something >wicked' + }) + + def test_parse_author_4(self): + actual_author = converters.parse_author(b'something <') 
+ + self.assertEqual(actual_author, { + 'name': b'something', + 'email': None, + 'fullname': b'something <' + }) + + def test_parse_author_5(self): + actual_author = converters.parse_author(b'<only>') + + self.assertEqual(actual_author, { + 'name': None, + 'email': b'only', + 'fullname': b'<only>' + }) + + def test_parse_author_6(self): + actual_author = converters.parse_author(b'  <something>') + + self.assertEqual(actual_author, { + 'name': b' ', + 'email': b'something', + 'fullname': b'  <something>' + }) + + def test_parse_author_normal(self): + actual_author = converters.parse_author(b'someone <awesome>') + + self.assertEqual(actual_author, { + 'name': b'someone', + 'email': b'awesome', + 'fullname': b'someone <awesome>' + }) diff --git a/swh/loader/mercurial/tests/test_loader.py b/swh/loader/mercurial/tests/test_loader.py --- a/swh/loader/mercurial/tests/test_loader.py +++ b/swh/loader/mercurial/tests/test_loader.py @@ -5,54 +5,10 @@ import os -from swh.loader.core.tests import BaseLoaderTest -from swh.loader.mercurial.loader import HgBundle20Loader - -_LOADER_TEST_CONFIG = { - 'bundle_filename': 'HG20_none_bundle', - 'cache1_size': 838860800, - 'cache2_size': 838860800, - 'content_packet_size': 100000, - 'content_packet_size_bytes': 1073741824, - 'content_size_limit': 104857600, - 'directory_packet_size': 25000, - 'log_db': 'dbname=softwareheritage-log', - 'occurrence_packet_size': 100000, - 'reduce_effort': False, - 'release_packet_size': 100000, - 'revision_packet_size': 100000, - 'save_data': False, - 'save_data_path': '', - 'send_contents': True, - 'send_directories': True, - 'send_occurrences': True, - 'send_releases': True, - 'send_revisions': True, - 'send_snapshot': True, - 'storage': {'args': {}, 'cls': 'memory'}, - 'temp_directory': '/tmp/swh.loader.mercurial' -} - - -class HgLoaderMemoryStorage(HgBundle20Loader): - """The mercurial loader to test. 
- - Its behavior has been changed to: - - not use any persistence (no storage, or for now a passthrough - storage with no filtering) - - not use the default configuration loading - - At the end of the tests, you can make sure you have the rights - objects. - - """ - def __init__(self): - super().__init__() - self.origin_id = 1 - self.visit = 1 +from unittest.mock import patch - def parse_config_file(self, *args, **kwargs): - return _LOADER_TEST_CONFIG +from swh.loader.core.tests import BaseLoaderTest +from .common import HgLoaderMemoryStorage, HgArchiveLoaderMemoryStorage class BaseHgLoaderTest(BaseLoaderTest): @@ -62,11 +18,14 @@ This sets up """ - def setUp(self, archive_name='the-sandbox.tgz', filename='the-sandbox'): + def setUp(self, loader=HgLoaderMemoryStorage, + archive_name='the-sandbox.tgz', filename='the-sandbox', + uncompress_archive=True): super().setUp(archive_name=archive_name, filename=filename, prefix_tmp_folder_name='swh.loader.mercurial.', - start_path=os.path.dirname(__file__)) - self.loader = HgLoaderMemoryStorage() + start_path=os.path.dirname(__file__), + uncompress_archive=uncompress_archive) + self.loader = loader() self.storage = self.loader.storage @@ -74,7 +33,6 @@ """Load a mercurial repository without release """ - def test_load(self): """Load a repository with multiple branches results in 1 snapshot @@ -185,23 +143,8 @@ self.assertEqual(self.loader.visit_status(), 'full') -class LoaderITest2(BaseHgLoaderTest): - """Load a mercurial repository with release - - """ - def setUp(self): - super().setUp(archive_name='hello.tgz', filename='hello') - - def test_load(self): - """Load a repository with tags results in 1 snapshot - - """ - # when - self.loader.load( - origin_url=self.repo_url, - visit_date='2016-05-03 15:16:32+00', - directory=self.destination_path) - +class CommonHgLoaderData: + def assert_data_ok(self): # then self.assertCountContents(3) self.assertCountDirectories(3) @@ -245,3 +188,62 @@ 
self.assertSnapshotEqual(expected_snapshot) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') + + +class LoaderITest2(BaseHgLoaderTest, CommonHgLoaderData): + """Load a mercurial repository with release + + """ + def setUp(self): + super().setUp(archive_name='hello.tgz', filename='hello') + + def test_load(self): + """Load a repository with tags results in 1 snapshot + + """ + # when + self.loader.load( + origin_url=self.repo_url, + visit_date='2016-05-03 15:16:32+00', + directory=self.destination_path) + + self.assert_data_ok() + + +class ArchiveLoaderITest(BaseHgLoaderTest, CommonHgLoaderData): + """Load a mercurial repository archive with release + + """ + def setUp(self): + super().setUp(loader=HgArchiveLoaderMemoryStorage, + archive_name='hello.tgz', filename='hello', + uncompress_archive=False) + + def test_load(self): + """Load a mercurial repository archive with tags results in 1 snapshot + + """ + # when + self.loader.load( + origin_url=self.repo_url, + visit_date='2016-05-03 15:16:32+00', + archive_path=self.destination_path) + + self.assert_data_ok() + + @patch('swh.loader.mercurial.archive_extract.patoolib') + def test_load_with_failure(self, mock_patoo): + mock_patoo.side_effect = ValueError + + # when + r = self.loader.load( + origin_url=self.repo_url, + visit_date='2016-05-03 15:16:32+00', + archive_path=self.destination_path) + + self.assertEqual(r, {'status': 'failed'}) + self.assertCountContents(0) + self.assertCountDirectories(0) + self.assertCountRevisions(0) + self.assertCountReleases(0) + self.assertCountSnapshots(0) diff --git a/swh/loader/mercurial/loader_verifier.py b/swh/loader/mercurial/tests/test_loader_verifier.py rename from swh/loader/mercurial/loader_verifier.py rename to swh/loader/mercurial/tests/test_loader_verifier.py --- a/swh/loader/mercurial/loader_verifier.py +++ b/swh/loader/mercurial/tests/test_loader_verifier.py @@ -3,7 +3,6 @@ # License: GNU General 
Public License version 3, or any later version # See top-level LICENSE file for more information -import click import code import datetime import hglib @@ -14,21 +13,28 @@ import time from binascii import hexlify, unhexlify - from swh.model.hashutil import MultiHash +from ..converters import PRIMARY_ALGO as ALGO +from ..objects import SimpleTree + +from swh.loader.core.tests import BaseLoaderTest + +from .common import HgLoaderMemoryStorage -from .loader import HgBundle20Loader -from .converters import PRIMARY_ALGO as ALGO -from .objects import SimpleTree +class HgLoaderValidater: + """Loader validater + + """ + def __init__(self, loader): + self.loader = loader -class HgLoaderValidater(HgBundle20Loader): def generate_all_blobs(self, validate=True, frequency=1): logging.debug('GENERATING BLOBS') i = 0 start = time.time() u = set() - for blob, node_info in self.br.yield_all_blobs(): + for blob, node_info in self.loader.br.yield_all_blobs(): filename = node_info[0] header = node_info[2] i += 1 @@ -83,7 +89,7 @@ start = time.time() validated = 0 - for header, tree, new_dirs in self.load_directories(): + for header, tree, new_dirs in self.loader.load_directories(): if validate and (c >= validated) and (random.random() < frequency): self.validate_tree(tree, header, c) @@ -138,11 +144,6 @@ tree_dirs.sort(key=so1) base_dirs.sort(key=so1) - # for i in range(len(tree_dirs)): - # if tree_dirs[i] != base_dirs[i]: - # logging.debug(i) - # code.interact(local=dict(globals(), **locals())) - logging.debug('Program will quit after your next Ctrl-D') code.interact(local=dict(globals(), **locals())) quit() @@ -163,15 +164,14 @@ logging.debug('ELAPSED: %s' % (time.time()-start)) def runtest(self, hgdir, validate_blobs=False, validate_trees=False, - frequency=1.0, test_iterative=False): - """HgLoaderValidater().runtest('/home/avi/SWH/mozilla-unified') + frequency=1.0): + """loader = HgLoaderMemoryStorage() + HgLoaderValidater(loader).runtest('/home/avi/SWH/mozilla-unified') """ 
self.origin_id = 'test' dt = datetime.datetime.now(tz=datetime.timezone.utc) - if test_iterative: - dt = dt - datetime.timedelta(10) hgrepo = None if (hgdir.lower().startswith('http:') @@ -182,67 +182,86 @@ try: logging.debug('preparing') - self.prepare(origin_url=hgrepo, visit_date=dt, directory=hgdir) + self.loader.prepare( + origin_url=hgrepo, visit_date=dt, directory=hgdir) self.file_node_to_hash = {} - # self.generate_all_blobs(validate=validate_blobs, - # frequency=frequency) - - # self.generate_all_trees(validate=validate_trees, frequency=frequency) - # self.generate_all_commits() logging.debug('getting contents') cs = 0 - for c in self.get_contents(): + for c in self.loader.get_contents(): cs += 1 pass logging.debug('getting directories') ds = 0 - for d in self.get_directories(): + for d in self.loader.get_directories(): ds += 1 pass revs = 0 logging.debug('getting revisions') - for rev in self.get_revisions(): + for rev in self.loader.get_revisions(): revs += 1 pass logging.debug('getting releases') rels = 0 - for rel in self.get_releases(): + for rel in self.loader.get_releases(): rels += 1 logging.debug(rel) self.visit = 'foo' + snps = 0 logging.debug('getting snapshot') - o = self.get_snapshot() + o = self.loader.get_snapshot() logging.debug('Snapshot: %s' % o) + if o: + snps += 1 finally: - self.cleanup() - - logging.info('final count: cs %s ds %s revs %s rels %s' % ( - cs, ds, revs, rels)) - - -@click.command() -@click.option('--verbose', is_flag=True, default=False) -@click.option('--validate-frequency', default=0.001, type=click.FLOAT) -@click.option('--test-iterative', default=False, type=click.BOOL) -@click.argument('repository-url', required=1) -def main(verbose, validate_frequency, test_iterative, repository_url): - logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) - while repository_url[-1] == '/': - repository_url = repository_url[:-1] - - HgLoaderValidater().runtest( - repository_url, - validate_blobs=True, 
validate_trees=True, - frequency=validate_frequency, - test_iterative=test_iterative) - - -if __name__ == '__main__': - main() + self.loader.cleanup() + + return cs, ds, revs, rels, snps + + +class BaseLoaderVerifierTest(BaseLoaderTest): + def setUp(self, archive_name='the-sandbox.tgz', filename='the-sandbox'): + super().setUp(archive_name=archive_name, filename=filename, + prefix_tmp_folder_name='swh.loader.mercurial.', + start_path=os.path.dirname(__file__)) + loader = HgLoaderMemoryStorage() + self.validator = HgLoaderValidater(loader) + + +class LoaderVerifierTest1(BaseLoaderVerifierTest): + def test_data(self): + cs, ds, revs, rels, snps = self.validator.runtest( + self.destination_path, + validate_blobs=True, + validate_trees=True, + frequency=0.001) + + self.assertEqual(cs, 2) + self.assertEqual(ds, 3) + self.assertEqual(revs, 58) + self.assertEqual(rels, 0) + self.assertEqual(snps, 1) + + +class LoaderVerifierTest2(BaseLoaderVerifierTest): + def setUp(self, archive_name='hello.tgz', filename='hello'): + super().setUp(archive_name=archive_name, filename=filename) + + def test_data(self): + cs, ds, revs, rels, snps = self.validator.runtest( + self.destination_path, + validate_blobs=True, + validate_trees=True, + frequency=0.001) + + self.assertEqual(cs, 3) + self.assertEqual(ds, 3) + self.assertEqual(rels, 1) + self.assertEqual(revs, 3) + self.assertEqual(snps, 1) diff --git a/swh/loader/mercurial/tests/test_tasks.py b/swh/loader/mercurial/tests/test_tasks.py new file mode 100644 --- /dev/null +++ b/swh/loader/mercurial/tests/test_tasks.py @@ -0,0 +1,53 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest +from unittest.mock import patch + +from swh.loader.mercurial.tasks import LoadMercurial, LoadArchiveMercurial + + +class 
TestTasks(unittest.TestCase): + def test_check_task_name(self): + task = LoadMercurial() + self.assertEqual(task.task_queue, 'swh_loader_mercurial') + + @patch('swh.loader.mercurial.loader.HgBundle20Loader.load') + def test_task(self, mock_loader): + mock_loader.return_value = {'status': 'eventful'} + task = LoadMercurial() + + # given + actual_result = task.run_task( + origin_url='origin_url', visit_date='now', directory='/some/repo') + + self.assertEqual(actual_result, {'status': 'eventful'}) + + mock_loader.assert_called_once_with( + origin_url='origin_url', visit_date='now', directory='/some/repo') + + +class TestTasks2(unittest.TestCase): + def test_check_task_name(self): + task = LoadArchiveMercurial() + self.assertEqual(task.task_queue, 'swh_loader_mercurial_archive') + + @patch('swh.loader.mercurial.loader.HgArchiveBundle20Loader.load') + def test_task(self, mock_loader): + mock_loader.return_value = {'status': 'uneventful'} + task = LoadArchiveMercurial() + + # given + actual_result = task.run_task( + origin_url='another_url', + archive_path='/some/tar.tgz', + visit_date='now') + + self.assertEqual(actual_result, {'status': 'uneventful'}) + + mock_loader.assert_called_once_with( + origin_url='another_url', + archive_path='/some/tar.tgz', + visit_date='now')