diff --git a/swh/loader/git/tests/__init__.py b/swh/loader/git/tests/__init__.py index e69de29..a07e188 100644 --- a/swh/loader/git/tests/__init__.py +++ b/swh/loader/git/tests/__init__.py @@ -0,0 +1,21 @@ +TEST_LOADER_CONFIG = { + 'storage': { + 'cls': 'memory', + 'args': { + } + }, + 'send_contents': True, + 'send_directories': True, + 'send_revisions': True, + 'send_releases': True, + 'send_snapshot': True, + + 'content_size_limit': 100 * 1024 * 1024, + 'content_packet_size': 10, + 'content_packet_size_bytes': 100 * 1024 * 1024, + 'directory_packet_size': 10, + 'revision_packet_size': 10, + 'release_packet_size': 10, + + 'save_data': False, +} diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py index 279998e..1799492 100644 --- a/swh/loader/git/tests/test_loader.py +++ b/swh/loader/git/tests/test_loader.py @@ -1,374 +1,287 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os.path import zipfile import tempfile import subprocess from swh.loader.git.loader import GitLoader, GitLoaderFromArchive -from swh.loader.core.tests import BaseLoaderTest, LoaderNoStorage -from swh.model.hashutil import hash_to_bytes, hash_to_bytehex +from swh.loader.core.tests import BaseLoaderTest +from . import TEST_LOADER_CONFIG -class MockStorage0: - """The storage's state before anything is added.""" - def snapshot_get_latest(self, origin_id): - return None - def content_missing(self, contents, key_hash='sha1'): - return [c[key_hash] for c in contents] - - def directory_missing(self, directories): - return directories - - def revision_missing(self, revisions): - return revisions - - def object_find_by_sha1_git(self, ids): - return {} +class GitLoaderFromArchive(GitLoaderFromArchive): + def project_name_from_archive(self, archive_path): + # We don't want the project name to be 'resources'. + return 'testrepo' CONTENT1 = { '33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d', # README v1 '349c4ff7d21f1ec0eda26f3d9284c293e3425417', # README v2 '799c11e348d39f1704022b8354502e2f81f3c037', # file1.txt '4bdb40dfd6ec75cb730e678b5d7786e30170c5fb', # file2.txt } SNAPSHOT_ID = 'bdf3b06d6017e0d9ad6447a73da6ff1ae9efb8f0' SNAPSHOT1 = { 'id': SNAPSHOT_ID, 'branches': { 'HEAD': { 'target': '2f01f5ca7e391a2f08905990277faf81e709a649', 'target_type': 'revision', }, 'refs/heads/master': { 'target': '2f01f5ca7e391a2f08905990277faf81e709a649', 'target_type': 'revision', }, 'refs/heads/branch1': { 'target': 'b0a77609903f767a2fd3d769904ef9ef68468b87', 'target_type': 'revision', }, 'refs/heads/branch2': { 'target': 'bd746cd1913721b269b395a56a97baf6755151c2', 'target_type': 'revision', }, 'refs/tags/branch2-after-delete': { 'target': 'bd746cd1913721b269b395a56a97baf6755151c2', 'target_type': 'revision', }, 'refs/tags/branch2-before-delete': { 'target': '1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b', 'target_type': 'revision', }, }, } -SUBDIR_HASH = hash_to_bytes('d53f143d5f3aadb278aad60c4e9a17945a2d68de') - # directory hashes obtained with: # gco b6f40292c4e94a8f7e7b4aff50e6c7429ab98e2a # swh-hashtree --ignore '.git' --path . # gco 2f01f5ca7e391a2f08905990277faf81e709a649 # swh-hashtree --ignore '.git' --path . # gco bcdc5ebfde1a3cd6c96e0c2ea4eed19c13208777 # swh-hashtree --ignore '.git' --path . # gco 1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b # swh-hashtree --ignore '.git' --path . # gco 79f65ac75f79dda6ff03d66e1242702ab67fb51c # swh-hashtree --ignore '.git' --path . # gco b0a77609903f767a2fd3d769904ef9ef68468b87 # swh-hashtree --ignore '.git' --path . # gco bd746cd1913721b269b395a56a97baf6755151c2 # swh-hashtree --ignore '.git' --path . REVISIONS1 = { 'b6f40292c4e94a8f7e7b4aff50e6c7429ab98e2a': '40dbdf55dfd4065422462cc74a949254aefa972e', '2f01f5ca7e391a2f08905990277faf81e709a649': 'e1d0d894835f91a0f887a4bc8b16f81feefdfbd5', 'bcdc5ebfde1a3cd6c96e0c2ea4eed19c13208777': 'b43724545b4759244bb54be053c690649161411c', '1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b': 'fbf70528223d263661b5ad4b80f26caf3860eb8e', '79f65ac75f79dda6ff03d66e1242702ab67fb51c': '5df34ec74d6f69072d9a0a6677d8efbed9b12e60', 'b0a77609903f767a2fd3d769904ef9ef68468b87': '9ca0c7d6ffa3f9f0de59fd7912e08f11308a1338', 'bd746cd1913721b269b395a56a97baf6755151c2': 'e1d0d894835f91a0f887a4bc8b16f81feefdfbd5', } -class MockStorage1: - """The storage's state after the first snapshot is loaded.""" - def snapshot_get_latest(self, origin_id): - # The following line reencodes SNAPSHOT1 from the format expected - # by assertSnapshotOk to the one that Storage.snapshot_get_latest - # returns. - return { - 'id': hash_to_bytes(SNAPSHOT1['id']), - 'branches': { - branch_name.encode(): { - 'target': hash_to_bytes(branch['target']), - 'target_type': branch['target_type']} - for (branch_name, branch) - in SNAPSHOT1['branches'].items()}} - - def content_missing(self, contents, key_hash='sha1'): - return map(hash_to_bytes, - {c[key_hash] for c in contents} - CONTENT1) - - def directory_missing(self, directories): - assert all(isinstance(d, bytes) for d in directories) - return (set(directories) - - set(map(hash_to_bytes, REVISIONS1)) - - {hash_to_bytes(SUBDIR_HASH)}) - - def revision_missing(self, revisions): - assert all(isinstance(r, bytes) for r in revisions) - return list(set(revisions) - set(map(hash_to_bytes, REVISIONS1))) - - def object_find_by_sha1_git(self, ids): - res = {} - for id_ in ids: - found = [] - decoded_id = hash_to_bytehex(id_) - if decoded_id in REVISIONS1: - found.append({ - 'sha1_git': id_, - 'type': 'revision', - 'id': id_, - 'object_id': 42, - }) - elif decoded_id in REVISIONS1.values(): - found.append({ - 'sha1_git': id_, - 'type': 'directory', - 'id': id_, - 'object_id': 42, - }) - elif decoded_id == SUBDIR_HASH: - found.append({ - 'sha1_git': id_, - 'type': 'directory', - 'id': id_, - 'object_id': 42, - }) - elif decoded_id in CONTENT1: - found.append({ - 'sha1_git': id_, - 'type': 'content', - 'id': id_, - 'object_id': 42, - }) - res[id_] = found - return res - - -class LoaderNoStorageMixin(LoaderNoStorage): - def __init__(self): - super().__init__() - self.origin_id = 1 - self.visit = 1 - self.storage = MockStorage0() - - -class GitLoaderNoStorage(LoaderNoStorageMixin, GitLoader): - pass - - -class GitLoaderFromArchiveNoStorage(LoaderNoStorageMixin, - GitLoaderFromArchive): - def project_name_from_archive(self, archive_path): - # We don't want the project name to be 'resources'. - return 'testrepo' - - class BaseGitLoaderTest(BaseLoaderTest): def setUp(self, archive_name, uncompress_archive, filename='testrepo'): super().setUp(archive_name=archive_name, filename=filename, prefix_tmp_folder_name='swh.loader.git.', start_path=os.path.dirname(__file__), uncompress_archive=uncompress_archive) +class TestGitLoader(GitLoader): + def parse_config_file(self, *args, **kwargs): + return TEST_LOADER_CONFIG + + class BaseDirGitLoaderTest(BaseGitLoaderTest): """Mixin base loader test to prepare the git repository to uncompress, load and test the results. This sets up """ def setUp(self): super().setUp('testrepo.tgz', True) - self.loader = GitLoaderNoStorage() + self.loader = TestGitLoader() + self.storage = self.loader.storage def load(self): return self.loader.load( origin_url=self.repo_url, visit_date='2016-05-03 15:16:32+00', directory=self.destination_path) +class TestGitLoaderFromArchive(GitLoaderFromArchive): + def parse_config_file(self, *args, **kwargs): + return TEST_LOADER_CONFIG + + class BaseZipGitLoaderTest(BaseGitLoaderTest): """Mixin base loader test to prepare the git repository to uncompress, load and test the results. This sets up """ def setUp(self): super().setUp('testrepo.tgz', True) self._setup_zip() - self.loader = GitLoaderFromArchiveNoStorage() + self.loader = TestGitLoaderFromArchive() + self.storage = self.loader.storage def _setup_zip(self): self._zip_file = tempfile.NamedTemporaryFile('ab', suffix='.zip') dest_dir = os.path.normpath(self.destination_path) + '/' with zipfile.ZipFile(self._zip_file, 'a') as zip_writer: for root, dirs, files in os.walk(dest_dir): assert root.startswith(dest_dir) relative_root = os.path.join( 'testrepo', root[len(dest_dir):]) for file_ in files: zip_writer.write( filename=os.path.join(root, file_), arcname=os.path.join(relative_root, file_)) self.destination_path = self._zip_file.name self.tmp_root_path = None self.repo_url = 'file://' + self.destination_path def tearDown(self): self._zip_file.close() super().tearDown() def load(self): return self.loader.load( origin_url=self.repo_url, visit_date='2016-05-03 15:16:32+00', archive_path=self.destination_path) class GitLoaderTests: """Common tests for all git loaders.""" def test_load(self): """Loads a simple repository (made available by `setUp()`), and checks everything was added in the storage.""" res = self.load() self.assertEqual(res['status'], 'eventful', res) - self.assertCountContents(4) # two README, file1, file2 + self.assertContentsContain(CONTENT1) self.assertCountDirectories(7) self.assertCountReleases(0) # FIXME: why not 2? self.assertCountRevisions(7) self.assertCountSnapshots(1) - self.assertRevisionsOk(REVISIONS1) + self.assertRevisionsContain(REVISIONS1) - self.assertSnapshotOk(SNAPSHOT1) + self.assertSnapshotEqual(SNAPSHOT1) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') def test_load_unchanged(self): """Checks loading a repository a second time does not add any extra data.""" res = self.load() self.assertEqual(res['status'], 'eventful') - self.loader.storage = MockStorage1() res = self.load() self.assertEqual(res['status'], 'uneventful') self.assertCountSnapshots(1) class DirGitLoaderTest(BaseDirGitLoaderTest, GitLoaderTests): """Tests for the GitLoader. Includes the common ones, and add others that only work with a local dir.""" def _git(self, *cmd): """Small wrapper around subprocess to call Git.""" try: return subprocess.check_output( ['git', '-C', self.destination_path] + list(cmd)) except subprocess.CalledProcessError as e: print(e.output) print(e.stderr) raise def test_load_changed(self): """Loads a repository, makes some changes by adding files, commits, and merges, load it again, and check the storage contains everything it should.""" # Initial load res = self.load() self.assertEqual(res['status'], 'eventful', res) self._git('config', '--local', 'user.email', 'you@example.com') self._git('config', '--local', 'user.name', 'Your Name') # Load with a new file + revision with open(os.path.join(self.destination_path, 'hello.py'), 'a') as fd: fd.write("print('Hello world')\n") self._git('add', 'hello.py') self._git('commit', '-m', 'Hello world') new_revision = self._git('rev-parse', 'master').decode().strip() revisions = REVISIONS1.copy() assert new_revision not in revisions revisions[new_revision] = '85dae072a5aa9923ffa7a7568f819ff21bf49858' res = self.load() self.assertEqual(res['status'], 'eventful') self.assertCountContents(4 + 1) self.assertCountDirectories(7 + 1) self.assertCountReleases(0) # FIXME: why not 2? self.assertCountRevisions(7 + 1) self.assertCountSnapshots(1 + 1) - self.assertRevisionsOk(revisions) + self.assertRevisionsContain(revisions) # TODO: how to check the snapshot id? - # self.assertSnapshotOk(SNAPSHOT1) + # self.assertSnapshotEqual(SNAPSHOT1) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') # Load with a new merge self._git('merge', 'branch1', '-m', 'merge') new_revision = self._git('rev-parse', 'master').decode().strip() assert new_revision not in revisions revisions[new_revision] = 'dab8a37df8db8666d4e277bef9a546f585b5bedd' res = self.load() self.assertEqual(res['status'], 'eventful') self.assertCountContents(4 + 1) self.assertCountDirectories(7 + 2) self.assertCountReleases(0) # FIXME: why not 2? self.assertCountRevisions(7 + 2) self.assertCountSnapshots(1 + 1 + 1) - self.assertRevisionsOk(revisions) + self.assertRevisionsContain(revisions) # TODO: how to check the snapshot id? - # self.assertSnapshotOk(SNAPSHOT1) + # self.assertSnapshotEqual(SNAPSHOT1) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class ZipGitLoaderTest(BaseZipGitLoaderTest, GitLoaderTests): """Tests for GitLoaderFromArchive. Imports the common ones from GitLoaderTests.""" pass diff --git a/swh/loader/git/tests/test_updater.py b/swh/loader/git/tests/test_updater.py index 380f1c6..e20914d 100644 --- a/swh/loader/git/tests/test_updater.py +++ b/swh/loader/git/tests/test_updater.py @@ -1,19 +1,22 @@ from swh.loader.git.updater import BulkUpdater -from swh.loader.git.tests.test_loader import ( - DirGitLoaderTest, LoaderNoStorageMixin) +from swh.loader.git.tests.test_loader import DirGitLoaderTest -class BulkUpdaterNoStorage(LoaderNoStorageMixin, BulkUpdater): - """Subclass of BulkUpdater that uses a mock storage.""" - pass +class TestBulkUpdater(BulkUpdater): + def parse_config_file(self, *args, **kwargs): + return { + **super().parse_config_file(*args, **kwargs), + 'storage': {'cls': 'memory', 'args': {}} + } class BulkUpdaterTest(DirGitLoaderTest): """Same tests as for the GitLoader, but running on BulkUpdater.""" def setUp(self): super().setUp() - self.loader = BulkUpdaterNoStorage() + self.loader = TestBulkUpdater() + self.storage = self.loader.storage def load(self): return self.loader.load( origin_url=self.repo_url)