Page MenuHomeSoftware Heritage

D622.id1986.diff
No OneTemporary

D622.id1986.diff

diff --git a/swh/loader/git/tests/resources/testrepo.tgz b/swh/loader/git/tests/resources/testrepo.tgz
new file mode 100644
index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000
GIT binary patch
literal 0
Hc$@<O00001
literal 0
Hc$@<O00001
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/git/tests/test_loader.py
@@ -0,0 +1,305 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os.path
+import zipfile
+import tempfile
+import subprocess
+
+from swh.loader.git.loader import GitLoader, GitLoaderFromArchive
+from swh.loader.core.tests import BaseLoaderTest, LoaderNoStorage
+
+
+class MockStorage0:
+    """Stand-in storage in its initial state, before anything is added.
+
+    There is no previous snapshot, and every object asked about is
+    reported as missing.
+    """
+
+    def snapshot_get_latest(self, origin_id):
+        """Return None whatever ``origin_id`` is: no snapshot exists yet."""
+        return None
+
+    def content_missing(self, contents, key_hash='sha1'):
+        """Return the ``key_hash`` entry of each item of ``contents``."""
+        return [entry[key_hash] for entry in contents]
+
+    def directory_missing(self, directories):
+        """Return ``directories`` unchanged: all of them are missing."""
+        return directories
+
+    def revision_missing(self, revisions):
+        """Return ``revisions`` unchanged: all of them are missing."""
+        return revisions
+
+
+# Hashes of the four file contents of the test repository (two versions of
+# README, file1.txt and file2.txt). MockStorage1.content_missing() subtracts
+# this set from the hashes it is asked about.
+# NOTE(review): presumably sha1 values, matching the default key_hash of
+# content_missing() -- confirm.
+CONTENT1 = {
+ '33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d', # README v1
+ '349c4ff7d21f1ec0eda26f3d9284c293e3425417', # README v2
+ '799c11e348d39f1704022b8354502e2f81f3c037', # file1.txt
+ '4bdb40dfd6ec75cb730e678b5d7786e30170c5fb', # file2.txt
+ }
+
+# Identifier stored under the 'id' key of SNAPSHOT1.
+SNAPSHOT_ID = 'bdf3b06d6017e0d9ad6447a73da6ff1ae9efb8f0'
+
+# Snapshot expected after the first load: test_load checks it with
+# assertSnapshotOk, and MockStorage1.snapshot_get_latest() returns it.
+# Each branch name maps to the id and the type of its target.
+SNAPSHOT1 = {
+ 'id': SNAPSHOT_ID,
+ 'branches': {
+ 'HEAD': {
+ 'target': '2f01f5ca7e391a2f08905990277faf81e709a649',
+ 'target_type': 'revision',
+ },
+ 'refs/heads/master': {
+ 'target': '2f01f5ca7e391a2f08905990277faf81e709a649',
+ 'target_type': 'revision',
+ },
+ 'refs/heads/branch1': {
+ 'target': 'b0a77609903f767a2fd3d769904ef9ef68468b87',
+ 'target_type': 'revision',
+ },
+ 'refs/heads/branch2': {
+ 'target': 'bd746cd1913721b269b395a56a97baf6755151c2',
+ 'target_type': 'revision',
+ },
+ 'refs/tags/branch2-after-delete': {
+ 'target': 'bd746cd1913721b269b395a56a97baf6755151c2',
+ 'target_type': 'revision',
+ },
+ 'refs/tags/branch2-before-delete': {
+ 'target': '1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b',
+ 'target_type': 'revision',
+ },
+ },
+}
+
+# Hash that MockStorage1.directory_missing() removes from the directories it
+# is asked about.
+# NOTE(review): going by the name, presumably the hash of a sub-directory of
+# the test repository -- confirm.
+SUBDIR_HASH = 'd53f143d5f3aadb278aad60c4e9a17945a2d68de'
+
+# REVISIONS1 maps each revision id of the test repository (the ids checked
+# out with `gco` below) to a directory hash. It is passed to
+# assertRevisionsOk in the tests and used by MockStorage1.
+# directory hashes obtained with:
+# gco b6f40292c4e94a8f7e7b4aff50e6c7429ab98e2a
+# swh-hashtree --ignore '.git' --path .
+# gco 2f01f5ca7e391a2f08905990277faf81e709a649
+# swh-hashtree --ignore '.git' --path .
+# gco bcdc5ebfde1a3cd6c96e0c2ea4eed19c13208777
+# swh-hashtree --ignore '.git' --path .
+# gco 1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b
+# swh-hashtree --ignore '.git' --path .
+# gco 79f65ac75f79dda6ff03d66e1242702ab67fb51c
+# swh-hashtree --ignore '.git' --path .
+# gco b0a77609903f767a2fd3d769904ef9ef68468b87
+# swh-hashtree --ignore '.git' --path .
+# gco bd746cd1913721b269b395a56a97baf6755151c2
+# swh-hashtree --ignore '.git' --path .
+REVISIONS1 = {
+ 'b6f40292c4e94a8f7e7b4aff50e6c7429ab98e2a':
+ '40dbdf55dfd4065422462cc74a949254aefa972e',
+ '2f01f5ca7e391a2f08905990277faf81e709a649':
+ 'e1d0d894835f91a0f887a4bc8b16f81feefdfbd5',
+ 'bcdc5ebfde1a3cd6c96e0c2ea4eed19c13208777':
+ 'b43724545b4759244bb54be053c690649161411c',
+ '1135e94ccf73b5f9bd6ef07b3fa2c5cc60bba69b':
+ 'fbf70528223d263661b5ad4b80f26caf3860eb8e',
+ '79f65ac75f79dda6ff03d66e1242702ab67fb51c':
+ '5df34ec74d6f69072d9a0a6677d8efbed9b12e60',
+ 'b0a77609903f767a2fd3d769904ef9ef68468b87':
+ '9ca0c7d6ffa3f9f0de59fd7912e08f11308a1338',
+ 'bd746cd1913721b269b395a56a97baf6755151c2':
+ 'e1d0d894835f91a0f887a4bc8b16f81feefdfbd5',
+ }
+
+
+class MockStorage1:
+    """The storage's state after the first snapshot is loaded.
+
+    Only objects that are not part of the first load are reported as
+    missing: contents outside CONTENT1, directories other than the
+    REVISIONS1 values and SUBDIR_HASH, and revisions that are not
+    REVISIONS1 keys.
+    """
+
+    def snapshot_get_latest(self, origin_id):
+        """Return SNAPSHOT1, whatever ``origin_id`` is."""
+        return SNAPSHOT1
+
+    def content_missing(self, contents, key_hash='sha1'):
+        """Return the ``key_hash`` values of ``contents`` not in CONTENT1."""
+        return list({c[key_hash] for c in contents} - CONTENT1)
+
+    def directory_missing(self, directories):
+        """Return the directory ids not stored by the first load."""
+        # REVISIONS1 maps revision id -> directory hash, so the known
+        # directories are its values; its keys are revision ids and would
+        # never match a directory id.
+        known = set(REVISIONS1.values()) | {SUBDIR_HASH}
+        return list(set(directories) - known)
+
+    def revision_missing(self, revisions):
+        """Return the revision ids that are not keys of REVISIONS1."""
+        return list(set(revisions) - set(REVISIONS1))
+
+
+class LoaderNoStorageMixin(LoaderNoStorage):
+    """LoaderNoStorage preset for these tests.
+
+    After the parent initialisation, ``origin_id`` and ``visit`` are both
+    set to 1 and ``storage`` is a fresh MockStorage0.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.origin_id = self.visit = 1
+        self.storage = MockStorage0()
+
+
+class GitLoaderNoStorage(LoaderNoStorageMixin, GitLoader):
+    """GitLoader combined with LoaderNoStorageMixin; adds nothing itself."""
+
+
+class GitLoaderFromArchiveNoStorage(
+        LoaderNoStorageMixin, GitLoaderFromArchive):
+    """GitLoaderFromArchive combined with LoaderNoStorageMixin.
+
+    The project name is fixed instead of being derived from the archive.
+    """
+
+    def project_name_from_archive(self, archive_path):
+        """Return 'testrepo', ignoring ``archive_path``."""
+        # We don't want the project name to be 'resources'.
+        return 'testrepo'
+
+
+class BaseGitLoaderTest(BaseLoaderTest):
+    """BaseLoaderTest configured for the git loader tests.
+
+    setUp forwards its arguments to the parent setUp, adding the temporary
+    folder prefix 'swh.loader.git.' and this file's directory as start path.
+    """
+
+    def setUp(self, archive_name, uncompress_archive, filename='testrepo'):
+        tests_dir = os.path.dirname(__file__)
+        super().setUp(
+            archive_name=archive_name,
+            filename=filename,
+            prefix_tmp_folder_name='swh.loader.git.',
+            start_path=tests_dir,
+            uncompress_archive=uncompress_archive,
+        )
+
+
+class BaseDirGitLoaderTest(BaseGitLoaderTest):
+    """Base for tests that load the git repository from a directory.
+
+    setUp has the parent setUp process 'testrepo.tgz' with
+    uncompress_archive=True and creates a GitLoaderNoStorage as
+    ``self.loader``.
+    """
+
+    def setUp(self):
+        super().setUp('testrepo.tgz', True)
+        self.loader = GitLoaderNoStorage()
+
+    def load(self):
+        """Run the loader on ``self.destination_path``; return its result."""
+        load_kwargs = {
+            'origin_url': self.repo_url,
+            'visit_date': '2016-05-03 15:16:32+00',
+            'directory': self.destination_path,
+        }
+        return self.loader.load(**load_kwargs)
+
+
+class BaseZipGitLoaderTest(BaseGitLoaderTest):
+    """Base for tests that load the git repository from a zip archive.
+
+    setUp has the parent setUp process 'testrepo.tgz' with
+    uncompress_archive=True, re-packs the files found under
+    ``self.destination_path`` into a temporary .zip file, and creates a
+    GitLoaderFromArchiveNoStorage as ``self.loader``.
+    """
+
+    def setUp(self):
+        super().setUp('testrepo.tgz', True)
+        self._setup_zip()
+        self.loader = GitLoaderFromArchiveNoStorage()
+
+    def _setup_zip(self):
+        """Zip the tree under destination_path and point the test at it."""
+        self._zip_file = tempfile.NamedTemporaryFile('ab', suffix='.zip')
+        source_dir = os.path.normpath(self.destination_path) + '/'
+        prefix_len = len(source_dir)
+        with zipfile.ZipFile(self._zip_file, 'a') as archive:
+            for root, _dirs, names in os.walk(source_dir):
+                assert root.startswith(source_dir)
+                # Every entry is stored under a top-level 'testrepo' folder.
+                arc_dir = os.path.join('testrepo', root[prefix_len:])
+                for name in names:
+                    archive.write(
+                        filename=os.path.join(root, name),
+                        arcname=os.path.join(arc_dir, name))
+        zip_path = self._zip_file.name
+        # From here on the test works with the zip file, not the directory.
+        self.destination_path = zip_path
+        self.tmp_root_path = None
+        self.repo_url = 'file://' + zip_path
+
+    def tearDown(self):
+        # Close the temporary zip file before the parent clean-up runs.
+        self._zip_file.close()
+        super().tearDown()
+
+    def load(self):
+        """Run the loader on the zip archive and return its result."""
+        load_kwargs = {
+            'origin_url': self.repo_url,
+            'visit_date': '2016-05-03 15:16:32+00',
+            'archive_path': self.destination_path,
+        }
+        return self.loader.load(**load_kwargs)
+
+
+class GitLoaderTests:
+    """Test cases shared by the directory and zip loader test classes.
+
+    Relies on the class it is mixed into for ``load()``, ``self.loader``
+    and the ``assert*`` helpers.
+    """
+
+    def test_load(self):
+        # A first load must be eventful and produce the expected objects.
+        result = self.load()
+        self.assertEqual(result['status'], 'eventful')
+
+        self.assertCountContents(4)  # two README, file1, file2
+        self.assertCountDirectories(7)
+        self.assertCountReleases(0)  # FIXME: why not 2?
+        self.assertCountRevisions(7)
+        self.assertCountSnapshots(1)
+
+        self.assertRevisionsOk(REVISIONS1)
+
+        self.assertSnapshotOk(SNAPSHOT1)
+
+        self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
+        self.assertEqual(self.loader.visit_status(), 'full')
+
+    def test_load_unchanged(self):
+        # First load against the empty storage.
+        first = self.load()
+        self.assertEqual(first['status'], 'eventful')
+
+        # Second load against a storage that already holds the first one.
+        self.loader.storage = MockStorage1()
+        second = self.load()
+        self.assertEqual(second['status'], 'eventful')
+        self.assertCountSnapshots(1)
+
+
+class DirGitLoaderTest(BaseDirGitLoaderTest, GitLoaderTests):
+    """Run the shared tests, plus an incremental-load test, on a directory."""
+
+    def _git(self, *cmd):
+        """Run ``git -C <self.destination_path> <cmd...>``.
+
+        Returns the command's standard output as bytes. If git exits with a
+        non-zero status, its captured stdout and stderr are printed and the
+        subprocess.CalledProcessError is re-raised.
+        """
+        try:
+            return subprocess.check_output(
+                ['git', '-C', self.destination_path] + list(cmd),
+                # Without capturing stderr, e.stderr below is always None.
+                stderr=subprocess.PIPE)
+        except subprocess.CalledProcessError as e:
+            print(e.output)
+            print(e.stderr)
+            raise
+
+    def test_load_changed(self):
+        # Initial load
+        res = self.load()
+        self.assertEqual(res['status'], 'eventful')
+
+        # A committer identity is required for the commits made below.
+        self._git('config', '--local', 'user.email', 'you@example.com')
+        self._git('config', '--local', 'user.name', 'Your Name')
+
+        # Load with a new file + revision
+        with open(os.path.join(self.destination_path, 'hello.py'), 'a') as fd:
+            fd.write("print('Hello world')\n")
+
+        self._git('add', 'hello.py')
+        self._git('commit', '-m', 'Hello world')
+        new_revision = self._git('rev-parse', 'master').decode().strip()
+
+        # Work on a copy so that the shared REVISIONS1 is left untouched.
+        revisions = REVISIONS1.copy()
+        self.assertNotIn(new_revision, revisions)
+        revisions[new_revision] = '85dae072a5aa9923ffa7a7568f819ff21bf49858'
+
+        res = self.load()
+        self.assertEqual(res['status'], 'eventful')
+
+        self.assertCountContents(4 + 1)
+        self.assertCountDirectories(7 + 1)
+        self.assertCountReleases(0)  # FIXME: why not 2?
+        self.assertCountRevisions(7 + 1)
+        self.assertCountSnapshots(1 + 1)
+
+        self.assertRevisionsOk(revisions)
+
+        # TODO: how to check the snapshot id?
+        # self.assertSnapshotOk(SNAPSHOT1)
+
+        self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
+        self.assertEqual(self.loader.visit_status(), 'full')
+
+        # Load with a new merge
+        self._git('merge', 'branch1', '-m', 'merge')
+        new_revision = self._git('rev-parse', 'master').decode().strip()
+
+        self.assertNotIn(new_revision, revisions)
+        revisions[new_revision] = 'dab8a37df8db8666d4e277bef9a546f585b5bedd'
+
+        res = self.load()
+        self.assertEqual(res['status'], 'eventful')
+
+        self.assertCountContents(4 + 1)
+        self.assertCountDirectories(7 + 2)
+        self.assertCountReleases(0)  # FIXME: why not 2?
+        self.assertCountRevisions(7 + 2)
+        self.assertCountSnapshots(1 + 1 + 1)
+
+        self.assertRevisionsOk(revisions)
+
+        # TODO: how to check the snapshot id?
+        # self.assertSnapshotOk(SNAPSHOT1)
+
+        self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
+        self.assertEqual(self.loader.visit_status(), 'full')
+
+
+class ZipGitLoaderTest(BaseZipGitLoaderTest, GitLoaderTests):
+    """Run the shared GitLoaderTests against the zip-archive loader."""

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 7:01 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218318

Event Timeline