diff --git a/MANIFEST.in b/MANIFEST.in --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ +include conftest.py include Makefile include Makefile.local include README.db_testing diff --git a/conftest.py b/conftest.py new file mode 100644 --- /dev/null +++ b/conftest.py @@ -0,0 +1,6 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +pytest_plugins = ["swh.storage.pytest_plugin", "swh.loader.pytest_plugin"] diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,5 +2,6 @@ pytest-aiohttp pytest-postgresql dulwich >= 0.18.7 +swh.loader.core swh.loader.git >= 0.0.52 swh.storage[testing] diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py --- a/swh/vault/tests/test_cookers.py +++ b/swh/vault/tests/test_cookers.py @@ -20,6 +20,7 @@ import dulwich.objects import dulwich.porcelain import dulwich.repo +import pytest from swh.loader.git.from_disk import GitLoaderFromDisk from swh.model import from_disk, hashutil @@ -122,14 +123,23 @@ self.git_shell(*args, stdout=None) +@pytest.fixture +def swh_loader_config(swh_loader_config): + swh_loader_config["max_content_size"] = 100 * 1024 * 1024 + return swh_loader_config + + def git_loader( - storage, repo_path, visit_date=datetime.datetime.now(datetime.timezone.utc) + storage, + repo_path, + visit_date=datetime.datetime.now(datetime.timezone.utc), + config=None, ): """Instantiate a Git Loader using the storage instance as storage. """ loader = GitLoaderFromDisk( - "fake_origin", directory=repo_path, visit_date=visit_date + "fake_origin", directory=repo_path, visit_date=visit_date, config=config ) loader.storage = storage return loader @@ -186,7 +196,7 @@ class TestDirectoryCooker: - def test_directory_simple(self, swh_storage): + def test_directory_simple(self, swh_storage, swh_loader_config): repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) @@ -196,7 +206,7 @@ (rp / "dir1/dir2").mkdir(parents=True) (rp / "dir1/dir2/file").write_text(TEST_CONTENT) c = repo.commit() - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() obj_id_hex = repo.repo[c].tree.decode() @@ -215,7 +225,7 @@ directory = from_disk.Directory.from_disk(path=bytes(p)) assert obj_id_hex == hashutil.hash_to_hex(directory.hash) - def test_directory_filtered_objects(self, swh_storage): + def test_directory_filtered_objects(self, swh_storage, swh_loader_config): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") @@ -227,7 +237,7 @@ (rp / "absent_file").write_bytes(file_3) c = repo.commit() - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() obj_id_hex = repo.repo[c].tree.decode() @@ -257,7 +267,7 @@ assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE - def test_directory_bogus_perms(self, swh_storage): + def test_directory_bogus_perms(self, swh_storage, swh_loader_config): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the directory # cooker. @@ -270,7 +280,7 @@ (rp / "wat").write_text(TEST_CONTENT) (rp / "wat").chmod(0o604) c = repo.commit() - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() obj_id_hex = repo.repo[c].tree.decode() @@ -302,7 +312,7 @@ class TestRevisionGitfastCooker: - def test_revision_simple(self, swh_storage): + def test_revision_simple(self, swh_storage, swh_loader_config): # # 1--2--3--4--5--6--7 # @@ -324,7 +334,7 @@ repo.commit("remove file2") (rp / "bin1").rename(rp / "bin") repo.commit("rename bin1 to bin") - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) @@ -341,7 +351,7 @@ assert (p / "dir1/dir2/file").stat().st_mode == 0o100644 assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex - def test_revision_two_roots(self, swh_storage): + def test_revision_two_roots(self, swh_storage, swh_loader_config): # # 1----3---4 # / @@ -359,13 +369,13 @@ repo.commit("add file3") obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex - def test_revision_two_double_fork_merge(self, swh_storage): + def test_revision_two_double_fork_merge(self, swh_storage, swh_loader_config): # # 2---4---6 # / / / @@ -393,13 +403,13 @@ obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex - def test_revision_triple_merge(self, swh_storage): + def test_revision_triple_merge(self, swh_storage, swh_loader_config): # # .---.---5 # / / / @@ -421,13 +431,13 @@ obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p): assert ert.repo.refs[b"HEAD"].decode() == obj_id_hex - def test_revision_filtered_objects(self, swh_storage): + def test_revision_filtered_objects(self, swh_storage, swh_loader_config): repo = TestRepo() with repo as rp: file_1, id_1 = hash_content(b"test1") @@ -441,7 +451,7 @@ repo.commit() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() # FIXME: storage.content_update() should be changed to allow things @@ -469,7 +479,7 @@ assert (p / "hidden_file").read_bytes() == HIDDEN_MESSAGE assert (p / "absent_file").read_bytes() == SKIPPED_MESSAGE - def test_revision_bogus_perms(self, swh_storage): + def test_revision_bogus_perms(self, swh_storage, swh_loader_config): # Some early git repositories have 664/775 permissions... let's check # if all the weird modes are properly normalized in the revision # cooker. @@ -482,7 +492,7 @@ (rp / "wat").write_text(TEST_CONTENT) (rp / "wat").chmod(0o604) repo.commit("initial commit") - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() obj_id_hex = repo.repo.refs[b"HEAD"].decode() obj_id = hashutil.hash_to_bytes(obj_id_hex) @@ -493,14 +503,14 @@ assert (p / "executable").stat().st_mode == 0o100755 assert (p / "wat").stat().st_mode == 0o100644 - def test_revision_null_fields(self, swh_storage): + def test_revision_null_fields(self, swh_storage, swh_loader_config): # Our schema doesn't enforce a lot of non-null revision fields. We need # to check these cases don't break the cooker. repo = TestRepo() with repo as rp: (rp / "file").write_text(TEST_CONTENT) c = repo.commit("initial commit") - loader = git_loader(swh_storage, str(rp)) + loader = git_loader(swh_storage, str(rp), config=swh_loader_config) loader.load() repo.repo.refs[b"HEAD"].decode() dir_id_hex = repo.repo[c].tree.decode()