Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124089
D3428.id12158.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
21 KB
Subscribers
None
D3428.id12158.diff
View Options
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,6 +2,5 @@
include Makefile
include requirements*.txt
include version.txt
-recursive-include swh/loader/git/tests/data *.xz
-recursive-include swh/loader/git/tests/resources/ *
+recursive-include swh/loader/git/tests/data *
recursive-include swh py.typed
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,3 +1,4 @@
pytest
pytest-mock
swh.scheduler[testing]
+swh.storage[testing]
diff --git a/swh/loader/git/tests/__init__.py b/swh/loader/git/tests/__init__.py
--- a/swh/loader/git/tests/__init__.py
+++ b/swh/loader/git/tests/__init__.py
@@ -1,27 +1,23 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-TEST_LOADER_CONFIG = {
- "storage": {
- "cls": "pipeline",
- "steps": [
- {"cls": "filter"},
- {
- "cls": "buffer",
- "min_batch_size": {
- "content": 10,
- "content_bytes": 100 * 1024 * 1024,
- "directory": 10,
- "revision": 10,
- "release": 10,
- },
- },
- {"cls": "memory"},
- ],
- },
- "max_content_size": 100 * 1024 * 1024,
- "pack_size_bytes": 4 * 1024 * 1024 * 1024,
- "save_data": False,
-}
+import os
+import subprocess
+
+from typing import Optional
+
+
+def prepare_repository_from_archive(
+ archive_path: str,
+ filename: Optional[str] = None,
+ tmp_path: str = "/tmp",
+ uncompress_archive: bool = True,
+) -> str:
+ if uncompress_archive:
+ # uncompress folder/repositories/dump for the loader to ingest
+ subprocess.check_output(["tar", "xf", archive_path, "-C", tmp_path])
+ # build the origin url (or some derivative form)
+ _fname = filename if filename else os.path.basename(archive_path)
+ return f"file://{tmp_path}/{_fname}"
diff --git a/swh/loader/git/tests/conftest.py b/swh/loader/git/tests/conftest.py
--- a/swh/loader/git/tests/conftest.py
+++ b/swh/loader/git/tests/conftest.py
@@ -1,11 +1,53 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import os
+import yaml
+
import pytest
+from typing import Any, Dict
+
from swh.scheduler.tests.conftest import * # noqa
+from swh.storage.tests.conftest import * # noqa
+
+
+@pytest.fixture
+def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
+ swh_storage_backend_config["journal_writer"] = {}
+ return {
+ "storage": {
+ "cls": "pipeline",
+ "steps": [
+ {"cls": "filter"},
+ {
+ "cls": "buffer",
+ "min_batch_size": {
+ "content": 10,
+ "content_bytes": 100 * 1024 * 1024,
+ "directory": 10,
+ "revision": 10,
+ "release": 10,
+ },
+ },
+ swh_storage_backend_config,
+ ],
+ },
+ "max_content_size": 100 * 1024 * 1024,
+ "pack_size_bytes": 4 * 1024 * 1024 * 1024,
+ "save_data": False,
+ }
+
+
+@pytest.fixture
+def swh_config(swh_loader_config, monkeypatch, tmp_path):
+ conffile = os.path.join(str(tmp_path), "loader.yml")
+ with open(conffile, "w") as f:
+ f.write(yaml.dump(swh_loader_config))
+ monkeypatch.setenv("SWH_CONFIG_FILENAME", conffile)
+ return conffile
@pytest.fixture(scope="session") # type: ignore # expected redefinition
diff --git a/swh/loader/git/tests/resources/testrepo.tgz b/swh/loader/git/tests/data/testrepo.tgz
rename from swh/loader/git/tests/resources/testrepo.tgz
rename to swh/loader/git/tests/data/testrepo.tgz
diff --git a/swh/loader/git/tests/test_from_disk.py b/swh/loader/git/tests/test_from_disk.py
--- a/swh/loader/git/tests/test_from_disk.py
+++ b/swh/loader/git/tests/test_from_disk.py
@@ -3,38 +3,24 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import copy
import datetime
import os.path
import dulwich.repo
+import pytest
+
+from unittest import TestCase
from swh.model.model import Snapshot, SnapshotBranch, TargetType
from swh.model.hashutil import hash_to_bytes
-
-from swh.loader.core.tests import BaseLoaderTest
from swh.loader.tests.common import assert_last_visit_matches
+from swh.loader.git.from_disk import GitLoaderFromDisk
+from swh.loader.git.from_disk import GitLoaderFromArchive
+from swh.loader.package.tests.common import check_snapshot, get_stats
-from swh.loader.git.from_disk import GitLoaderFromDisk as OrigGitLoaderFromDisk
-from swh.loader.git.from_disk import GitLoaderFromArchive as OrigGitLoaderFromArchive
-
-from . import TEST_LOADER_CONFIG
-
-
-class GitLoaderFromArchive(OrigGitLoaderFromArchive):
- def project_name_from_archive(self, archive_path):
- # We don't want the project name to be 'resources'.
- return "testrepo"
+from swh.loader.git.tests import prepare_repository_from_archive
- def parse_config_file(self, *args, **kwargs):
- return TEST_LOADER_CONFIG
-
-
-CONTENT1 = {
- "33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d", # README v1
- "349c4ff7d21f1ec0eda26f3d9284c293e3425417", # README v2
- "799c11e348d39f1704022b8354502e2f81f3c037", # file1.txt
- "4bdb40dfd6ec75cb730e678b5d7786e30170c5fb", # file2.txt
-}
SNAPSHOT_ID = "a23699280a82a043f8c0994cf1631b568f716f95"
@@ -105,119 +91,77 @@
}
-class BaseGitLoaderFromDiskTest(BaseLoaderTest):
- def setUp(self, archive_name, uncompress_archive, filename="testrepo"):
- super().setUp(
- archive_name=archive_name,
- filename=filename,
- prefix_tmp_folder_name="swh.loader.git.",
- start_path=os.path.dirname(__file__),
- uncompress_archive=uncompress_archive,
- )
-
-
-class GitLoaderFromDiskTest(OrigGitLoaderFromDisk):
- def parse_config_file(self, *args, **kwargs):
- return TEST_LOADER_CONFIG
-
-
-class BaseDirGitLoaderFromDiskTest(BaseGitLoaderFromDiskTest):
- """Mixin base loader test to prepare the git
- repository to uncompress, load and test the results.
-
- This sets up
-
- """
-
- def setUp(self):
- super().setUp("testrepo.tgz", uncompress_archive=True)
- self.loader = GitLoaderFromDiskTest(
- url=self.repo_url,
- visit_date=datetime.datetime(
- 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
- ),
- directory=self.destination_path,
- )
- self.storage = self.loader.storage
- self.repo = dulwich.repo.Repo(self.destination_path)
-
- def load(self):
- return self.loader.load()
-
-
-class BaseGitLoaderFromArchiveTest(BaseGitLoaderFromDiskTest):
- """Mixin base loader test to prepare the git
- repository to uncompress, load and test the results.
-
- This sets up
-
- """
-
- def setUp(self):
- super().setUp("testrepo.tgz", uncompress_archive=False)
- self.loader = GitLoaderFromArchive(
- url=self.repo_url,
- visit_date=datetime.datetime(
- 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
- ),
- archive_path=self.destination_path,
- )
- self.storage = self.loader.storage
-
- def load(self):
- return self.loader.load()
-
-
-class GitLoaderFromDiskTests:
+class CommonGitLoaderTests:
"""Common tests for all git loaders."""
def test_load(self):
"""Loads a simple repository (made available by `setUp()`),
and checks everything was added in the storage."""
- res = self.load()
- self.assertEqual(res["status"], "eventful", res)
-
- self.assertContentsContain(CONTENT1)
- self.assertCountDirectories(7)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7)
- self.assertCountSnapshots(1)
-
- self.assertRevisionsContain(REVISIONS1)
+ res = self.loader.load()
- self.assertSnapshotEqual(SNAPSHOT1)
-
- self.assertEqual(self.loader.load_status(), {"status": "eventful"})
- self.assertEqual(self.loader.visit_status(), "full")
+ assert res == {"status": "eventful"}
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
snapshot=hash_to_bytes(SNAPSHOT1["id"]),
)
+ stats = get_stats(self.loader.storage)
+ assert stats == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+
+ check_snapshot(SNAPSHOT1, self.loader.storage)
+
def test_load_unchanged(self):
"""Checks loading a repository a second time does not add
any extra data."""
- res = self.load()
- self.assertEqual(res["status"], "eventful")
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
snapshot=hash_to_bytes(SNAPSHOT1["id"]),
)
- res = self.load()
- self.assertEqual(res["status"], "uneventful")
- self.assertCountSnapshots(1)
+ stats0 = get_stats(self.loader.storage)
+ assert stats0 == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+
+ res = self.loader.load()
+ assert res == {"status": "uneventful"}
+ stats1 = get_stats(self.loader.storage)
+ expected_stats = copy.deepcopy(stats0)
+ expected_stats["origin_visit"] += 1
+ assert stats1 == expected_stats
+
+ check_snapshot(SNAPSHOT1, self.loader.storage)
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
@@ -225,17 +169,32 @@
)
-class DirGitLoaderTest(BaseDirGitLoaderFromDiskTest, GitLoaderFromDiskTests):
- """Tests for the GitLoaderFromDisk. Includes the common ones, and
- add others that only work with a local dir."""
+class FullGitLoaderTests(CommonGitLoaderTests):
+ """Tests for GitLoader (from disk or not). Includes the common ones, and
+ adds others that only work with a local dir.
+
+ """
def test_load_changed(self):
"""Loads a repository, makes some changes by adding files, commits,
and merges, load it again, and check the storage contains everything
it should."""
# Initial load
- res = self.load()
- self.assertEqual(res["status"], "eventful", res)
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
+
+ stats0 = get_stats(self.loader.storage)
+ assert stats0 == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
# Load with a new file + revision
with open(os.path.join(self.destination_path, "hello.py"), "a") as fd:
@@ -251,27 +210,30 @@
assert new_revision not in revisions
revisions[new_revision] = new_dir
- res = self.load()
- self.assertEqual(res["status"], "eventful")
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
- self.assertCountContents(4 + 1)
- self.assertCountDirectories(7 + 1)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7 + 1)
- self.assertCountSnapshots(1 + 1)
+ stats1 = get_stats(self.loader.storage)
+ expected_stats = copy.deepcopy(stats0)
+ # did one new visit
+ expected_stats["origin_visit"] += 1
+ # with one more of the following objects
+ expected_stats["person"] += 1
+ expected_stats["content"] += 1
+ expected_stats["directory"] += 1
+ expected_stats["revision"] += 1
+ # resulting in 1 new snapshot
+ expected_stats["snapshot"] += 1
- self.assertRevisionsContain(revisions)
-
- self.assertEqual(self.loader.load_status(), {"status": "eventful"})
- self.assertEqual(self.loader.visit_status(), "full")
+ assert stats1 == expected_stats
visit_status = assert_last_visit_matches(
- self.storage, self.repo_url, status="full", type="git"
+ self.loader.storage, self.repo_url, status="full", type="git"
)
- self.assertIsNotNone(visit_status.snapshot)
+ assert visit_status.snapshot is not None
snapshot_id = visit_status.snapshot
- snapshot = self.storage.snapshot_get(snapshot_id)
+ snapshot = self.loader.storage.snapshot_get(snapshot_id)
branches = snapshot["branches"]
assert branches[b"HEAD"] == {
"target": b"refs/heads/master",
@@ -304,29 +266,30 @@
assert merge_commit.decode() not in revisions
revisions[merge_commit.decode()] = merged_tree.id.decode()
- res = self.load()
- self.assertEqual(res["status"], "eventful")
-
- self.assertCountContents(4 + 1)
- self.assertCountDirectories(7 + 2)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7 + 2)
- self.assertCountSnapshots(1 + 1 + 1)
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
- self.assertRevisionsContain(revisions)
+ stats2 = get_stats(self.loader.storage)
+ expected_stats = copy.deepcopy(stats1)
+ # one more visit
+ expected_stats["origin_visit"] += 1
+ # with 1 new directory and revision
+ expected_stats["directory"] += 1
+ expected_stats["revision"] += 1
+ # resulting in 1 new snapshot
+ expected_stats["snapshot"] += 1
- self.assertEqual(self.loader.load_status(), {"status": "eventful"})
- self.assertEqual(self.loader.visit_status(), "full")
+ assert stats2 == expected_stats
visit_status = assert_last_visit_matches(
- self.storage, self.repo_url, status="full", type="git"
+ self.loader.storage, self.repo_url, status="full", type="git"
)
- self.assertIsNotNone(visit_status.snapshot)
+ assert visit_status.snapshot is not None
merge_snapshot_id = visit_status.snapshot
assert merge_snapshot_id != snapshot_id
- merge_snapshot = self.storage.snapshot_get(merge_snapshot_id)
+ merge_snapshot = self.loader.storage.snapshot_get(merge_snapshot_id)
merge_branches = merge_snapshot["branches"]
assert merge_branches[b"HEAD"] == {
"target": b"refs/heads/master",
@@ -372,14 +335,11 @@
expected_snapshot = Snapshot(branches=branches)
# Load the modified repository
- res = self.load()
- assert res["status"] == "eventful"
-
- assert self.loader.load_status() == {"status": "eventful"}
- assert self.loader.visit_status() == "full"
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
@@ -390,22 +350,16 @@
with open(os.path.join(self.destination_path, ".git/HEAD"), "wb") as f:
f.write(b"ref: refs/heads/dangling-branch\n")
- res = self.load()
- self.assertEqual(res["status"], "eventful", res)
-
- self.assertContentsContain(CONTENT1)
- self.assertCountDirectories(7)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7)
- self.assertCountSnapshots(1)
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
visit_status = assert_last_visit_matches(
- self.storage, self.repo_url, status="full", type="git"
+ self.loader.storage, self.repo_url, status="full", type="git"
)
snapshot_id = visit_status.snapshot
assert snapshot_id is not None
- snapshot = self.storage.snapshot_get(snapshot_id)
+ snapshot = self.loader.storage.snapshot_get(snapshot_id)
branches = snapshot["branches"]
assert branches[b"HEAD"] == {
@@ -414,9 +368,57 @@
}
assert branches[b"refs/heads/dangling-branch"] is None
+ stats = get_stats(self.loader.storage)
+ assert stats == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+
-class GitLoaderFromArchiveTest(BaseGitLoaderFromArchiveTest, GitLoaderFromDiskTests):
- """Tests for GitLoaderFromArchive. Imports the common ones
- from GitLoaderTests."""
+class GitLoaderFromDiskTest(TestCase, FullGitLoaderTests):
+ """Prepare a git directory repository to be loaded through a GitLoaderFromDisk.
+ This tests all git loader scenarios.
- pass
+ """
+
+ @pytest.fixture(autouse=True)
+ def init(self, swh_config, datadir, tmp_path):
+ archive_name = "testrepo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ tmp_path = str(tmp_path)
+ self.repo_url = prepare_repository_from_archive(
+ archive_path, archive_name, tmp_path=tmp_path
+ )
+ self.destination_path = os.path.join(tmp_path, archive_name)
+ self.loader = GitLoaderFromDisk(
+ url=self.repo_url,
+ visit_date=datetime.datetime(
+ 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
+ ),
+ directory=self.destination_path,
+ )
+ self.repo = dulwich.repo.Repo(self.destination_path)
+
+
+class GitLoaderFromArchiveTest(TestCase, CommonGitLoaderTests):
+ """Tests for GitLoaderFromArchive. Only tests common scenario."""
+
+ @pytest.fixture(autouse=True)
+ def init(self, swh_config, datadir, tmp_path):
+ archive_name = "testrepo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ self.repo_url = archive_path
+ self.loader = GitLoaderFromArchive(
+ url=self.repo_url,
+ archive_path=archive_path,
+ visit_date=datetime.datetime(
+ 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
+ ),
+ )
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -3,24 +3,34 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.loader.git.loader import GitLoader
-from swh.loader.git.tests.test_from_disk import DirGitLoaderTest
+import os
-from . import TEST_LOADER_CONFIG
+import pytest
+import dulwich.repo
+from unittest import TestCase
-class GitLoaderTest(GitLoader):
- def parse_config_file(self, *args, **kwargs):
- return {**super().parse_config_file(*args, **kwargs), **TEST_LOADER_CONFIG}
+from swh.loader.git.loader import GitLoader
+from swh.loader.git.tests.test_from_disk import FullGitLoaderTests
+from swh.loader.git.tests import prepare_repository_from_archive
-class TestGitLoader(DirGitLoaderTest):
- """Same tests as for the GitLoaderFromDisk, but running on GitLoader."""
- def setUp(self):
- super().setUp()
- self.loader = GitLoaderTest(self.repo_url)
- self.storage = self.loader.storage
+class GitLoaderTest(TestCase, FullGitLoaderTests):
+ """Prepare a git directory repository to be loaded through a GitLoader.
+ This tests all git loader scenarios.
- def load(self):
- return self.loader.load()
+ """
+
+ @pytest.fixture(autouse=True)
+ def init(self, swh_config, datadir, tmp_path):
+ super().setUp()
+ archive_name = "testrepo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ tmp_path = str(tmp_path)
+ self.repo_url = prepare_repository_from_archive(
+ archive_path, archive_name, tmp_path=tmp_path
+ )
+ self.destination_path = os.path.join(tmp_path, archive_name)
+ self.loader = GitLoader(self.repo_url)
+ self.repo = dulwich.repo.Repo(self.destination_path)
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -9,6 +9,7 @@
# https://github.com/pypa/pip/issues/6239
# TODO: remove when this issue is fixed
swh.core[http] >= 0.0.61
+ swh.storage[testing]
pytest-cov
commands =
pytest --cov={envsitepackagesdir}/swh/loader/git \
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 1:14 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222013
Attached To
D3428: Migrate loader tests to use pytest
Event Timeline
Log In to Comment