Page MenuHomeSoftware Heritage

D3428.id12158.diff
No OneTemporary

D3428.id12158.diff

diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,6 +2,5 @@
include Makefile
include requirements*.txt
include version.txt
-recursive-include swh/loader/git/tests/data *.xz
-recursive-include swh/loader/git/tests/resources/ *
+recursive-include swh/loader/git/tests/data *
recursive-include swh py.typed
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,3 +1,4 @@
pytest
pytest-mock
swh.scheduler[testing]
+swh.storage[testing]
diff --git a/swh/loader/git/tests/__init__.py b/swh/loader/git/tests/__init__.py
--- a/swh/loader/git/tests/__init__.py
+++ b/swh/loader/git/tests/__init__.py
@@ -1,27 +1,23 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-TEST_LOADER_CONFIG = {
- "storage": {
- "cls": "pipeline",
- "steps": [
- {"cls": "filter"},
- {
- "cls": "buffer",
- "min_batch_size": {
- "content": 10,
- "content_bytes": 100 * 1024 * 1024,
- "directory": 10,
- "revision": 10,
- "release": 10,
- },
- },
- {"cls": "memory"},
- ],
- },
- "max_content_size": 100 * 1024 * 1024,
- "pack_size_bytes": 4 * 1024 * 1024 * 1024,
- "save_data": False,
-}
+import os
+import subprocess
+
+from typing import Optional
+
+
+def prepare_repository_from_archive(
+ archive_path: str,
+ filename: Optional[str] = None,
+ tmp_path: str = "/tmp",
+ uncompress_archive: bool = True,
+) -> str:
+ if uncompress_archive:
+ # uncompress folder/repositories/dump for the loader to ingest
+ subprocess.check_output(["tar", "xf", archive_path, "-C", tmp_path])
+ # build the origin url (or some derivative form)
+ _fname = filename if filename else os.path.basename(archive_path)
+ return f"file://{tmp_path}/{_fname}"
diff --git a/swh/loader/git/tests/conftest.py b/swh/loader/git/tests/conftest.py
--- a/swh/loader/git/tests/conftest.py
+++ b/swh/loader/git/tests/conftest.py
@@ -1,11 +1,53 @@
-# Copyright (C) 2018-2019 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import os
+import yaml
+
import pytest
+from typing import Any, Dict
+
from swh.scheduler.tests.conftest import * # noqa
+from swh.storage.tests.conftest import * # noqa
+
+
+@pytest.fixture
+def swh_loader_config(swh_storage_backend_config) -> Dict[str, Any]:
+ swh_storage_backend_config["journal_writer"] = {}
+ return {
+ "storage": {
+ "cls": "pipeline",
+ "steps": [
+ {"cls": "filter"},
+ {
+ "cls": "buffer",
+ "min_batch_size": {
+ "content": 10,
+ "content_bytes": 100 * 1024 * 1024,
+ "directory": 10,
+ "revision": 10,
+ "release": 10,
+ },
+ },
+ swh_storage_backend_config,
+ ],
+ },
+ "max_content_size": 100 * 1024 * 1024,
+ "pack_size_bytes": 4 * 1024 * 1024 * 1024,
+ "save_data": False,
+ }
+
+
+@pytest.fixture
+def swh_config(swh_loader_config, monkeypatch, tmp_path):
+ conffile = os.path.join(str(tmp_path), "loader.yml")
+ with open(conffile, "w") as f:
+ f.write(yaml.dump(swh_loader_config))
+ monkeypatch.setenv("SWH_CONFIG_FILENAME", conffile)
+ return conffile
@pytest.fixture(scope="session") # type: ignore # expected redefinition
diff --git a/swh/loader/git/tests/resources/testrepo.tgz b/swh/loader/git/tests/data/testrepo.tgz
rename from swh/loader/git/tests/resources/testrepo.tgz
rename to swh/loader/git/tests/data/testrepo.tgz
diff --git a/swh/loader/git/tests/test_from_disk.py b/swh/loader/git/tests/test_from_disk.py
--- a/swh/loader/git/tests/test_from_disk.py
+++ b/swh/loader/git/tests/test_from_disk.py
@@ -3,38 +3,24 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import copy
import datetime
import os.path
import dulwich.repo
+import pytest
+
+from unittest import TestCase
from swh.model.model import Snapshot, SnapshotBranch, TargetType
from swh.model.hashutil import hash_to_bytes
-
-from swh.loader.core.tests import BaseLoaderTest
from swh.loader.tests.common import assert_last_visit_matches
+from swh.loader.git.from_disk import GitLoaderFromDisk
+from swh.loader.git.from_disk import GitLoaderFromArchive
+from swh.loader.package.tests.common import check_snapshot, get_stats
-from swh.loader.git.from_disk import GitLoaderFromDisk as OrigGitLoaderFromDisk
-from swh.loader.git.from_disk import GitLoaderFromArchive as OrigGitLoaderFromArchive
-
-from . import TEST_LOADER_CONFIG
-
-
-class GitLoaderFromArchive(OrigGitLoaderFromArchive):
- def project_name_from_archive(self, archive_path):
- # We don't want the project name to be 'resources'.
- return "testrepo"
+from swh.loader.git.tests import prepare_repository_from_archive
- def parse_config_file(self, *args, **kwargs):
- return TEST_LOADER_CONFIG
-
-
-CONTENT1 = {
- "33ab5639bfd8e7b95eb1d8d0b87781d4ffea4d5d", # README v1
- "349c4ff7d21f1ec0eda26f3d9284c293e3425417", # README v2
- "799c11e348d39f1704022b8354502e2f81f3c037", # file1.txt
- "4bdb40dfd6ec75cb730e678b5d7786e30170c5fb", # file2.txt
-}
SNAPSHOT_ID = "a23699280a82a043f8c0994cf1631b568f716f95"
@@ -105,119 +91,77 @@
}
-class BaseGitLoaderFromDiskTest(BaseLoaderTest):
- def setUp(self, archive_name, uncompress_archive, filename="testrepo"):
- super().setUp(
- archive_name=archive_name,
- filename=filename,
- prefix_tmp_folder_name="swh.loader.git.",
- start_path=os.path.dirname(__file__),
- uncompress_archive=uncompress_archive,
- )
-
-
-class GitLoaderFromDiskTest(OrigGitLoaderFromDisk):
- def parse_config_file(self, *args, **kwargs):
- return TEST_LOADER_CONFIG
-
-
-class BaseDirGitLoaderFromDiskTest(BaseGitLoaderFromDiskTest):
- """Mixin base loader test to prepare the git
- repository to uncompress, load and test the results.
-
- This sets up
-
- """
-
- def setUp(self):
- super().setUp("testrepo.tgz", uncompress_archive=True)
- self.loader = GitLoaderFromDiskTest(
- url=self.repo_url,
- visit_date=datetime.datetime(
- 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
- ),
- directory=self.destination_path,
- )
- self.storage = self.loader.storage
- self.repo = dulwich.repo.Repo(self.destination_path)
-
- def load(self):
- return self.loader.load()
-
-
-class BaseGitLoaderFromArchiveTest(BaseGitLoaderFromDiskTest):
- """Mixin base loader test to prepare the git
- repository to uncompress, load and test the results.
-
- This sets up
-
- """
-
- def setUp(self):
- super().setUp("testrepo.tgz", uncompress_archive=False)
- self.loader = GitLoaderFromArchive(
- url=self.repo_url,
- visit_date=datetime.datetime(
- 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
- ),
- archive_path=self.destination_path,
- )
- self.storage = self.loader.storage
-
- def load(self):
- return self.loader.load()
-
-
-class GitLoaderFromDiskTests:
+class CommonGitLoaderTests:
"""Common tests for all git loaders."""
def test_load(self):
"""Loads a simple repository (made available by `setUp()`),
and checks everything was added in the storage."""
- res = self.load()
- self.assertEqual(res["status"], "eventful", res)
-
- self.assertContentsContain(CONTENT1)
- self.assertCountDirectories(7)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7)
- self.assertCountSnapshots(1)
-
- self.assertRevisionsContain(REVISIONS1)
+ res = self.loader.load()
- self.assertSnapshotEqual(SNAPSHOT1)
-
- self.assertEqual(self.loader.load_status(), {"status": "eventful"})
- self.assertEqual(self.loader.visit_status(), "full")
+ assert res == {"status": "eventful"}
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
snapshot=hash_to_bytes(SNAPSHOT1["id"]),
)
+ stats = get_stats(self.loader.storage)
+ assert stats == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+
+ check_snapshot(SNAPSHOT1, self.loader.storage)
+
def test_load_unchanged(self):
"""Checks loading a repository a second time does not add
any extra data."""
- res = self.load()
- self.assertEqual(res["status"], "eventful")
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
snapshot=hash_to_bytes(SNAPSHOT1["id"]),
)
- res = self.load()
- self.assertEqual(res["status"], "uneventful")
- self.assertCountSnapshots(1)
+ stats0 = get_stats(self.loader.storage)
+ assert stats0 == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+
+ res = self.loader.load()
+ assert res == {"status": "uneventful"}
+ stats1 = get_stats(self.loader.storage)
+ expected_stats = copy.deepcopy(stats0)
+ expected_stats["origin_visit"] += 1
+ assert stats1 == expected_stats
+
+ check_snapshot(SNAPSHOT1, self.loader.storage)
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
@@ -225,17 +169,32 @@
)
-class DirGitLoaderTest(BaseDirGitLoaderFromDiskTest, GitLoaderFromDiskTests):
- """Tests for the GitLoaderFromDisk. Includes the common ones, and
- add others that only work with a local dir."""
+class FullGitLoaderTests(CommonGitLoaderTests):
+ """Tests for GitLoader (from disk or not). Includes the common ones, and
+ adds others that only work with a local dir.
+
+ """
def test_load_changed(self):
"""Loads a repository, makes some changes by adding files, commits,
and merges, load it again, and check the storage contains everything
it should."""
# Initial load
- res = self.load()
- self.assertEqual(res["status"], "eventful", res)
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
+
+ stats0 = get_stats(self.loader.storage)
+ assert stats0 == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
# Load with a new file + revision
with open(os.path.join(self.destination_path, "hello.py"), "a") as fd:
@@ -251,27 +210,30 @@
assert new_revision not in revisions
revisions[new_revision] = new_dir
- res = self.load()
- self.assertEqual(res["status"], "eventful")
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
- self.assertCountContents(4 + 1)
- self.assertCountDirectories(7 + 1)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7 + 1)
- self.assertCountSnapshots(1 + 1)
+ stats1 = get_stats(self.loader.storage)
+ expected_stats = copy.deepcopy(stats0)
+ # did one new visit
+ expected_stats["origin_visit"] += 1
+ # with one more of the following objects
+ expected_stats["person"] += 1
+ expected_stats["content"] += 1
+ expected_stats["directory"] += 1
+ expected_stats["revision"] += 1
+ # resulting in 1 new snapshot
+ expected_stats["snapshot"] += 1
- self.assertRevisionsContain(revisions)
-
- self.assertEqual(self.loader.load_status(), {"status": "eventful"})
- self.assertEqual(self.loader.visit_status(), "full")
+ assert stats1 == expected_stats
visit_status = assert_last_visit_matches(
- self.storage, self.repo_url, status="full", type="git"
+ self.loader.storage, self.repo_url, status="full", type="git"
)
- self.assertIsNotNone(visit_status.snapshot)
+ assert visit_status.snapshot is not None
snapshot_id = visit_status.snapshot
- snapshot = self.storage.snapshot_get(snapshot_id)
+ snapshot = self.loader.storage.snapshot_get(snapshot_id)
branches = snapshot["branches"]
assert branches[b"HEAD"] == {
"target": b"refs/heads/master",
@@ -304,29 +266,30 @@
assert merge_commit.decode() not in revisions
revisions[merge_commit.decode()] = merged_tree.id.decode()
- res = self.load()
- self.assertEqual(res["status"], "eventful")
-
- self.assertCountContents(4 + 1)
- self.assertCountDirectories(7 + 2)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7 + 2)
- self.assertCountSnapshots(1 + 1 + 1)
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
- self.assertRevisionsContain(revisions)
+ stats2 = get_stats(self.loader.storage)
+ expected_stats = copy.deepcopy(stats1)
+ # one more visit
+ expected_stats["origin_visit"] += 1
+ # with 1 new directory and revision
+ expected_stats["directory"] += 1
+ expected_stats["revision"] += 1
+ # resulting in 1 new snapshot
+ expected_stats["snapshot"] += 1
- self.assertEqual(self.loader.load_status(), {"status": "eventful"})
- self.assertEqual(self.loader.visit_status(), "full")
+ assert stats2 == expected_stats
visit_status = assert_last_visit_matches(
- self.storage, self.repo_url, status="full", type="git"
+ self.loader.storage, self.repo_url, status="full", type="git"
)
- self.assertIsNotNone(visit_status.snapshot)
+ assert visit_status.snapshot is not None
merge_snapshot_id = visit_status.snapshot
assert merge_snapshot_id != snapshot_id
- merge_snapshot = self.storage.snapshot_get(merge_snapshot_id)
+ merge_snapshot = self.loader.storage.snapshot_get(merge_snapshot_id)
merge_branches = merge_snapshot["branches"]
assert merge_branches[b"HEAD"] == {
"target": b"refs/heads/master",
@@ -372,14 +335,11 @@
expected_snapshot = Snapshot(branches=branches)
# Load the modified repository
- res = self.load()
- assert res["status"] == "eventful"
-
- assert self.loader.load_status() == {"status": "eventful"}
- assert self.loader.visit_status() == "full"
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
assert_last_visit_matches(
- self.storage,
+ self.loader.storage,
self.repo_url,
status="full",
type="git",
@@ -390,22 +350,16 @@
with open(os.path.join(self.destination_path, ".git/HEAD"), "wb") as f:
f.write(b"ref: refs/heads/dangling-branch\n")
- res = self.load()
- self.assertEqual(res["status"], "eventful", res)
-
- self.assertContentsContain(CONTENT1)
- self.assertCountDirectories(7)
- self.assertCountReleases(0) # FIXME: should be 2 after T2059
- self.assertCountRevisions(7)
- self.assertCountSnapshots(1)
+ res = self.loader.load()
+ assert res == {"status": "eventful"}
visit_status = assert_last_visit_matches(
- self.storage, self.repo_url, status="full", type="git"
+ self.loader.storage, self.repo_url, status="full", type="git"
)
snapshot_id = visit_status.snapshot
assert snapshot_id is not None
- snapshot = self.storage.snapshot_get(snapshot_id)
+ snapshot = self.loader.storage.snapshot_get(snapshot_id)
branches = snapshot["branches"]
assert branches[b"HEAD"] == {
@@ -414,9 +368,57 @@
}
assert branches[b"refs/heads/dangling-branch"] is None
+ stats = get_stats(self.loader.storage)
+ assert stats == {
+ "content": 4,
+ "directory": 7,
+ "origin": 1,
+ "origin_visit": 1,
+ "person": 1,
+ "release": 0,
+ "revision": 7,
+ "skipped_content": 0,
+ "snapshot": 1,
+ }
+
-class GitLoaderFromArchiveTest(BaseGitLoaderFromArchiveTest, GitLoaderFromDiskTests):
- """Tests for GitLoaderFromArchive. Imports the common ones
- from GitLoaderTests."""
+class GitLoaderFromDiskTest(TestCase, FullGitLoaderTests):
+ """Prepare a git directory repository to be loaded through a GitLoaderFromDisk.
+ This tests all git loader scenarios.
- pass
+ """
+
+ @pytest.fixture(autouse=True)
+ def init(self, swh_config, datadir, tmp_path):
+ archive_name = "testrepo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ tmp_path = str(tmp_path)
+ self.repo_url = prepare_repository_from_archive(
+ archive_path, archive_name, tmp_path=tmp_path
+ )
+ self.destination_path = os.path.join(tmp_path, archive_name)
+ self.loader = GitLoaderFromDisk(
+ url=self.repo_url,
+ visit_date=datetime.datetime(
+ 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
+ ),
+ directory=self.destination_path,
+ )
+ self.repo = dulwich.repo.Repo(self.destination_path)
+
+
+class GitLoaderFromArchiveTest(TestCase, CommonGitLoaderTests):
+ """Tests for GitLoaderFromArchive. Only tests the common scenarios."""
+
+ @pytest.fixture(autouse=True)
+ def init(self, swh_config, datadir, tmp_path):
+ archive_name = "testrepo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ self.repo_url = archive_path
+ self.loader = GitLoaderFromArchive(
+ url=self.repo_url,
+ archive_path=archive_path,
+ visit_date=datetime.datetime(
+ 2016, 5, 3, 15, 16, 32, tzinfo=datetime.timezone.utc
+ ),
+ )
diff --git a/swh/loader/git/tests/test_loader.py b/swh/loader/git/tests/test_loader.py
--- a/swh/loader/git/tests/test_loader.py
+++ b/swh/loader/git/tests/test_loader.py
@@ -3,24 +3,34 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.loader.git.loader import GitLoader
-from swh.loader.git.tests.test_from_disk import DirGitLoaderTest
+import os
-from . import TEST_LOADER_CONFIG
+import pytest
+import dulwich.repo
+from unittest import TestCase
-class GitLoaderTest(GitLoader):
- def parse_config_file(self, *args, **kwargs):
- return {**super().parse_config_file(*args, **kwargs), **TEST_LOADER_CONFIG}
+from swh.loader.git.loader import GitLoader
+from swh.loader.git.tests.test_from_disk import FullGitLoaderTests
+from swh.loader.git.tests import prepare_repository_from_archive
-class TestGitLoader(DirGitLoaderTest):
- """Same tests as for the GitLoaderFromDisk, but running on GitLoader."""
- def setUp(self):
- super().setUp()
- self.loader = GitLoaderTest(self.repo_url)
- self.storage = self.loader.storage
+class GitLoaderTest(TestCase, FullGitLoaderTests):
+ """Prepare a git directory repository to be loaded through a GitLoader.
+ This tests all git loader scenarios.
- def load(self):
- return self.loader.load()
+ """
+
+ @pytest.fixture(autouse=True)
+ def init(self, swh_config, datadir, tmp_path):
+ super().setUp()
+ archive_name = "testrepo"
+ archive_path = os.path.join(datadir, f"{archive_name}.tgz")
+ tmp_path = str(tmp_path)
+ self.repo_url = prepare_repository_from_archive(
+ archive_path, archive_name, tmp_path=tmp_path
+ )
+ self.destination_path = os.path.join(tmp_path, archive_name)
+ self.loader = GitLoader(self.repo_url)
+ self.repo = dulwich.repo.Repo(self.destination_path)
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -9,6 +9,7 @@
# https://github.com/pypa/pip/issues/6239
# TODO: remove when this issue is fixed
swh.core[http] >= 0.0.61
+ swh.storage[testing]
pytest-cov
commands =
pytest --cov={envsitepackagesdir}/swh/loader/git \

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 1:14 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222013

Event Timeline