Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/tests/test_loader.py
| # Copyright (C) 2018-2021 The Software Heritage developers | # Copyright (C) 2018-2022 The Software Heritage developers | ||||
| # See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
| # License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
| # See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
| import datetime | |||||
| from functools import partial | from functools import partial | ||||
| from http.server import HTTPServer, SimpleHTTPRequestHandler | from http.server import HTTPServer, SimpleHTTPRequestHandler | ||||
| import os | import os | ||||
| import subprocess | import subprocess | ||||
| from tempfile import SpooledTemporaryFile | from tempfile import SpooledTemporaryFile | ||||
| from threading import Thread | from threading import Thread | ||||
| from unittest.mock import MagicMock, call | from unittest.mock import MagicMock, call | ||||
| from dulwich.errors import GitProtocolError, NotGitRepository, ObjectFormatException | from dulwich.errors import GitProtocolError, NotGitRepository, ObjectFormatException | ||||
| from dulwich.porcelain import push | from dulwich.porcelain import push | ||||
| import dulwich.repo | import dulwich.repo | ||||
| import pytest | import pytest | ||||
| from swh.loader.git import dumb | from swh.loader.git import dumb | ||||
| from swh.loader.git.loader import GitLoader | from swh.loader.git.loader import GitLoader | ||||
| from swh.loader.git.tests.test_from_disk import FullGitLoaderTests | from swh.loader.git.tests.test_from_disk import SNAPSHOT1, FullGitLoaderTests | ||||
| from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
| assert_last_visit_matches, | assert_last_visit_matches, | ||||
| get_stats, | get_stats, | ||||
| prepare_repository_from_archive, | prepare_repository_from_archive, | ||||
| ) | ) | ||||
| from swh.model.model import Origin | from swh.model.model import Origin, OriginVisit, OriginVisitStatus | ||||
| class CommonGitLoaderNotFound: | class CommonGitLoaderNotFound: | ||||
| @pytest.fixture(autouse=True) | @pytest.fixture(autouse=True) | ||||
| def __inject_fixtures(self, mocker): | def __inject_fixtures(self, mocker): | ||||
| """Inject required fixtures in unittest.TestCase class""" | """Inject required fixtures in unittest.TestCase class""" | ||||
| self.mocker = mocker | self.mocker = mocker | ||||
| ▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | def init(self, swh_storage, datadir, tmp_path, mocker): | ||||
| self.loader = GitLoader( | self.loader = GitLoader( | ||||
| MagicMock(wraps=swh_storage), | MagicMock(wraps=swh_storage), | ||||
| self.repo_url, | self.repo_url, | ||||
| lister_name="fake-lister", | lister_name="fake-lister", | ||||
| lister_instance_name="", | lister_instance_name="", | ||||
| ) | ) | ||||
| self.repo = dulwich.repo.Repo(self.destination_path) | self.repo = dulwich.repo.Repo(self.destination_path) | ||||
| def test_load_incremental(self): | def test_no_previous_snapshot(self, mocker): | ||||
| statsd_report = mocker.patch("swh.core.statsd.statsd._report") | |||||
| res = self.loader.load() | res = self.loader.load() | ||||
| assert res == {"status": "eventful"} | assert res == {"status": "eventful"} | ||||
| self.fetcher_cls.assert_called_once_with( | self.fetcher_cls.assert_called_once_with( | ||||
| credentials={}, | credentials={}, | ||||
| lister_name="fake-lister", | lister_name="fake-lister", | ||||
| lister_instance_name="", | lister_instance_name="", | ||||
| origin=Origin(url=self.repo_url), | origin=Origin(url=self.repo_url), | ||||
| Show All 12 Lines | def test_no_previous_snapshot(self, mocker): | ||||
| call( | call( | ||||
| f"base://{self.repo_url}", | f"base://{self.repo_url}", | ||||
| allowed_statuses=None, | allowed_statuses=None, | ||||
| require_snapshot=True, | require_snapshot=True, | ||||
| type=None, | type=None, | ||||
| ), | ), | ||||
| ] | ] | ||||
| p = "swh.loader.git" | |||||
| assert [c for c in statsd_report.mock_calls if c[1][0].startswith(p)] == [ | |||||
| call(f"{p}.loader.incremental.no_previous_snapshot", "c", 1, None, 1), | |||||
| call( | |||||
| f"{p}.loader.ratio_ignored_refs", | |||||
| "h", | |||||
| 0.0, | |||||
| {"incremental": "no_previous_snapshot"}, | |||||
| 1, | |||||
| ), | |||||
| call( | |||||
| f"{p}.loader.ratio_known_refs", | |||||
| "h", | |||||
| 0.0, | |||||
| {"incremental": "no_previous_snapshot"}, | |||||
| 1, | |||||
| ), | |||||
| ] | |||||
def test_load_incremental(self, mocker):
    """Check incremental loading against a parent origin, then the same origin.

    First load: the storage mock reports a visit with a snapshot only for
    the ``base://`` parent origin, so the loader is expected to query the
    origin itself, then fall back to the parent, and to report the
    ``from_parent_origin`` statsd metrics.

    Second load: the storage mock's canned answers are cleared, the loader
    is expected to query only the origin itself, return ``uneventful`` and
    report the ``from_same_origin`` statsd metrics.
    """
    statsd_report = mocker.patch("swh.core.statsd.statsd._report")
    # Prefix used to keep only this loader's metrics among the statsd calls.
    p = "swh.loader.git"

    snapshot_id = b"\x01" * 20
    now = datetime.datetime.now(tz=datetime.timezone.utc)

    # The parameter names (including ``type``) must match the keyword
    # arguments asserted on ``origin_visit_get_latest`` below.
    def ovgl(origin_url, allowed_statuses, require_snapshot, type):
        # Only the parent ("base://") origin has a previous visit.
        if origin_url == f"base://{self.repo_url}":
            return OriginVisit(origin=origin_url, visit=42, date=now, type="git")
        else:
            return None

    self.loader.storage.origin_visit_get_latest.side_effect = ovgl
    self.loader.storage.origin_visit_status_get_latest.return_value = (
        OriginVisitStatus(
            origin=f"base://{self.repo_url}",
            visit=42,
            snapshot=snapshot_id,
            date=now,
            status="full",
        )
    )
    # The parent's snapshot exposes a single branch taken from SNAPSHOT1.
    self.loader.storage.snapshot_get_branches.return_value = {
        "id": snapshot_id,
        "branches": {
            b"refs/heads/master": SNAPSHOT1.branches[b"refs/heads/master"]
        },
        "next_branch": None,
    }

    res = self.loader.load()
    assert res == {"status": "eventful"}

    self.fetcher_cls.assert_called_once_with(
        credentials={},
        lister_name="fake-lister",
        lister_instance_name="",
        origin=Origin(url=self.repo_url),
    )
    self.fetcher.get_parent_origins.assert_called_once_with()

    # First tries the same origin
    assert self.loader.storage.origin_visit_get_latest.mock_calls == [
        call(
            self.repo_url,
            allowed_statuses=None,
            require_snapshot=True,
            type=None,
        ),
        # As it does not already have a snapshot, fall back to the parent origin
        call(
            f"base://{self.repo_url}",
            allowed_statuses=None,
            require_snapshot=True,
            type=None,
        ),
    ]

    # A mock call is a (name, args, kwargs) tuple: c[1][0] is the first
    # positional argument, i.e. the metric name.
    assert [c for c in statsd_report.mock_calls if c[1][0].startswith(p)] == [
        call(f"{p}.loader.incremental.from_parent_origin", "c", 1, None, 1),
        call(
            f"{p}.loader.ratio_ignored_refs",
            "h",
            0.0,
            {"incremental": "from_parent_origin"},
            1,
        ),
        call(
            f"{p}.loader.ratio_known_refs",
            "h",
            0.25,
            {"incremental": "from_parent_origin"},
            1,
        ),
    ]

    self.fetcher.reset_mock()
    self.fetcher_cls.reset_mock()
    # Also drop the configured return values and side effects, so the
    # second load no longer gets the canned parent-origin answers.
    self.loader.storage.reset_mock(return_value=True, side_effect=True)
    statsd_report.reset_mock()

    # Load again
    res = self.loader.load()
    assert res == {"status": "uneventful"}

    self.fetcher_cls.assert_called_once_with(
        credentials={},
        lister_name="fake-lister",
        lister_instance_name="",
        origin=Origin(url=self.repo_url),
    )
    self.fetcher.get_parent_origins.assert_not_called()

    assert self.loader.storage.origin_visit_get_latest.mock_calls == [
        # Tries the same origin, and finds a snapshot
        call(
            self.repo_url,
            allowed_statuses=None,
            require_snapshot=True,
            type=None,
        ),
        # -> does not need to fall back to the parent
    ]

    # Use tuple indexing rather than ``c.args`` (only available on
    # Python >= 3.8), consistently with the other statsd assertions.
    assert [c for c in statsd_report.mock_calls if c[1][0].startswith(p)] == [
        call(f"{p}.loader.incremental.from_same_origin", "c", 1, None, 1),
        call(
            f"{p}.loader.ratio_ignored_refs",
            "h",
            0.0,
            {"incremental": "from_same_origin"},
            1,
        ),
        call(
            f"{p}.loader.ratio_known_refs",
            "h",
            1.0,
            {"incremental": "from_same_origin"},
            1,
        ),
    ]
| class DumbGitLoaderTestBase(FullGitLoaderTests): | class DumbGitLoaderTestBase(FullGitLoaderTests): | ||||
| """Prepare a git repository to be loaded using the HTTP dumb transfer protocol.""" | """Prepare a git repository to be loaded using the HTTP dumb transfer protocol.""" | ||||
| @pytest.fixture(autouse=True) | @pytest.fixture(autouse=True) | ||||
| def init(self, swh_storage, datadir, tmp_path): | def init(self, swh_storage, datadir, tmp_path): | ||||
| # remove any proxy settings in order to successfully spawn a local HTTP server | # remove any proxy settings in order to successfully spawn a local HTTP server | ||||
| http_proxy = os.environ.get("http_proxy") | http_proxy = os.environ.get("http_proxy") | ||||
| ▲ Show 20 Lines • Show All 189 Lines • Show Last 20 Lines | |||||