Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/git/tests/test_loader.py
# Copyright (C) 2018-2021 The Software Heritage developers | # Copyright (C) 2018-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | |||||
from functools import partial | from functools import partial | ||||
from http.server import HTTPServer, SimpleHTTPRequestHandler | from http.server import HTTPServer, SimpleHTTPRequestHandler | ||||
import os | import os | ||||
import subprocess | import subprocess | ||||
import sys | |||||
from tempfile import SpooledTemporaryFile | from tempfile import SpooledTemporaryFile | ||||
from threading import Thread | from threading import Thread | ||||
from unittest.mock import MagicMock, call | from unittest.mock import MagicMock, call | ||||
from dulwich.errors import GitProtocolError, NotGitRepository, ObjectFormatException | from dulwich.errors import GitProtocolError, NotGitRepository, ObjectFormatException | ||||
from dulwich.porcelain import push | from dulwich.porcelain import push | ||||
import dulwich.repo | import dulwich.repo | ||||
import pytest | import pytest | ||||
from swh.loader.git import dumb | from swh.loader.git import dumb | ||||
from swh.loader.git.loader import GitLoader | from swh.loader.git.loader import GitLoader | ||||
from swh.loader.git.tests.test_from_disk import FullGitLoaderTests | from swh.loader.git.tests.test_from_disk import SNAPSHOT1, FullGitLoaderTests | ||||
from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
assert_last_visit_matches, | assert_last_visit_matches, | ||||
get_stats, | get_stats, | ||||
prepare_repository_from_archive, | prepare_repository_from_archive, | ||||
) | ) | ||||
from swh.model.model import Origin | from swh.model.model import Origin, OriginVisit, OriginVisitStatus | ||||
class CommonGitLoaderNotFound: | class CommonGitLoaderNotFound: | ||||
@pytest.fixture(autouse=True)
def __inject_fixtures(self, mocker):
    """Store the ``mocker`` fixture on the test instance as ``self.mocker``.

    Declared ``autouse`` so it runs for every test without being requested
    explicitly.
    """
    self.mocker = mocker
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | def init(self, swh_storage, datadir, tmp_path, mocker): | ||||
self.loader = GitLoader( | self.loader = GitLoader( | ||||
MagicMock(wraps=swh_storage), | MagicMock(wraps=swh_storage), | ||||
self.repo_url, | self.repo_url, | ||||
lister_name="fake-lister", | lister_name="fake-lister", | ||||
lister_instance_name="", | lister_instance_name="", | ||||
) | ) | ||||
self.repo = dulwich.repo.Repo(self.destination_path) | self.repo = dulwich.repo.Repo(self.destination_path) | ||||
def test_load_incremental(self): | def test_no_previous_snapshot(self, mocker): | ||||
statsd_report = mocker.patch.object(self.loader.statsd, "_report") | |||||
res = self.loader.load() | res = self.loader.load() | ||||
assert res == {"status": "eventful"} | assert res == {"status": "eventful"} | ||||
self.fetcher_cls.assert_called_once_with( | self.fetcher_cls.assert_called_once_with( | ||||
credentials={}, | credentials={}, | ||||
lister_name="fake-lister", | lister_name="fake-lister", | ||||
lister_instance_name="", | lister_instance_name="", | ||||
origin=Origin(url=self.repo_url), | origin=Origin(url=self.repo_url), | ||||
Show All 12 Lines | def test_no_previous_snapshot(self, mocker): | ||||
call( | call( | ||||
f"base://{self.repo_url}", | f"base://{self.repo_url}", | ||||
allowed_statuses=None, | allowed_statuses=None, | ||||
require_snapshot=True, | require_snapshot=True, | ||||
type=None, | type=None, | ||||
), | ), | ||||
] | ] | ||||
# TODO: assert "incremental" is added to constant tags before these | |||||
# metrics are sent | |||||
assert [c for c in statsd_report.mock_calls if c[1][0].startswith("git")] == [ | |||||
call("git", "c", 1, {}, 1), | |||||
call("git_ignored_refs_percent", "h", 0.0, {}, 1), | |||||
call("git_known_refs_percent", "h", 0.0, {}, 1), | |||||
] | |||||
assert self.loader.statsd.constant_tags == { | |||||
"visit_type": "git", | |||||
"incremental": "no_previous_snapshot", | |||||
} | |||||
def test_load_incremental(self, mocker):
    """Load the same origin twice and check how the previous snapshot is found.

    For the first load, the storage mock is set up so that only the
    ``base://`` parent origin reports a visit with a snapshot. The test
    expects an eventful load tagged ``from_parent_origin``.

    The mocks are then reset and the load is run again. The test expects an
    uneventful load tagged ``from_same_origin`` that never asks the fetcher
    for parent origins.
    """
    statsd_report = mocker.patch.object(self.loader.statsd, "_report")

    parent_url = f"base://{self.repo_url}"
    snapshot_id = b"\x01" * 20
    now = datetime.datetime.now(tz=datetime.timezone.utc)

    # Expectations shared by both loads.
    fetcher_kwargs = {
        "credentials": {},
        "lister_name": "fake-lister",
        "lister_instance_name": "",
        "origin": Origin(url=self.repo_url),
    }
    visit_lookup_kwargs = {
        "allowed_statuses": None,
        "require_snapshot": True,
        "type": None,
    }

    def git_metric_calls():
        """Return the recorded statsd reports whose metric name starts with "git"."""
        return [
            reported
            for reported in statsd_report.mock_calls
            if reported[1][0].startswith("git")
        ]

    def latest_visit(origin_url, allowed_statuses, require_snapshot, type):
        # Only the parent origin reports a visit; any other URL yields None.
        if origin_url != parent_url:
            return None
        return OriginVisit(origin=origin_url, visit=42, date=now, type="git")

    storage = self.loader.storage
    storage.origin_visit_get_latest.side_effect = latest_visit
    storage.origin_visit_status_get_latest.return_value = OriginVisitStatus(
        origin=parent_url,
        visit=42,
        snapshot=snapshot_id,
        date=now,
        status="full",
    )
    master_branch = SNAPSHOT1.branches[b"refs/heads/master"]
    storage.snapshot_get_branches.return_value = {
        "id": snapshot_id,
        "branches": {b"refs/heads/master": master_branch},
        "next_branch": None,
    }

    first_result = self.loader.load()
    assert first_result == {"status": "eventful"}

    self.fetcher_cls.assert_called_once_with(**fetcher_kwargs)
    self.fetcher.get_parent_origins.assert_called_once_with()

    assert storage.origin_visit_get_latest.mock_calls == [
        # First tries the same origin
        call(self.repo_url, **visit_lookup_kwargs),
        # As it does not already have a snapshot, fall back to the parent origin
        call(parent_url, **visit_lookup_kwargs),
    ]

    # TODO: assert "incremental" is added to constant tags before these
    # metrics are sent
    assert git_metric_calls() == [
        call("git", "c", 1, {}, 1),
        call("git_ignored_refs_percent", "h", 0.0, {}, 1),
        call("git_known_refs_percent", "h", 0.25, {}, 1),
    ]
    assert self.loader.statsd.constant_tags == {
        "visit_type": "git",
        "incremental": "from_parent_origin",
    }

    # Clear recorded calls and the canned return values / side effects set
    # above before the second load.
    self.fetcher.reset_mock()
    self.fetcher_cls.reset_mock()
    if sys.version_info >= (3, 9, 0):
        storage.reset_mock(return_value=True, side_effect=True)
    else:
        # Reimplement https://github.com/python/cpython/commit/aef7dc89879d099dc704bd8037b8a7686fb72838 # noqa
        # for old Python versions: reset every child mock explicitly with
        # the same flags.
        def reset_recursively(mock):
            mock.reset_mock(return_value=True, side_effect=True)
            for child in mock._mock_children.values():
                reset_recursively(child)

        reset_recursively(storage)
    statsd_report.reset_mock()

    # Load again
    second_result = self.loader.load()
    assert second_result == {"status": "uneventful"}

    self.fetcher_cls.assert_called_once_with(**fetcher_kwargs)
    self.fetcher.get_parent_origins.assert_not_called()

    assert storage.origin_visit_get_latest.mock_calls == [
        # Tries the same origin, and finds a snapshot
        call(self.repo_url, **visit_lookup_kwargs),
        # -> does not need to fall back to the parent
    ]

    # TODO: assert "incremental" is added to constant tags before these
    # metrics are sent
    assert git_metric_calls() == [
        call("git", "c", 1, {}, 1),
        call("git_ignored_refs_percent", "h", 0.0, {}, 1),
        call("git_known_refs_percent", "h", 1.0, {}, 1),
    ]
    assert self.loader.statsd.constant_tags == {
        "visit_type": "git",
        "incremental": "from_same_origin",
    }
class DumbGitLoaderTestBase(FullGitLoaderTests): | class DumbGitLoaderTestBase(FullGitLoaderTests): | ||||
"""Prepare a git repository to be loaded using the HTTP dumb transfer protocol.""" | """Prepare a git repository to be loaded using the HTTP dumb transfer protocol.""" | ||||
@pytest.fixture(autouse=True) | @pytest.fixture(autouse=True) | ||||
def init(self, swh_storage, datadir, tmp_path): | def init(self, swh_storage, datadir, tmp_path): | ||||
# remove any proxy settings in order to successfully spawn a local HTTP server | # remove any proxy settings in order to successfully spawn a local HTTP server | ||||
http_proxy = os.environ.get("http_proxy") | http_proxy = os.environ.get("http_proxy") | ||||
▲ Show 20 Lines • Show All 189 Lines • Show Last 20 Lines |