diff --git a/swh/loader/core/tests/test_utils.py b/swh/loader/core/tests/test_utils.py index cde3061..d1954ae 100644 --- a/swh/loader/core/tests/test_utils.py +++ b/swh/loader/core/tests/test_utils.py @@ -1,95 +1,147 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os +import signal +from time import sleep from unittest.mock import patch -from swh.loader.core.utils import clean_dangling_folders +import pytest + +from swh.loader.core.utils import ( + CloneFailure, + CloneTimeout, + clean_dangling_folders, + clone_with_timeout, +) def prepare_arborescence_from(tmpdir, folder_names): """Prepare arborescence tree with folders Args: tmpdir (Either[LocalPath, str]): Root temporary directory folder_names (List[str]): List of folder names Returns: List of folders """ dangling_folders = [] for dname in folder_names: d = str(tmpdir / dname) os.mkdir(d) dangling_folders.append(d) return str(tmpdir), dangling_folders def assert_dirs(actual_dirs, expected_dirs): """Assert that the directory actual and expected match """ for d in actual_dirs: assert d in expected_dirs assert len(actual_dirs) == len(expected_dirs) def test_clean_dangling_folders_0(tmpdir): """Folder does not exist, do nothing""" r = clean_dangling_folders("/path/does/not/exist", "unused-pattern") assert r is None @patch("swh.loader.core.utils.psutil.pid_exists", return_value=False) def test_clean_dangling_folders_1(mock_pid_exists, tmpdir): """Folder which matches pattern with dead pid are cleaned up """ rootpath, dangling = prepare_arborescence_from( tmpdir, ["something", "swh.loader.svn-4321.noisynoise",] ) clean_dangling_folders(rootpath, "swh.loader.svn") actual_dirs = os.listdir(rootpath) mock_pid_exists.assert_called_once_with(4321) assert_dirs(actual_dirs, ["something"]) 
@patch("swh.loader.core.utils.psutil.pid_exists", return_value=True) def test_clean_dangling_folders_2(mock_pid_exists, tmpdir): """Folder which matches pattern with live pid are skipped """ rootpath, dangling = prepare_arborescence_from( tmpdir, ["something", "swh.loader.hg-1234.noisynoise",] ) clean_dangling_folders(rootpath, "swh.loader.hg") actual_dirs = os.listdir(rootpath) mock_pid_exists.assert_called_once_with(1234) assert_dirs(actual_dirs, ["something", "swh.loader.hg-1234.noisynoise",]) @patch("swh.loader.core.utils.psutil.pid_exists", return_value=False) @patch( "swh.loader.core.utils.shutil.rmtree", side_effect=ValueError("Could not remove for reasons"), ) def test_clean_dangling_folders_3(mock_rmtree, mock_pid_exists, tmpdir): """Error in trying to clean dangling folders are skipped """ path1 = "thingy" path2 = "swh.loader.git-1468.noisy" rootpath, dangling = prepare_arborescence_from(tmpdir, [path1, path2,]) clean_dangling_folders(rootpath, "swh.loader.git") actual_dirs = os.listdir(rootpath) mock_pid_exists.assert_called_once_with(1468) mock_rmtree.assert_called_once_with(os.path.join(rootpath, path2)) assert_dirs(actual_dirs, [path2, path1]) + + +def test_clone_with_timeout_no_error_no_timeout(): + def succeed(): + """This does nothing to simulate a successful clone""" + + clone_with_timeout("foo", "bar", succeed, timeout=0.5) + + +def test_clone_with_timeout_no_error_timeout(): + def slow(): + """This lasts for more than the timeout""" + sleep(1) + + with pytest.raises(CloneTimeout): + clone_with_timeout("foo", "bar", slow, timeout=0.5) + + +def test_clone_with_timeout_error(): + def raise_something(): + raise RuntimeError("panic!") + + with pytest.raises(CloneFailure): + clone_with_timeout("foo", "bar", raise_something, timeout=0.5) + + +def test_clone_with_timeout_sigkill(): + """A clone function ignoring SIGTERM gets SIGKILLed and reported as killed""" + src = "https://www.mercurial-scm.org/repo/hello" + dest = "/dev/null" + timeout = 0.5 + sleepy_time = 100 * timeout + 
assert sleepy_time > timeout + + def ignores_sigterm(*args, **kwargs): + # ignore SIGTERM so that clone_with_timeout has to escalate to SIGKILL + signal.signal(signal.SIGTERM, lambda signum, frame: None) + sleep(sleepy_time) # we make sure we exceed the timeout + + with pytest.raises(CloneTimeout) as e: + clone_with_timeout(src, dest, ignores_sigterm, timeout) + killed = True + assert e.value.args == (src, timeout, killed) diff --git a/swh/loader/core/utils.py b/swh/loader/core/utils.py index de026c2..632bef3 100644 --- a/swh/loader/core/utils.py +++ b/swh/loader/core/utils.py @@ -1,45 +1,105 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import io import os import shutil +import signal +import time +import traceback +from typing import Callable +from billiard import Process, Queue # type: ignore import psutil def clean_dangling_folders(dirpath: str, pattern_check: str, log=None) -> None: """Clean up potential dangling temporary working folder rooted at `dirpath`. Those folders must match a dedicated pattern and not belonging to a live pid. 
Args: dirpath: Path to check for dangling files pattern_check: A dedicated pattern to check on first level directory (e.g `swh.loader.mercurial.`, `swh.loader.svn.`) log (Logger): Optional logger """ if not os.path.exists(dirpath): return for filename in os.listdir(dirpath): path_to_cleanup = os.path.join(dirpath, filename) try: # pattern: `swh.loader.{loader-type}-pid.{noise}` if ( pattern_check not in filename or "-" not in filename ): # silently ignore unknown patterns continue _, pid_ = filename.split("-") pid = int(pid_.split(".")[0]) if psutil.pid_exists(pid): if log: log.debug("PID %s is live, skipping", pid) continue # could be removed concurrently, so check before removal if os.path.exists(path_to_cleanup): shutil.rmtree(path_to_cleanup) except Exception as e: if log: log.warn("Fail to clean dangling path %s: %s", path_to_cleanup, e) + + +class CloneTimeout(Exception): + pass + + +class CloneFailure(Exception): + pass + + +def _clone_task(clone_func: Callable[[], None], errors: Queue) -> None: + try: + clone_func() + except Exception as e: + exc_buffer = io.StringIO() + traceback.print_exc(file=exc_buffer) + errors.put_nowait(exc_buffer.getvalue()) + raise e + + +def clone_with_timeout( + src: str, dest: str, clone_func: Callable[[], None], timeout: float +) -> None: + """Run clone_func in a child process and abort it once timeout is exceeded. + + Args: + src: clone source (only used in the raised exceptions' arguments) + dest: clone destination (only used in the CloneFailure arguments) + clone_func: clone callable run in a child process; CloneFailure if it raises + timeout: seconds to wait before aborting the clone (raises CloneTimeout) + """ + errors: Queue = Queue() + process = Process(target=_clone_task, args=(clone_func, errors)) + process.start() + process.join(timeout) + + if process.is_alive(): + process.terminate() + # Give it literally a second (in successive steps of 0.1 second), + # then kill it. 
+ # Can't use `process.join(1)` here, billiard appears to be bugged + # https://github.com/celery/billiard/issues/270 + killed = False + for _ in range(10): + time.sleep(0.1) + if not process.is_alive(): + break + else: + killed = True + os.kill(process.pid, signal.SIGKILL) + raise CloneTimeout(src, timeout, killed) + + if not errors.empty(): + raise CloneFailure(src, dest, errors.get())