diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2,10 +2,10 @@
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
-
 from enum import Enum
 from io import BytesIO
 import os
+import shutil
 import subprocess
 from typing import Any, Dict, List
 
@@ -127,8 +127,13 @@
     160006" message.
 
     """
+    archive_ori_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
+    archive_dump_dir = os.path.join(tmp_path, "dump")
+    os.mkdir(archive_dump_dir)
+    archive_dump = os.path.join(archive_dump_dir, "penguinsdbtools2018.dump.gz")
+    # loader now drops the dump as soon as it's mounted so we need to make a copy first
+    shutil.copyfile(archive_ori_dump, archive_dump)
 
-    archive_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
     loading_path = str(tmp_path / "loading")
     os.mkdir(loading_path)
 
diff --git a/swh/loader/svn/tests/test_utils.py b/swh/loader/svn/tests/test_utils.py
--- a/swh/loader/svn/tests/test_utils.py
+++ b/swh/loader/svn/tests/test_utils.py
@@ -1,10 +1,12 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import logging
 import os
 import pty
+import shutil
 from subprocess import Popen
 
 from swh.loader.svn import utils
@@ -41,3 +43,93 @@
     default_ts = Timestamp(seconds=0, microseconds=0)
     assert default_ts == utils.strdate_to_timestamp("")
     assert default_ts == utils.strdate_to_timestamp(None)
+
+
+def test_init_svn_repo_from_dump(datadir, tmp_path):
+    """Mounting svn repository out of a dump is ok"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_path = os.path.join(datadir, dump_name)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_dump(
+        dump_path, gzip=True, cleanup_dump=False
+    )
+
+    assert os.path.exists(dump_path), "Dump path should still exists"
+    assert os.path.exists(repo_path), "Repository should exists"
+
+
+def test_init_svn_repo_from_dump_and_cleanup(datadir, tmp_path):
+    """Mounting svn repository with a dump cleanup after is ok"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_ori_path = os.path.join(datadir, dump_name)
+
+    dump_path = os.path.join(tmp_path, dump_name)
+    shutil.copyfile(dump_ori_path, dump_path)
+
+    assert os.path.exists(dump_path)
+    assert os.path.exists(dump_ori_path)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_dump(dump_path, gzip=True)
+
+    assert not os.path.exists(dump_path), "Dump path should no longer exists"
+    assert os.path.exists(repo_path), "Repository should exists"
+    assert os.path.exists(dump_ori_path), "Original dump path should still exists"
+
+
+def test_init_svn_repo_from_dump_and_cleanup_already_done(
+    datadir, tmp_path, mocker, caplog
+):
+    """A failure to remove the dump is logged and does not break the mounting"""
+    caplog.set_level(logging.INFO, "swh.loader.svn.utils")
+
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_ori_path = os.path.join(datadir, dump_name)
+
+    mock_remove = mocker.patch("os.remove")
+    mock_remove.side_effect = FileNotFoundError
+
+    dump_path = os.path.join(tmp_path, dump_name)
+    shutil.copyfile(dump_ori_path, dump_path)
+
+    assert os.path.exists(dump_path)
+    assert os.path.exists(dump_ori_path)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_dump(dump_path, gzip=True)
+
+    assert os.path.exists(repo_path), "Repository should exists"
+    assert os.path.exists(dump_ori_path), "Original dump path should still exists"
+
+    assert len(caplog.record_tuples) == 1
+    assert "Failure to remove" in caplog.record_tuples[0][2]
+    assert mock_remove.called
+
+
+def test_init_svn_repo_from_archive_dump(datadir, tmp_path):
+    """Mounting svn repository out of an archive dump is ok"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_path = os.path.join(datadir, dump_name)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(
+        dump_path, cleanup_dump=False
+    )
+
+    assert os.path.exists(dump_path), "Dump path should still exists"
+    assert os.path.exists(repo_path), "Repository should exists"
+
+
+def test_init_svn_repo_from_archive_dump_and_cleanup(datadir, tmp_path):
+    """Mounting svn repository out of an archive dump with cleanup after is ok"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_ori_path = os.path.join(datadir, dump_name)
+
+    dump_path = os.path.join(tmp_path, dump_name)
+    shutil.copyfile(dump_ori_path, dump_path)
+
+    assert os.path.exists(dump_path)
+    assert os.path.exists(dump_ori_path)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(dump_path)
+
+    assert not os.path.exists(dump_path), "Dump path should no longer exists"
+    assert os.path.exists(repo_path), "Repository should exists"
+    assert os.path.exists(dump_ori_path), "Original dump path should still exists"
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -1,18 +1,22 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import errno
+import logging
 import os
 import shutil
 from subprocess import PIPE, Popen, call
 import tempfile
+from typing import Tuple
 
 from dateutil import parser
 
 from swh.model.model import Optional, Timestamp
 
+logger = logging.getLogger(__name__)
+
 
 def strdate_to_timestamp(strdate: Optional[str]) -> Timestamp:
     """Convert a string date to an int timestamp.
@@ -80,20 +84,33 @@
 
 
 def init_svn_repo_from_dump(
-    dump_path, prefix=None, suffix=None, root_dir="/tmp", gzip=False
-):
-    """Given a path to a svn dump.
-    Initialize an svn repository with the content of said dump.
+    dump_path: str,
+    prefix: Optional[str] = None,
+    suffix: Optional[str] = None,
+    root_dir: str = "/tmp",
+    gzip: bool = False,
+    cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+    """Given a path to a svn dump, initialize an svn repository with the content of said
+    dump.
 
-    Returns:
-        A tuple:
-            - temporary folder (str): containing the mounted repository
-            - repo_path (str): path to the mounted repository inside the
-              temporary folder
+    Args:
+        dump_path: The path to the dump
+        prefix: optional prefix file name for the working directory
+        suffix: optional suffix file name for the working directory
+        root_dir: the root directory where the working directory is created
+        gzip: Boolean to determine whether we treat the dump as compressed or not.
+        cleanup_dump: Whether we want this function call to clean up the dump at the end
+            of the repository initialization.
 
     Raises:
-        ValueError in case of failure to run the command to uncompress
-        and load the dump.
+        ValueError in case of failure to run the command to uncompress and load the
+        dump.
+
+    Returns:
+        A tuple:
+            - temporary folder: containing the mounted repository
+            - repo_path: path to the mounted repository inside the temporary folder
 
     """
     project_name = os.path.basename(os.path.dirname(dump_path))
@@ -128,25 +145,50 @@
     except Exception as e:
         shutil.rmtree(temp_dir)
         raise e
+    finally:
+        if cleanup_dump:
+            try:
+                # At this time, the temporary svn repository is mounted from the dump or
+                # the svn repository failed to mount. Either way, we can drop the dump.
+                os.remove(dump_path)
+            except OSError as e:
+                logger.warning("Failure to remove the dump %s: %s", dump_path, e)
 
 
 def init_svn_repo_from_archive_dump(
-    archive_path, prefix=None, suffix=None, root_dir="/tmp"
-):
-    """Given a path to an archive containing an svn dump.
-    Initialize an svn repository with the content of said dump.
+    archive_path: str,
+    prefix: Optional[str] = None,
+    suffix: Optional[str] = None,
+    root_dir: str = "/tmp",
+    cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+    """Given a path to an archive containing an svn dump, initializes an svn repository
+    with the content of the uncompressed dump.
 
-    Returns:
-        A tuple:
-            - temporary folder (str): containing the mounted repository
-            - repo_path (str): path to the mounted repository inside the
-              temporary folder
+    Args:
+        archive_path: The archive svn dump path
+        prefix: optional prefix file name for the working directory
+        suffix: optional suffix file name for the working directory
+        root_dir: the root directory where the working directory is created
+        cleanup_dump: Whether we want this function call to clean up the dump at the end
+            of the repository initialization.
 
     Raises:
         ValueError in case of failure to run the command to uncompress
         and load the dump.
 
+    Returns:
+        A tuple:
+            - temporary folder: containing the mounted repository
+            - repo_path: path to the mounted repository inside the
+              temporary folder
+
     """
     return init_svn_repo_from_dump(
-        archive_path, prefix=prefix, suffix=suffix, root_dir=root_dir, gzip=True
+        archive_path,
+        prefix=prefix,
+        suffix=suffix,
+        root_dir=root_dir,
+        gzip=True,
+        cleanup_dump=cleanup_dump,
     )