diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2,10 +2,10 @@
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
-
 from enum import Enum
 from io import BytesIO
 import os
+import shutil
 import subprocess
 from typing import Any, Dict, List
 
@@ -127,8 +127,13 @@
     160006" message.
 
     """
+    archive_ori_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
+    archive_dump_dir = os.path.join(tmp_path, "dump")
+    os.mkdir(archive_dump_dir)
+    archive_dump = os.path.join(archive_dump_dir, "penguinsdbtools2018.dump.gz")
+    # loader now drops the dump as soon as it's mounted so we need to make a copy first
+    shutil.copyfile(archive_ori_dump, archive_dump)
 
-    archive_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
     loading_path = str(tmp_path / "loading")
     os.mkdir(loading_path)
 
diff --git a/swh/loader/svn/tests/test_utils.py b/swh/loader/svn/tests/test_utils.py
--- a/swh/loader/svn/tests/test_utils.py
+++ b/swh/loader/svn/tests/test_utils.py
@@ -1,10 +1,11 @@
-# Copyright (C) 2016-2020  The Software Heritage developers
+# Copyright (C) 2016-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 import pty
+import shutil
 from subprocess import Popen
 
 from swh.loader.svn import utils
@@ -41,3 +42,65 @@
     default_ts = Timestamp(seconds=0, microseconds=0)
     assert default_ts == utils.strdate_to_timestamp("")
     assert default_ts == utils.strdate_to_timestamp(None)
+
+
+def test_init_svn_repo_from_dump(datadir, tmp_path):
+    """Mounting svn repository out of a dump is ok"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_path = os.path.join(datadir, dump_name)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_dump(
+        dump_path, gzip=True, cleanup_dump=False
+    )
+
+    assert os.path.exists(dump_path), "Dump path should still exists"
+    assert os.path.exists(repo_path), "Repository should still exists"
+
+
+def test_init_svn_repo_from_dump_and_cleanup(datadir, tmp_path):
+    """Mounting svn repository out of a dump removes the dump by default"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_ori_path = os.path.join(datadir, dump_name)
+
+    dump_path = os.path.join(tmp_path, dump_name)
+    shutil.copyfile(dump_ori_path, dump_path)
+
+    assert os.path.exists(dump_path)
+    assert os.path.exists(dump_ori_path)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_dump(dump_path, gzip=True)
+
+    assert not os.path.exists(dump_path), "Dump path should no longer exists"
+    assert os.path.exists(repo_path), "Repository should exists"
+    assert os.path.exists(dump_ori_path), "Original dump path should still exists"
+
+
+def test_init_svn_repo_from_archive_dump(datadir, tmp_path):
+    """Mounting svn repository out of an archive dump is ok"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_path = os.path.join(datadir, dump_name)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(
+        dump_path, cleanup_dump=False
+    )
+
+    assert os.path.exists(dump_path), "Dump path should still exists"
+    assert os.path.exists(repo_path), "Repository should exists"
+
+
+def test_init_svn_repo_from_archive_dump_and_cleanup(datadir, tmp_path):
+    """Mounting svn repository out of an archive dump removes the dump by default"""
+    dump_name = "penguinsdbtools2018.dump.gz"
+    dump_ori_path = os.path.join(datadir, dump_name)
+
+    dump_path = os.path.join(tmp_path, dump_name)
+    shutil.copyfile(dump_ori_path, dump_path)
+
+    assert os.path.exists(dump_path)
+    assert os.path.exists(dump_ori_path)
+
+    tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(dump_path)
+
+    assert not os.path.exists(dump_path), "Dump path should no longer exists"
+    assert os.path.exists(repo_path), "Repository should exists"
+    assert os.path.exists(dump_ori_path), "Original dump path should still exists"
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2020  The Software Heritage developers
+# Copyright (C) 2016-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
@@ -8,6 +8,7 @@
 import shutil
 from subprocess import PIPE, Popen, call
 import tempfile
+from typing import Optional, Tuple
 
 from dateutil import parser
 
@@ -80,20 +81,33 @@
 
 
 def init_svn_repo_from_dump(
-    dump_path, prefix=None, suffix=None, root_dir="/tmp", gzip=False
-):
-    """Given a path to a svn dump.
-    Initialize an svn repository with the content of said dump.
+    dump_path: str,
+    prefix: Optional[str] = None,
+    suffix: Optional[str] = None,
+    root_dir: str = "/tmp",
+    gzip: bool = False,
+    cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+    """Given a path to a svn dump, initialize an svn repository with the content of said
+    dump.
 
-    Returns:
-        A tuple:
-            - temporary folder (str): containing the mounted repository
-            - repo_path (str): path to the mounted repository inside the
-                temporary folder
+    Args:
+        dump_path: The path to the dump
+        prefix: optional prefix file name for the working directory
+        suffix: optional suffix file name for the working directory
+        root_dir: the root directory where the working directory is created
+        gzip: Boolean to determine whether we treat the dump as compressed or not.
+        cleanup_dump: Whether we want this function call to clean up the dump at the end
+            of the repository initialization.
 
     Raises:
-        ValueError in case of failure to run the command to uncompress
-        and load the dump.
+        ValueError in case of failure to run the command to uncompress and load the
+        dump.
+
+    Returns:
+        A tuple:
+            - temporary folder: containing the mounted repository
+            - repo_path: path to the mounted repository inside the temporary folder
 
     """
     project_name = os.path.basename(os.path.dirname(dump_path))
@@ -128,25 +142,51 @@
     except Exception as e:
         shutil.rmtree(temp_dir)
         raise e
+    finally:
+        if cleanup_dump:
+            # At this time, the temporary svn repository is mounted from the dump or the
+            # svn repository failed to mount. Either way, we can drop the dump. A dump
+            # that is already gone must not mask the error raised above.
+            try:
+                os.remove(dump_path)
+            except FileNotFoundError:
+                pass
 
 
 def init_svn_repo_from_archive_dump(
-    archive_path, prefix=None, suffix=None, root_dir="/tmp"
-):
-    """Given a path to an archive containing an svn dump.
-    Initialize an svn repository with the content of said dump.
-
-    Returns:
-        A tuple:
-            - temporary folder (str): containing the mounted repository
-            - repo_path (str): path to the mounted repository inside the
-                temporary folder
+    archive_path: str,
+    prefix: Optional[str] = None,
+    suffix: Optional[str] = None,
+    root_dir: str = "/tmp",
+    cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+    """Given a path to an archive containing an svn dump, initializes an svn repository
+    with the content of the uncompressed dump.
 
+    Args:
+        archive_path: The archive svn dump path
+        prefix: optional prefix file name for the working directory
+        suffix: optional suffix file name for the working directory
+        root_dir: the root directory where the working directory is created
+        cleanup_dump: Whether we want this function call to clean up the dump at the end
+            of the repository initialization.
+
     Raises:
         ValueError in case of failure to run the command to uncompress
         and load the dump.
 
+    Returns:
+        A tuple:
+            - temporary folder: containing the mounted repository
+            - repo_path: path to the mounted repository inside the
+                temporary folder
+
     """
     return init_svn_repo_from_dump(
-        archive_path, prefix=prefix, suffix=suffix, root_dir=root_dir, gzip=True
+        archive_path,
+        prefix=prefix,
+        suffix=suffix,
+        root_dir=root_dir,
+        gzip=True,
+        cleanup_dump=cleanup_dump,
     )