Page MenuHomeSoftware Heritage

D6622.diff
No OneTemporary

D6622.diff

diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2,10 +2,10 @@
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-
from enum import Enum
from io import BytesIO
import os
+import shutil
import subprocess
from typing import Any, Dict, List
@@ -127,8 +127,13 @@
160006" message.
"""
+ archive_ori_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
+ archive_dump_dir = os.path.join(tmp_path, "dump")
+ os.mkdir(archive_dump_dir)
+ archive_dump = os.path.join(archive_dump_dir, "penguinsdbtools2018.dump.gz")
+ # loader now drops the dump as soon as it's mounted so we need to make a copy first
+ shutil.copyfile(archive_ori_dump, archive_dump)
- archive_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
loading_path = str(tmp_path / "loading")
os.mkdir(loading_path)
diff --git a/swh/loader/svn/tests/test_utils.py b/swh/loader/svn/tests/test_utils.py
--- a/swh/loader/svn/tests/test_utils.py
+++ b/swh/loader/svn/tests/test_utils.py
@@ -1,10 +1,12 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import logging
import os
import pty
+import shutil
from subprocess import Popen
from swh.loader.svn import utils
@@ -41,3 +43,93 @@
default_ts = Timestamp(seconds=0, microseconds=0)
assert default_ts == utils.strdate_to_timestamp("")
assert default_ts == utils.strdate_to_timestamp(None)
+
+
+def test_init_svn_repo_from_dump(datadir, tmp_path):
+ """Mounting svn repository out of a dump is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_path = os.path.join(datadir, dump_name)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_dump(
+ dump_path, gzip=True, cleanup_dump=False
+ )
+
+ assert os.path.exists(dump_path), "Dump path should still exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+
+
+def test_init_svn_repo_from_dump_and_cleanup(datadir, tmp_path):
+ """Mounting svn repository with a dump cleanup after is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_ori_path = os.path.join(datadir, dump_name)
+
+ dump_path = os.path.join(tmp_path, dump_name)
+ shutil.copyfile(dump_ori_path, dump_path)
+
+ assert os.path.exists(dump_path)
+ assert os.path.exists(dump_ori_path)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_dump(dump_path, gzip=True)
+
+ assert not os.path.exists(dump_path), "Dump path should no longer exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+ assert os.path.exists(dump_ori_path), "Original dump path should still exists"
+
+
+def test_init_svn_repo_from_dump_and_cleanup_already_done(
+ datadir, tmp_path, mocker, caplog
+):
+ """Mounting svn repository is ok even if the dump removal fails"""
+ caplog.set_level(logging.INFO, "swh.loader.svn.utils")
+
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_ori_path = os.path.join(datadir, dump_name)
+
+ mock_remove = mocker.patch("os.remove")
+ mock_remove.side_effect = FileNotFoundError
+
+ dump_path = os.path.join(tmp_path, dump_name)
+ shutil.copyfile(dump_ori_path, dump_path)
+
+ assert os.path.exists(dump_path)
+ assert os.path.exists(dump_ori_path)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_dump(dump_path, gzip=True)
+
+ assert os.path.exists(repo_path), "Repository should exists"
+ assert os.path.exists(dump_ori_path), "Original dump path should still exists"
+
+ assert len(caplog.record_tuples) == 1
+ assert "Failure to remove" in caplog.record_tuples[0][2]
+ assert mock_remove.called
+
+
+def test_init_svn_repo_from_archive_dump(datadir, tmp_path):
+ """Mounting svn repository out of an archive dump is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_path = os.path.join(datadir, dump_name)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(
+ dump_path, cleanup_dump=False
+ )
+
+ assert os.path.exists(dump_path), "Dump path should still exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+
+
+def test_init_svn_repo_from_archive_dump_and_cleanup(datadir, tmp_path):
+ """Mounting svn repository out of an archive dump with cleanup is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_ori_path = os.path.join(datadir, dump_name)
+
+ dump_path = os.path.join(tmp_path, dump_name)
+ shutil.copyfile(dump_ori_path, dump_path)
+
+ assert os.path.exists(dump_path)
+ assert os.path.exists(dump_ori_path)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(dump_path)
+
+ assert not os.path.exists(dump_path), "Dump path should no longer exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+ assert os.path.exists(dump_ori_path), "Original dump path should still exists"
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -1,18 +1,22 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import errno
+import logging
import os
import shutil
from subprocess import PIPE, Popen, call
import tempfile
+from typing import Tuple
from dateutil import parser
from swh.model.model import Optional, Timestamp
+logger = logging.getLogger(__name__)
+
def strdate_to_timestamp(strdate: Optional[str]) -> Timestamp:
"""Convert a string date to an int timestamp.
@@ -80,20 +84,33 @@
def init_svn_repo_from_dump(
- dump_path, prefix=None, suffix=None, root_dir="/tmp", gzip=False
-):
- """Given a path to a svn dump.
- Initialize an svn repository with the content of said dump.
+ dump_path: str,
+ prefix: Optional[str] = None,
+ suffix: Optional[str] = None,
+ root_dir: str = "/tmp",
+ gzip: bool = False,
+ cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+ """Given a path to a svn dump, initialize an svn repository with the content of said
+ dump.
- Returns:
- A tuple:
- - temporary folder (str): containing the mounted repository
- - repo_path (str): path to the mounted repository inside the
- temporary folder
+ Args:
+ dump_path: The path to the dump
+ prefix: optional prefix file name for the working directory
+ suffix: optional suffix file name for the working directory
+ root_dir: the root directory where the working directory is created
+ gzip: Boolean to determine whether we treat the dump as compressed or not.
+ cleanup_dump: Whether we want this function call to clean up the dump at the end
+ of the repository initialization.
Raises:
- ValueError in case of failure to run the command to uncompress
- and load the dump.
+ ValueError in case of failure to run the command to uncompress and load the
+ dump.
+
+ Returns:
+ A tuple:
+ - temporary folder: containing the mounted repository
+ - repo_path: path to the mounted repository inside the temporary folder
"""
project_name = os.path.basename(os.path.dirname(dump_path))
@@ -128,25 +145,51 @@
except Exception as e:
shutil.rmtree(temp_dir)
raise e
+ finally:
+ if cleanup_dump:
+ try:
+ # At this time, the temporary svn repository is mounted from the dump or
+ # the svn repository failed to mount. Either way, we can drop the dump.
+ os.remove(dump_path)
+ assert not os.path.exists(dump_path)
+ except OSError as e:
+ logger.warn("Failure to remove the dump %s: %s", dump_path, e)
def init_svn_repo_from_archive_dump(
- archive_path, prefix=None, suffix=None, root_dir="/tmp"
-):
- """Given a path to an archive containing an svn dump.
- Initialize an svn repository with the content of said dump.
-
- Returns:
- A tuple:
- - temporary folder (str): containing the mounted repository
- - repo_path (str): path to the mounted repository inside the
- temporary folder
+ archive_path: str,
+ prefix: Optional[str] = None,
+ suffix: Optional[str] = None,
+ root_dir: str = "/tmp",
+ cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+ """Given a path to an archive containing an svn dump, initializes an svn repository
+ with the content of the uncompressed dump.
+ Args:
+ archive_path: The archive svn dump path
+ prefix: optional prefix file name for the working directory
+ suffix: optional suffix file name for the working directory
+ root_dir: the root directory where the working directory is created
+ (the archive is always loaded as a gzip-compressed dump)
+ cleanup_dump: Whether we want this function call to clean up the dump at the end
+ of the repository initialization.
Raises:
ValueError in case of failure to run the command to uncompress
and load the dump.
+ Returns:
+ A tuple:
+ - temporary folder: containing the mounted repository
+ - repo_path: path to the mounted repository inside the
+ temporary folder
+
"""
return init_svn_repo_from_dump(
- archive_path, prefix=prefix, suffix=suffix, root_dir=root_dir, gzip=True
+ archive_path,
+ prefix=prefix,
+ suffix=suffix,
+ root_dir=root_dir,
+ gzip=True,
+ cleanup_dump=cleanup_dump,
)

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:50 PM (2 w, 12 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229529

Event Timeline