Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9346238
D6622.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D6622.diff
View Options
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2,10 +2,10 @@
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-
from enum import Enum
from io import BytesIO
import os
+import shutil
import subprocess
from typing import Any, Dict, List
@@ -127,8 +127,13 @@
160006" message.
"""
+ archive_ori_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
+ archive_dump_dir = os.path.join(tmp_path, "dump")
+ os.mkdir(archive_dump_dir)
+ archive_dump = os.path.join(archive_dump_dir, "penguinsdbtools2018.dump.gz")
+ # loader now drops the dump as soon as it's mounted so we need to make a copy first
+ shutil.copyfile(archive_ori_dump, archive_dump)
- archive_dump = os.path.join(datadir, "penguinsdbtools2018.dump.gz")
loading_path = str(tmp_path / "loading")
os.mkdir(loading_path)
diff --git a/swh/loader/svn/tests/test_utils.py b/swh/loader/svn/tests/test_utils.py
--- a/swh/loader/svn/tests/test_utils.py
+++ b/swh/loader/svn/tests/test_utils.py
@@ -1,10 +1,12 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import logging
import os
import pty
+import shutil
from subprocess import Popen
from swh.loader.svn import utils
@@ -41,3 +43,93 @@
default_ts = Timestamp(seconds=0, microseconds=0)
assert default_ts == utils.strdate_to_timestamp("")
assert default_ts == utils.strdate_to_timestamp(None)
+
+
+def test_init_svn_repo_from_dump(datadir, tmp_path):
+ """Mounting svn repository out of a dump is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_path = os.path.join(datadir, dump_name)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_dump(
+ dump_path, gzip=True, cleanup_dump=False
+ )
+
+ assert os.path.exists(dump_path), "Dump path should still exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+
+
+def test_init_svn_repo_from_dump_and_cleanup(datadir, tmp_path):
+ """Mounting svn repository with a dump cleanup after is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_ori_path = os.path.join(datadir, dump_name)
+
+ dump_path = os.path.join(tmp_path, dump_name)
+ shutil.copyfile(dump_ori_path, dump_path)
+
+ assert os.path.exists(dump_path)
+ assert os.path.exists(dump_ori_path)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_dump(dump_path, gzip=True)
+
+ assert not os.path.exists(dump_path), "Dump path should no longer exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+ assert os.path.exists(dump_ori_path), "Original dump path should still exists"
+
+
+def test_init_svn_repo_from_dump_and_cleanup_already_done(
+ datadir, tmp_path, mocker, caplog
+):
+ """Mounting svn repository is ok even when the dump is already removed"""
+ caplog.set_level(logging.INFO, "swh.loader.svn.utils")
+
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_ori_path = os.path.join(datadir, dump_name)
+
+ mock_remove = mocker.patch("os.remove")
+ mock_remove.side_effect = FileNotFoundError
+
+ dump_path = os.path.join(tmp_path, dump_name)
+ shutil.copyfile(dump_ori_path, dump_path)
+
+ assert os.path.exists(dump_path)
+ assert os.path.exists(dump_ori_path)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_dump(dump_path, gzip=True)
+
+ assert os.path.exists(repo_path), "Repository should exists"
+ assert os.path.exists(dump_ori_path), "Original dump path should still exists"
+
+ assert len(caplog.record_tuples) == 1
+ assert "Failure to remove" in caplog.record_tuples[0][2]
+ assert mock_remove.called
+
+
+def test_init_svn_repo_from_archive_dump(datadir, tmp_path):
+ """Mounting svn repository out of an archive dump is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_path = os.path.join(datadir, dump_name)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(
+ dump_path, cleanup_dump=False
+ )
+
+ assert os.path.exists(dump_path), "Dump path should still exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+
+
+def test_init_svn_repo_from_archive_dump_and_cleanup(datadir, tmp_path):
+ """Mounting svn repository out of an archive dump with cleanup after is ok"""
+ dump_name = "penguinsdbtools2018.dump.gz"
+ dump_ori_path = os.path.join(datadir, dump_name)
+
+ dump_path = os.path.join(tmp_path, dump_name)
+ shutil.copyfile(dump_ori_path, dump_path)
+
+ assert os.path.exists(dump_path)
+ assert os.path.exists(dump_ori_path)
+
+ tmp_repo, repo_path = utils.init_svn_repo_from_archive_dump(dump_path)
+
+ assert not os.path.exists(dump_path), "Dump path should no longer exists"
+ assert os.path.exists(repo_path), "Repository should exists"
+ assert os.path.exists(dump_ori_path), "Original dump path should still exists"
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -1,18 +1,22 @@
-# Copyright (C) 2016-2020 The Software Heritage developers
+# Copyright (C) 2016-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import errno
+import logging
import os
import shutil
from subprocess import PIPE, Popen, call
import tempfile
+from typing import Tuple
from dateutil import parser
from swh.model.model import Optional, Timestamp
+logger = logging.getLogger(__name__)
+
def strdate_to_timestamp(strdate: Optional[str]) -> Timestamp:
"""Convert a string date to an int timestamp.
@@ -80,20 +84,33 @@
def init_svn_repo_from_dump(
- dump_path, prefix=None, suffix=None, root_dir="/tmp", gzip=False
-):
- """Given a path to a svn dump.
- Initialize an svn repository with the content of said dump.
+ dump_path: str,
+ prefix: Optional[str] = None,
+ suffix: Optional[str] = None,
+ root_dir: str = "/tmp",
+ gzip: bool = False,
+ cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+ """Given a path to a svn dump, initialize an svn repository with the content of said
+ dump.
- Returns:
- A tuple:
- - temporary folder (str): containing the mounted repository
- - repo_path (str): path to the mounted repository inside the
- temporary folder
+ Args:
+ dump_path: The path to the dump
+ prefix: optional prefix file name for the working directory
+ suffix: optional suffix file name for the working directory
+ root_dir: the root directory where the working directory is created
+ gzip: Boolean to determine whether we treat the dump as compressed or not.
+ cleanup_dump: Whether we want this function call to clean up the dump at the end
+ of the repository initialization.
Raises:
- ValueError in case of failure to run the command to uncompress
- and load the dump.
+ ValueError in case of failure to run the command to uncompress and load the
+ dump.
+
+ Returns:
+ A tuple:
+ - temporary folder: containing the mounted repository
+ - repo_path: path to the mounted repository inside the temporary folder
"""
project_name = os.path.basename(os.path.dirname(dump_path))
@@ -128,25 +145,51 @@
except Exception as e:
shutil.rmtree(temp_dir)
raise e
+ finally:
+ if cleanup_dump:
+ try:
+ # At this time, the temporary svn repository is mounted from the dump or
+ # the svn repository failed to mount. Either way, we can drop the dump.
+ os.remove(dump_path)
+ assert not os.path.exists(dump_path)
+ except OSError as e:
+ logger.warn("Failure to remove the dump %s: %s", dump_path, e)
def init_svn_repo_from_archive_dump(
- archive_path, prefix=None, suffix=None, root_dir="/tmp"
-):
- """Given a path to an archive containing an svn dump.
- Initialize an svn repository with the content of said dump.
-
- Returns:
- A tuple:
- - temporary folder (str): containing the mounted repository
- - repo_path (str): path to the mounted repository inside the
- temporary folder
+ archive_path: str,
+ prefix: Optional[str] = None,
+ suffix: Optional[str] = None,
+ root_dir: str = "/tmp",
+ cleanup_dump: bool = True,
+) -> Tuple[str, str]:
+ """Given a path to an archive containing an svn dump, initializes an svn repository
+ with the content of the uncompressed dump.
+ Args:
+ archive_path: The archive svn dump path
+ prefix: optional prefix file name for the working directory
+ suffix: optional suffix file name for the working directory
+ root_dir: the root directory where the working directory is created
+ (no gzip argument here: the archive is always treated as gzip-compressed)
+ cleanup_dump: Whether we want this function call to clean up the dump at the end
+ of the repository initialization.
Raises:
ValueError in case of failure to run the command to uncompress
and load the dump.
+ Returns:
+ A tuple:
+ - temporary folder: containing the mounted repository
+ - repo_path: path to the mounted repository inside the
+ temporary folder
+
"""
return init_svn_repo_from_dump(
- archive_path, prefix=prefix, suffix=suffix, root_dir=root_dir, gzip=True
+ archive_path,
+ prefix=prefix,
+ suffix=suffix,
+ root_dir=root_dir,
+ gzip=True,
+ cleanup_dump=cleanup_dump,
)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:50 PM (2 w, 12 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229529
Attached To
D6622: SvnLoaderFromRemoteDump: Drop dump when svn repository is mounted
Event Timeline
Log In to Comment