Page Menu · Home · Software Heritage

D6925.id25169.diff
No One · Temporary

D6925.id25169.diff

diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py
--- a/swh/loader/svn/ra.py
+++ b/swh/loader/svn/ra.py
@@ -565,31 +565,41 @@
# external already exported, nothing to do
continue
- try:
- # try to export external in a temporary path, destination path could
- # be versioned and must be overridden only if the external URL is
- # still valid
- temp_dir = os.fsencode(tempfile.mkdtemp())
- temp_path = os.path.join(temp_dir, dest_path)
- os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True)
- if external_url not in self.editor.dead_externals:
- logger.debug("Exporting external %s to path %s", external_url, path)
- self.svnrepo.client.export(
- external_url.rstrip("/"),
- to=temp_path,
- rev=revision,
- ignore_keywords=True,
- )
- self.editor.valid_externals[dest_fullpath] = (
- external_url,
- relative_url,
+ if external not in self.editor.externals_cache:
+
+ try:
+ # try to export external in a temporary path, destination path could
+ # be versioned and must be overridden only if the external URL is
+ # still valid
+ temp_dir = os.fsencode(
+ tempfile.mkdtemp(dir=self.editor.externals_cache_dir)
)
+ temp_path = os.path.join(temp_dir, dest_path)
+ os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True)
+ if external_url not in self.editor.dead_externals:
+ logger.debug(
+ "Exporting external %s to path %s", external_url, path
+ )
+ self.svnrepo.client.export(
+ external_url.rstrip("/"),
+ to=temp_path,
+ rev=revision,
+ ignore_keywords=True,
+ )
+ self.editor.externals_cache[external] = temp_path
+ self.editor.valid_externals[dest_fullpath] = (
+ external_url,
+ relative_url,
+ )
- except SubversionException as se:
- # external no longer available (404)
- logger.debug(se)
- self.editor.dead_externals.add(external_url)
- pass
+ except SubversionException as se:
+ # external no longer available (404)
+ logger.debug(se)
+ self.editor.dead_externals.add(external_url)
+ pass
+
+ else:
+ temp_path = self.editor.externals_cache[external]
# subversion export will always create the subdirectories of the external
# path regardless of the validity of the remote URL
@@ -604,16 +614,17 @@
# remove previous path in from_disk model
self.remove_child(dest_fullpath)
- # move exported path to reconstructed filesystem
+ # copy exported path to reconstructed filesystem
fullpath = os.path.join(self.rootpath, dest_fullpath)
- shutil.move(temp_path, fullpath)
# update from_disk model and store external paths
self.editor.external_paths.add(dest_fullpath)
- if os.path.isfile(fullpath):
+ if os.path.isfile(temp_path):
+ shutil.copy(temp_path, fullpath)
self.directory[dest_fullpath] = from_disk.Content.from_file(
path=fullpath
)
else:
+ shutil.copytree(temp_path, fullpath)
self.directory[dest_fullpath] = from_disk.Directory.from_disk(
path=fullpath
)
@@ -684,7 +695,11 @@
"""
def __init__(
- self, rootpath: bytes, directory: from_disk.Directory, svnrepo: SvnRepo
+ self,
+ rootpath: bytes,
+ directory: from_disk.Directory,
+ svnrepo: SvnRepo,
+ temp_dir: str,
):
self.rootpath = rootpath
self.directory = directory
@@ -693,6 +708,8 @@
self.external_paths: Set[bytes] = set()
self.valid_externals: Dict[bytes, Tuple[str, bool]] = {}
self.dead_externals: Set[str] = set()
+ self.externals_cache_dir = tempfile.mkdtemp(dir=temp_dir)
+ self.externals_cache: Dict[Tuple[str, Optional[int]], str] = {}
self.svnrepo = svnrepo
self.revnum = None
@@ -725,6 +742,7 @@
conn: RemoteAccess,
rootpath: bytes,
svnrepo: SvnRepo,
+ temp_dir: str,
directory: Optional[from_disk.Directory] = None,
):
self.conn = conn
@@ -732,7 +750,9 @@
if directory is None:
directory = from_disk.Directory()
self.directory = directory
- self.editor = Editor(rootpath=rootpath, directory=directory, svnrepo=svnrepo)
+ self.editor = Editor(
+ rootpath=rootpath, directory=directory, svnrepo=svnrepo, temp_dir=temp_dir
+ )
def replay(self, rev: int) -> from_disk.Directory:
"""Replay svn actions between rev and rev+1.
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -71,7 +71,10 @@
self.uuid = self.conn.get_uuid().encode("utf-8")
self.swhreplay = ra.Replay(
- conn=self.conn, rootpath=self.local_url, svnrepo=self
+ conn=self.conn,
+ rootpath=self.local_url,
+ svnrepo=self,
+ temp_dir=local_dirname,
)
self.max_content_length = max_content_length
self.has_relative_externals = False
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2364,3 +2364,61 @@
loader.storage, repo_url, status="full", type="svn",
)
assert not loader.svnrepo.has_relative_externals
+
+
+def test_loader_externals_cache(swh_storage, repo_url, external_repo_url, tmp_path):
+
+ # first commit on external
+ add_commit(
+ external_repo_url,
+ "Create some directories and files in an external repository",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="code/hello/hello-world",
+ properties={"svn:executable": "*"},
+ data=b"#!/bin/bash\necho Hello World !",
+ ),
+ ],
+ )
+
+ # first commit
+ add_commit(
+ repo_url,
+ "Create repository structure.",
+ [
+ CommitChange(change_type=CommitChangeType.AddOrUpdate, path="project1/",),
+ CommitChange(change_type=CommitChangeType.AddOrUpdate, path="project2/",),
+ ],
+ )
+
+ external_url = svn_urljoin(external_repo_url, "code/hello")
+
+ # second commit
+ add_commit(
+ repo_url,
+ (
+ "Set svn:externals property on trunk/externals path of repository to load."
+ "One external targets a remote directory and another one a remote file."
+ ),
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="project1/externals/",
+ properties={"svn:externals": (f"{external_url} hello\n")},
+ ),
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="project2/externals/",
+ properties={"svn:externals": (f"{external_url} hello\n")},
+ ),
+ ],
+ )
+
+ loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1)
+ assert loader.load() == {"status": "eventful"}
+ assert_last_visit_matches(
+ loader.storage, repo_url, status="full", type="svn",
+ )
+
+ assert (external_url, None) in loader.svnrepo.swhreplay.editor.externals_cache

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 5:33 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3231276

Event Timeline