Page MenuHomeSoftware Heritage

D7027.diff
No OneTemporary

D7027.diff

diff --git a/swh/loader/svn/replay.py b/swh/loader/svn/replay.py
--- a/swh/loader/svn/replay.py
+++ b/swh/loader/svn/replay.py
@@ -42,7 +42,11 @@
if TYPE_CHECKING:
from swh.loader.svn.svn import SvnRepo
-from swh.loader.svn.utils import parse_external_definition, svn_urljoin
+from swh.loader.svn.utils import (
+ is_recursive_external,
+ parse_external_definition,
+ svn_urljoin,
+)
_eol_style = {"native": b"\n", "CRLF": b"\r\n", "LF": b"\n", "CR": b"\r"}
@@ -587,6 +591,12 @@
# external already exported, nothing to do
continue
+ if is_recursive_external(
+ self.svnrepo.origin_url, os.fsdecode(self.path), path, external_url
+ ):
+ # recursive external, skip it
+ continue
+
if external not in self.editor.externals_cache:
try:
@@ -675,6 +685,21 @@
[relative_url for (_, relative_url) in self.editor.valid_externals.values()]
)
+ self.svnrepo.has_recursive_externals = any(
+ is_recursive_external(
+ self.svnrepo.origin_url, os.fsdecode(path), external_path, external_url
+ )
+ for path, dir_state in self.dir_states.items()
+ for external_path, (external_url, _, _) in dir_state.externals.items()
+ )
+ if self.svnrepo.has_recursive_externals:
+ # If the repository has recursive externals, we stop processing externals
+ # and remove those already exported,
+ # We will then ignore externals when exporting the revision to check for
+ # divergence with the reconstructed filesystem
+ for external_path in list(self.editor.external_paths):
+ self.remove_external_path(external_path)
+
def remove_external_path(self, external_path: bytes) -> None:
"""Remove a previously exported SVN external path from
the reconstruted filesystem.
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -28,7 +28,7 @@
)
from . import converters, replay
-from .utils import parse_external_definition
+from .utils import is_recursive_external, parse_external_definition
# When log message contains empty data
DEFAULT_AUTHOR_MESSAGE = ""
@@ -78,6 +78,7 @@
)
self.max_content_length = max_content_length
self.has_relative_externals = False
+ self.has_recursive_externals = False
self.replay_started = False
def __str__(self):
@@ -221,10 +222,11 @@
if self.replay_started and self.has_relative_externals:
# externals detected while replaying revisions
url = self.origin_url
- elif not self.replay_started and self.remote_url.startswith("file://"):
+ elif not self.replay_started:
# revisions replay has not started, we need to check if svn:externals
# properties are set from a checkout of the revision and if some
- # external URLs are relative to pick the right export URL
+ # external URLs are relative to pick the right export URL,
+ # recursive externals are also checked
with tempfile.TemporaryDirectory(
dir=self.local_dirname, prefix=f"checkout-revision-{revision}."
) as co_dirname:
@@ -236,23 +238,42 @@
"svn:externals", co_dirname, None, revision, True
)
self.has_relative_externals = False
+ self.has_recursive_externals = False
for path, external_defs in externals.items():
- if self.has_relative_externals:
+ if self.has_relative_externals or self.has_recursive_externals:
break
+ path = path.replace(self.remote_url.rstrip("/") + "/", "")
for external_def in os.fsdecode(external_defs).split("\n"):
# skip empty line or comment
if not external_def or external_def.startswith("#"):
continue
- _, _, _, relative_url = parse_external_definition(
+ (
+ external_path,
+ external_url,
+ _,
+ relative_url,
+ ) = parse_external_definition(
external_def.rstrip("\r"), path, self.origin_url
)
+
+ if is_recursive_external(
+ self.origin_url, path, external_path, external_url,
+ ):
+ self.has_recursive_externals = True
+ url = self.remote_url
+ break
+
if relative_url:
self.has_relative_externals = True
url = self.origin_url
break
self.client.export(
- url.rstrip("/"), to=local_url, rev=revision, ignore_keywords=True,
+ url.rstrip("/"),
+ to=local_url,
+ rev=revision,
+ ignore_keywords=True,
+ ignore_externals=self.has_recursive_externals,
)
return local_dirname, os.fsencode(local_url)
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2734,3 +2734,99 @@
loader.storage, repo_url, status="full", type="svn",
)
check_snapshot(loader.snapshot, loader.storage)
+
+
+def test_loader_with_recursive_external(
+ swh_storage, repo_url, external_repo_url, tmp_path
+):
+ # first commit on external
+ add_commit(
+ external_repo_url,
+ "Create a file in an external repository",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="code/foo.sh",
+ data=b"#!/bin/bash\necho foo",
+ ),
+ ],
+ )
+
+ # first commit
+ add_commit(
+ repo_url,
+ "Add trunk dir and a file",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/bar.sh",
+ data=b"#!/bin/bash\necho bar",
+ )
+ ],
+ )
+
+ # second commit
+ add_commit(
+ repo_url,
+ "Set externals code on trunk/externals dir, one being recursive",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/externals/",
+ properties={
+ "svn:externals": (
+ f"{svn_urljoin(external_repo_url, 'code')} code\n"
+ f"{repo_url} recursive"
+ )
+ },
+ ),
+ ],
+ )
+
+ # first load
+ loader = SvnLoader(
+ swh_storage, repo_url, temp_directory=tmp_path, check_revision=1,
+ )
+ assert loader.load() == {"status": "eventful"}
+ assert_last_visit_matches(
+ loader.storage, repo_url, status="full", type="svn",
+ )
+ check_snapshot(loader.snapshot, loader.storage)
+ assert loader.svnrepo.has_recursive_externals
+
+ # second load on stale repo
+ loader = SvnLoader(
+ swh_storage, repo_url, temp_directory=tmp_path, check_revision=1,
+ )
+ assert loader.load() == {"status": "uneventful"}
+ assert_last_visit_matches(
+ loader.storage, repo_url, status="full", type="svn",
+ )
+ check_snapshot(loader.snapshot, loader.storage)
+ assert loader.svnrepo.has_recursive_externals
+
+ # third commit
+ add_commit(
+ repo_url,
+ "Remove recursive external on trunk/externals dir",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/externals/",
+ properties={
+ "svn:externals": (f"{svn_urljoin(external_repo_url, 'code')} code")
+ },
+ ),
+ ],
+ )
+
+ # third load
+ loader = SvnLoader(
+ swh_storage, repo_url, temp_directory=tmp_path, check_revision=1,
+ )
+ assert loader.load() == {"status": "eventful"}
+ assert_last_visit_matches(
+ loader.storage, repo_url, status="full", type="svn",
+ )
+ check_snapshot(loader.snapshot, loader.storage)
+ assert not loader.svnrepo.has_recursive_externals
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -11,7 +11,7 @@
from subprocess import PIPE, Popen, call
import tempfile
from typing import Optional, Tuple
-from urllib.parse import urlparse
+from urllib.parse import quote, urlparse, urlunparse
logger = logging.getLogger(__name__)
@@ -292,3 +292,29 @@
# handle URL like http://user@svn.example.org/
pass
return (path.rstrip("/"), external_url, revision, relative_url)
+
+
+def is_recursive_external(
+ origin_url: str, dir_path: str, external_path: str, external_url: str
+) -> bool:
+ """
+ Check if an external definition can lead to a recursive subversion export
+ operation (https://issues.apache.org/jira/browse/SVN-1703).
+
+ Args:
+ origin_url: repository URL
+ dir_path: path of the directory where external is defined
+ external_path: path of the external relative to the directory
+ external_url: external URL
+
+ Returns:
+ Whether the external definition is recursive
+ """
+ parsed_origin_url = urlparse(origin_url)
+ parsed_external_url = urlparse(external_url)
+ external_url = urlunparse(
+ parsed_external_url._replace(scheme=parsed_origin_url.scheme)
+ )
+ return svn_urljoin(origin_url, quote(dir_path), quote(external_path)).startswith(
+ external_url
+ )

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 2:40 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225609

Event Timeline