Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123803
D7027.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D7027.diff
View Options
diff --git a/swh/loader/svn/replay.py b/swh/loader/svn/replay.py
--- a/swh/loader/svn/replay.py
+++ b/swh/loader/svn/replay.py
@@ -42,7 +42,11 @@
if TYPE_CHECKING:
from swh.loader.svn.svn import SvnRepo
-from swh.loader.svn.utils import parse_external_definition, svn_urljoin
+from swh.loader.svn.utils import (
+ is_recursive_external,
+ parse_external_definition,
+ svn_urljoin,
+)
_eol_style = {"native": b"\n", "CRLF": b"\r\n", "LF": b"\n", "CR": b"\r"}
@@ -587,6 +591,12 @@
# external already exported, nothing to do
continue
+ if is_recursive_external(
+ self.svnrepo.origin_url, os.fsdecode(self.path), path, external_url
+ ):
+ # recursive external, skip it
+ continue
+
if external not in self.editor.externals_cache:
try:
@@ -675,6 +685,21 @@
[relative_url for (_, relative_url) in self.editor.valid_externals.values()]
)
+ self.svnrepo.has_recursive_externals = any(
+ is_recursive_external(
+ self.svnrepo.origin_url, os.fsdecode(path), external_path, external_url
+ )
+ for path, dir_state in self.dir_states.items()
+ for external_path, (external_url, _, _) in dir_state.externals.items()
+ )
+ if self.svnrepo.has_recursive_externals:
+ # If the repository has recursive externals, we stop processing externals
+ # and remove those already exported,
+ # We will then ignore externals when exporting the revision to check for
+ # divergence with the reconstructed filesystem
+ for external_path in list(self.editor.external_paths):
+ self.remove_external_path(external_path)
+
def remove_external_path(self, external_path: bytes) -> None:
"""Remove a previously exported SVN external path from
the reconstruted filesystem.
diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py
--- a/swh/loader/svn/svn.py
+++ b/swh/loader/svn/svn.py
@@ -28,7 +28,7 @@
)
from . import converters, replay
-from .utils import parse_external_definition
+from .utils import is_recursive_external, parse_external_definition
# When log message contains empty data
DEFAULT_AUTHOR_MESSAGE = ""
@@ -78,6 +78,7 @@
)
self.max_content_length = max_content_length
self.has_relative_externals = False
+ self.has_recursive_externals = False
self.replay_started = False
def __str__(self):
@@ -221,10 +222,11 @@
if self.replay_started and self.has_relative_externals:
# externals detected while replaying revisions
url = self.origin_url
- elif not self.replay_started and self.remote_url.startswith("file://"):
+ elif not self.replay_started:
# revisions replay has not started, we need to check if svn:externals
# properties are set from a checkout of the revision and if some
- # external URLs are relative to pick the right export URL
+ # external URLs are relative to pick the right export URL,
+ # recursive externals are also checked
with tempfile.TemporaryDirectory(
dir=self.local_dirname, prefix=f"checkout-revision-{revision}."
) as co_dirname:
@@ -236,23 +238,42 @@
"svn:externals", co_dirname, None, revision, True
)
self.has_relative_externals = False
+ self.has_recursive_externals = False
for path, external_defs in externals.items():
- if self.has_relative_externals:
+ if self.has_relative_externals or self.has_recursive_externals:
break
+ path = path.replace(self.remote_url.rstrip("/") + "/", "")
for external_def in os.fsdecode(external_defs).split("\n"):
# skip empty line or comment
if not external_def or external_def.startswith("#"):
continue
- _, _, _, relative_url = parse_external_definition(
+ (
+ external_path,
+ external_url,
+ _,
+ relative_url,
+ ) = parse_external_definition(
external_def.rstrip("\r"), path, self.origin_url
)
+
+ if is_recursive_external(
+ self.origin_url, path, external_path, external_url,
+ ):
+ self.has_recursive_externals = True
+ url = self.remote_url
+ break
+
if relative_url:
self.has_relative_externals = True
url = self.origin_url
break
self.client.export(
- url.rstrip("/"), to=local_url, rev=revision, ignore_keywords=True,
+ url.rstrip("/"),
+ to=local_url,
+ rev=revision,
+ ignore_keywords=True,
+ ignore_externals=self.has_recursive_externals,
)
return local_dirname, os.fsencode(local_url)
diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py
--- a/swh/loader/svn/tests/test_loader.py
+++ b/swh/loader/svn/tests/test_loader.py
@@ -2734,3 +2734,99 @@
loader.storage, repo_url, status="full", type="svn",
)
check_snapshot(loader.snapshot, loader.storage)
+
+
+def test_loader_with_recursive_external(
+ swh_storage, repo_url, external_repo_url, tmp_path
+):
+ # first commit on external
+ add_commit(
+ external_repo_url,
+ "Create a file in an external repository",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="code/foo.sh",
+ data=b"#!/bin/bash\necho foo",
+ ),
+ ],
+ )
+
+ # first commit
+ add_commit(
+ repo_url,
+ "Add trunk dir and a file",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/bar.sh",
+ data=b"#!/bin/bash\necho bar",
+ )
+ ],
+ )
+
+ # second commit
+ add_commit(
+ repo_url,
+ "Set externals code on trunk/externals dir, one being recursive",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/externals/",
+ properties={
+ "svn:externals": (
+ f"{svn_urljoin(external_repo_url, 'code')} code\n"
+ f"{repo_url} recursive"
+ )
+ },
+ ),
+ ],
+ )
+
+ # first load
+ loader = SvnLoader(
+ swh_storage, repo_url, temp_directory=tmp_path, check_revision=1,
+ )
+ assert loader.load() == {"status": "eventful"}
+ assert_last_visit_matches(
+ loader.storage, repo_url, status="full", type="svn",
+ )
+ check_snapshot(loader.snapshot, loader.storage)
+ assert loader.svnrepo.has_recursive_externals
+
+ # second load on stale repo
+ loader = SvnLoader(
+ swh_storage, repo_url, temp_directory=tmp_path, check_revision=1,
+ )
+ assert loader.load() == {"status": "uneventful"}
+ assert_last_visit_matches(
+ loader.storage, repo_url, status="full", type="svn",
+ )
+ check_snapshot(loader.snapshot, loader.storage)
+ assert loader.svnrepo.has_recursive_externals
+
+ # third commit
+ add_commit(
+ repo_url,
+ "Remove recursive external on trunk/externals dir",
+ [
+ CommitChange(
+ change_type=CommitChangeType.AddOrUpdate,
+ path="trunk/externals/",
+ properties={
+ "svn:externals": (f"{svn_urljoin(external_repo_url, 'code')} code")
+ },
+ ),
+ ],
+ )
+
+ # third load
+ loader = SvnLoader(
+ swh_storage, repo_url, temp_directory=tmp_path, check_revision=1,
+ )
+ assert loader.load() == {"status": "eventful"}
+ assert_last_visit_matches(
+ loader.storage, repo_url, status="full", type="svn",
+ )
+ check_snapshot(loader.snapshot, loader.storage)
+ assert not loader.svnrepo.has_recursive_externals
diff --git a/swh/loader/svn/utils.py b/swh/loader/svn/utils.py
--- a/swh/loader/svn/utils.py
+++ b/swh/loader/svn/utils.py
@@ -11,7 +11,7 @@
from subprocess import PIPE, Popen, call
import tempfile
from typing import Optional, Tuple
-from urllib.parse import urlparse
+from urllib.parse import quote, urlparse, urlunparse
logger = logging.getLogger(__name__)
@@ -292,3 +292,29 @@
# handle URL like http://user@svn.example.org/
pass
return (path.rstrip("/"), external_url, revision, relative_url)
+
+
+def is_recursive_external(
+ origin_url: str, dir_path: str, external_path: str, external_url: str
+) -> bool:
+ """
+ Check if an external definition can lead to a recursive subversion export
+ operation (https://issues.apache.org/jira/browse/SVN-1703).
+
+ Args:
+ origin_url: repository URL
+ dir_path: path of the directory where external is defined
+ external_path: path of the external relative to the directory
+ external_url: external URL
+
+ Returns:
+ Whether the external definition is recursive
+ """
+ parsed_origin_url = urlparse(origin_url)
+ parsed_external_url = urlparse(external_url)
+ external_url = urlunparse(
+ parsed_external_url._replace(scheme=parsed_origin_url.scheme)
+ )
+ return svn_urljoin(origin_url, quote(dir_path), quote(external_path)).startswith(
+ external_url
+ )
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 2:40 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225609
Attached To
D7027: replay: Handle recursive external definition
Event Timeline
Log In to Comment