diff --git a/swh/loader/svn/ra.py b/swh/loader/svn/ra.py --- a/swh/loader/svn/ra.py +++ b/swh/loader/svn/ra.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2021 The Software Heritage developers +# Copyright (C) 2016-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -10,7 +10,9 @@ from __future__ import annotations import codecs -import dataclasses +from collections import defaultdict +from dataclasses import dataclass, field +from itertools import chain import logging import os import shutil @@ -23,13 +25,14 @@ Dict, List, Optional, + Set, Tuple, Union, cast, ) import click -from subvertpy import delta, properties +from subvertpy import SubversionException, delta, properties from subvertpy.ra import Auth, RemoteAccess, get_username_provider from swh.model import from_disk, hashutil @@ -38,6 +41,8 @@ if TYPE_CHECKING: from swh.loader.svn.svn import SvnRepo +from swh.loader.svn.utils import parse_external_definition, svn_urljoin + _eol_style = {"native": b"\n", "CRLF": b"\r\n", "LF": b"\n", "CR": b"\r"} logger = logging.getLogger(__name__) @@ -150,7 +155,7 @@ SVN_PROPERTY_EOL = "svn:eol-style" -@dataclasses.dataclass +@dataclass class FileState: """Persists some file states (eg. 
end of lines style) across revisions while replaying them.""" @@ -182,6 +187,7 @@ "link", "state", "svnrepo", + "editor", ] def __init__( @@ -197,6 +203,7 @@ self.fullpath = os.path.join(rootpath, path) self.state = state self.svnrepo = svnrepo + self.editor = svnrepo.swhreplay.editor def change_prop(self, key: str, value: str) -> None: if key == properties.PROP_EXECUTABLE: @@ -247,6 +254,10 @@ return sbuf def apply_textdelta(self, base_checksum) -> Callable[[Any, bytes, BinaryIO], None]: + # if the filepath matches an external, do not apply local patch + if self.path in self.editor.external_paths: + return lambda *args: None + if os.path.lexists(self.fullpath): if os.path.islink(self.fullpath): # svn does not deal with symlink so we transform into @@ -291,7 +302,7 @@ self.svnrepo.client.export( os.path.join(self.svnrepo.remote_url.encode(), self.path), to=self.fullpath, - rev=self.svnrepo.swhreplay.editor.revnum, + rev=self.editor.revnum, ignore_keywords=True, overwrite=True, ) @@ -335,6 +346,14 @@ self.directory[self.path] = from_disk.Content.from_file(path=self.fullpath) +@dataclass +class DirState: + """Persists some directory states (eg. externals) across revisions while + replaying them.""" + + externals: Dict[str, Tuple[str, Optional[int]]] = field(default_factory=dict) + + class DirEditor: """Directory Editor in charge of updating directory hashes computation. 
@@ -342,7 +361,16 @@ """ - __slots__ = ["directory", "rootpath", "path", "file_states", "svnrepo"] + __slots__ = [ + "directory", + "rootpath", + "path", + "file_states", + "dir_states", + "svnrepo", + "editor", + "externals", + ] def __init__( self, @@ -350,6 +378,7 @@ rootpath: bytes, path: bytes, file_states: Dict[bytes, FileState], + dir_states: Dict[bytes, DirState], svnrepo: SvnRepo, ): self.directory = directory @@ -358,7 +387,10 @@ # build directory on init os.makedirs(rootpath, exist_ok=True) self.file_states = file_states + self.dir_states = dir_states self.svnrepo = svnrepo + self.editor = svnrepo.swhreplay.editor + self.externals: Dict[str, Tuple[str, Optional[int], bool]] = {} def remove_child(self, path: bytes) -> None: """Remove a path from the current objects. @@ -401,6 +433,7 @@ rootpath=self.rootpath, path=os.fsencode(path), file_states=self.file_states, + dir_states=self.dir_states, svnrepo=self.svnrepo, ) @@ -409,13 +442,18 @@ """ path_bytes = os.fsencode(path) + os.makedirs(os.path.join(self.rootpath, path_bytes), exist_ok=True) - self.directory[path_bytes] = from_disk.Directory() + if path_bytes not in self.directory: + self.dir_states[path_bytes] = DirState() + self.directory[path_bytes] = from_disk.Directory() + return DirEditor( self.directory, - rootpath=self.rootpath, - path=path_bytes, - file_states=self.file_states, + self.rootpath, + path_bytes, + self.file_states, + self.dir_states, svnrepo=self.svnrepo, ) @@ -461,21 +499,180 @@ value, self.path, ) - raise ValueError("Property '%s' detected. Not implemented yet." 
% key) + self.externals = {} + if value is not None: + # externals are set on that directory path, parse and store them + # for later processing in the close method + for external in value.split("\n"): + external = external.rstrip("\r") + # skip empty line or comment + if not external or external.startswith("#"): + continue + ( + path, + external_url, + revision, + relative_url, + ) = parse_external_definition( + external, os.fsdecode(self.path), self.svnrepo.origin_url + ) + self.externals[path] = (external_url, revision, relative_url) + + if not self.externals: + # externals might have been unset on that directory path, + # remove associated paths from the reconstructed filesystem + externals = self.dir_states[self.path].externals + for path in externals.keys(): + self.remove_external_path(os.fsencode(path)) + + self.dir_states[self.path].externals = {} def delete_entry(self, path: str, revision: int) -> None: """Remove a path. """ - fullpath = os.path.join(self.rootpath, path.encode("utf-8")) - self.file_states.pop(fullpath, None) - self.remove_child(path.encode("utf-8")) + path_bytes = os.fsencode(path) + if path_bytes not in self.editor.external_paths: + fullpath = os.path.join(self.rootpath, path_bytes) + self.file_states.pop(fullpath, None) + self.remove_child(path_bytes) def close(self): - """Function called when we finish walking a repository. + """Function called when we finish processing a repository. + SVN external definitions are processed by it. 
""" - pass + + prev_externals = self.dir_states[self.path].externals + + if self.externals: + # externals definition list might have changed in the current processed + # revision, we need to determine if some were removed and delete the + # associated paths + old_externals = set(prev_externals) - set(self.externals) + for old_external in old_externals: + self.remove_external_path(os.fsencode(old_external)) + + # For each external, try to export it in reconstructed filesystem + for path, (external_url, revision, relative_url) in self.externals.items(): + external = (external_url, revision) + dest_path = os.fsencode(path) + dest_fullpath = os.path.join(self.path, dest_path) + if ( + path in prev_externals + and prev_externals[path] == external + and dest_fullpath in self.directory + ): + # external already exported, nothing to do + continue + + try: + # try to export external in a temporary path, destination path could + # be versioned and must be overridden only if the external URL is + # still valid + temp_dir = os.fsencode(tempfile.mkdtemp()) + temp_path = os.path.join(temp_dir, dest_path) + os.makedirs(b"/".join(temp_path.split(b"/")[:-1]), exist_ok=True) + if external_url not in self.editor.dead_externals: + logger.debug("Exporting external %s to path %s", external_url, path) + self.svnrepo.client.export( + external_url.rstrip("/"), + to=temp_path, + rev=revision, + ignore_keywords=True, + ) + self.editor.valid_externals[dest_fullpath] = ( + external_url, + relative_url, + ) + + except SubversionException as se: + # external no longer available (404) + logger.debug(se) + self.editor.dead_externals.add(external_url) + + # subversion export will always create the subdirectories of the external + # path regardless the validity of the remote URL + dest_path_split = dest_path.split(b"/") + current_path = self.path + self.add_directory(current_path) + for subpath in dest_path_split[:-1]: + current_path = os.path.join(current_path, subpath) + 
self.add_directory(current_path) + + if os.path.exists(temp_path): + # external successfully exported + + # remove previous path in from_disk model + self.remove_child(dest_fullpath) + # move exported path to reconstructed filesystem + fullpath = os.path.join(self.rootpath, dest_fullpath) + shutil.move(temp_path, fullpath) + # update from_disk model and store external paths + self.editor.external_paths.add(dest_fullpath) + if os.path.isfile(fullpath): + self.directory[dest_fullpath] = from_disk.Content.from_file( + path=fullpath + ) + else: + self.directory[dest_fullpath] = from_disk.Directory.from_disk( + path=fullpath + ) + for root, dirs, files in os.walk(fullpath): + self.editor.external_paths.update( + [ + os.path.join(root.replace(self.rootpath + b"/", b""), p) + for p in chain(dirs, files) + ] + ) + + # ensure hash update for the directory with externals set + self.directory[self.path].update_hash(force=True) + + # backup externals in directory state + if self.externals: + self.dir_states[self.path].externals = self.externals + + self.svnrepo.has_relative_externals = any( + [relative_url for (_, relative_url) in self.editor.valid_externals.values()] + ) + + def remove_external_path(self, external_path: bytes) -> None: + """Remove a previously exported SVN external path from + the reconstructed filesystem.
+ """ + fullpath = os.path.join(self.path, external_path) + self.remove_child(fullpath) + self.editor.external_paths.discard(fullpath) + self.editor.valid_externals.pop(fullpath, None) + for path in list(self.editor.external_paths): + if path.startswith(fullpath + b"/"): + self.editor.external_paths.remove(path) + subpath_split = external_path.split(b"/")[:-1] + for i in reversed(range(1, len(subpath_split) + 1)): + # delete external sub-directory only if it is empty + subpath = os.path.join(self.path, b"/".join(subpath_split[0:i])) + if not os.listdir(os.path.join(self.rootpath, subpath)): + self.remove_child(subpath) + else: + break + + try: + # externals can overlap with versioned files so we must restore + # them after removing the path above + dest_path = os.path.join(self.rootpath, fullpath) + self.svnrepo.client.export( + svn_urljoin(self.svnrepo.remote_url, os.fsdecode(fullpath)), + to=dest_path, + rev=self.editor.revnum, + ignore_keywords=True, + ) + if os.path.isfile(dest_path): + self.directory[fullpath] = from_disk.Content.from_file(path=dest_path) + else: + self.directory[fullpath] = from_disk.Directory.from_disk(path=dest_path) + except SubversionException: + pass class Editor: @@ -492,7 +689,11 @@ ): self.rootpath = rootpath self.directory = directory - self.file_states: Dict[bytes, FileState] = {} + self.file_states: Dict[bytes, FileState] = defaultdict(FileState) + self.dir_states: Dict[bytes, DirState] = defaultdict(DirState) + self.external_paths: Set[bytes] = set() + self.valid_externals: Dict[bytes, Tuple[str, bool]] = {} + self.dead_externals: Set[str] = set() self.svnrepo = svnrepo self.revnum = None @@ -511,6 +712,7 @@ rootpath=self.rootpath, path=b"", file_states=self.file_states, + dir_states=self.dir_states, svnrepo=self.svnrepo, ) diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 
2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -28,6 +28,7 @@ ) from . import converters, ra +from .utils import parse_external_definition # When log message contains empty data DEFAULT_AUTHOR_MESSAGE = "" @@ -73,6 +74,8 @@ conn=self.conn, rootpath=self.local_url, svnrepo=self ) self.max_content_length = max_content_length + self.has_relative_externals = False + self.replay_started = False def __str__(self): return str( @@ -204,10 +207,49 @@ local_dirname = tempfile.mkdtemp( dir=self.local_dirname, prefix=f"check-revision-{revision}." ) + local_name = os.path.basename(self.remote_url) local_url = os.path.join(local_dirname, local_name) + + url = self.remote_url + # if some paths have external URLs relative to the repository URL but targeting + # paths outside it, we need to export from the origin URL as the remote URL can + # target a dump mounted on the local filesystem + if self.replay_started and self.has_relative_externals: + # externals detected while replaying revisions + url = self.origin_url + elif not self.replay_started and self.remote_url.startswith("file://"): + # revisions replay has not started, we need to check if svn:externals + # properties are set from a checkout of the revision and if some + # external URLs are relative to pick the right export URL + with tempfile.TemporaryDirectory( + dir=self.local_dirname, prefix=f"checkout-revision-{revision}."
+ ) as co_dirname: + self.client.checkout( + self.remote_url, co_dirname, revision, ignore_externals=True + ) + # get all svn:externals properties recursively + externals = self.client.propget( + "svn:externals", co_dirname, None, revision, True + ) + self.has_relative_externals = False + for path, external_defs in externals.items(): + if self.has_relative_externals: + break + for external_def in os.fsdecode(external_defs).split("\n"): + # skip empty line or comment + if not external_def or external_def.startswith("#"): + continue + _, _, _, relative_url = parse_external_definition( + external_def.rstrip("\r"), path, self.origin_url + ) + if relative_url: + self.has_relative_externals = True + url = self.origin_url + break + self.client.export( - self.remote_url, to=local_url, rev=revision, ignore_keywords=True + url.rstrip("/"), to=local_url, rev=revision, ignore_keywords=True, ) return local_dirname, os.fsencode(local_url) @@ -244,6 +286,7 @@ # even in incremental loading mode, we need to replay the whole set of # path modifications from first revision to restore possible file states induced # by setting svn properties on those files (end of line style for instance) + self.replay_started = True first_revision = 1 if start_revision else 0 # handle empty repository edge case for commit in self.logs(first_revision, end_revision): rev = commit["rev"] diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -1,7 +1,8 @@ -# Copyright (C) 2016-2021 The Software Heritage developers +# Copyright (C) 2016-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + from enum import Enum from io import BytesIO import os @@ -20,7 +21,7 @@ SvnLoaderFromRemoteDump, ) from swh.loader.svn.svn 
import SvnRepo -from swh.loader.svn.utils import init_svn_repo_from_dump +from swh.loader.svn.utils import init_svn_repo_from_dump, svn_urljoin from swh.loader.tests import ( assert_last_visit_matches, check_snapshot, @@ -544,90 +545,6 @@ assert stats["snapshot"] == 1 -def test_loader_svn_with_external_properties(swh_storage, datadir, tmp_path): - """Repository with svn:external properties cannot be fully ingested yet - - """ - archive_name = "pkg-gourmet" - archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz") - repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) - - loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path) - - assert loader.load() == {"status": "eventful"} - gourmet_externals_snapshot = Snapshot( - id=hash_to_bytes("19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"), - branches={ - b"HEAD": SnapshotBranch( - target=hash_to_bytes("82a7a4a09f9549223429143ba36ad77375e33c5c"), - target_type=TargetType.REVISION, - ) - }, - ) - check_snapshot(gourmet_externals_snapshot, loader.storage) - assert_last_visit_matches( - loader.storage, - repo_url, - status="partial", - type="svn", - snapshot=gourmet_externals_snapshot.id, - ) - - stats = get_stats(loader.storage) - assert stats["origin"] == 1 - assert stats["origin_visit"] == 1 - assert stats["snapshot"] == 1 - # repository holds 21 revisions, but the last commit holds an 'svn:externals' - # property which will make the loader-svn stops at the last revision prior to the - # bad one - assert stats["revision"] == 20 # commit with the svn:external property - - -def test_loader_svn_with_external_properties_mutiple_loads( - swh_storage, datadir, tmp_path -): - """Repository with svn:external properties cannot be fully ingested yet - but it should not raise errors on second load. 
- - """ - archive_name = "pkg-gourmet" - archive_path = os.path.join(datadir, "pkg-gourmet-with-external-id.tgz") - repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) - - # first load - loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path) - assert loader.load() == {"status": "eventful"} - gourmet_externals_snapshot = Snapshot( - id=hash_to_bytes("19cb68d0a3f22372e2b7017ea5e2a2ea5ae3e09a"), - branches={ - b"HEAD": SnapshotBranch( - target=hash_to_bytes("82a7a4a09f9549223429143ba36ad77375e33c5c"), - target_type=TargetType.REVISION, - ) - }, - ) - check_snapshot(gourmet_externals_snapshot, loader.storage) - assert_last_visit_matches( - loader.storage, - repo_url, - status="partial", - type="svn", - snapshot=gourmet_externals_snapshot.id, - ) - - # second load - loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path) - assert loader.load() == {"status": "uneventful"} - check_snapshot(gourmet_externals_snapshot, loader.storage) - assert_last_visit_matches( - loader.storage, - repo_url, - status="partial", - type="svn", - snapshot=gourmet_externals_snapshot.id, - ) - - def test_loader_svn_with_symlink(swh_storage, datadir, tmp_path): """Repository with symlinks should be ingested ok @@ -1059,7 +976,14 @@ pass else: if dir_change: - root.add_directory(path).close() + try: + dir = root.add_directory(path) + except SubversionException: + dir = root.open_directory(path) + if "properties" in change: + for prop, value in change["properties"].items(): + dir.change_prop(prop, value) + dir.close() else: try: file = root.add_file(path) @@ -1076,15 +1000,19 @@ editor.close() -@pytest.fixture -def repo_url(tmp_path): - # create a repository +def create_repo(tmp_path): repo_path = os.path.join(tmp_path, "tmprepo") repos.create(repo_path) repo_url = f"file://{repo_path}" return repo_url +@pytest.fixture +def repo_url(tmpdir_factory): + # create a repository + return create_repo(tmpdir_factory.mktemp("repos")) + + def 
test_loader_eol_style_file_property_handling_edge_case( swh_storage, repo_url, tmp_path ): @@ -1895,3 +1823,545 @@ assert_last_visit_matches( loader.storage, repo_url, status="full", type="svn", ) + + +@pytest.fixture +def external_repo_url(tmpdir_factory): + # create a repository + return create_repo(tmpdir_factory.mktemp("external")) + + +def test_loader_with_valid_svn_externals( + swh_storage, repo_url, external_repo_url, tmp_path +): + # first commit on external + add_commit( + external_repo_url, + "Create some directories and files in an external repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="code/hello/hello-world", + properties={"svn:executable": "*"}, + data=b"#!/bin/bash\necho Hello World !", + ), + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="foo.sh", + properties={"svn:executable": "*"}, + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + # first commit + add_commit( + repo_url, + "Create repository structure.", + [ + CommitChange(change_type=CommitChangeType.AddOrUpdate, path="branches/",), + CommitChange(change_type=CommitChangeType.AddOrUpdate, path="tags/",), + CommitChange(change_type=CommitChangeType.AddOrUpdate, path="trunk/",), + ], + ) + + # second commit + add_commit( + repo_url, + ( + "Set svn:externals property on trunk/externals path of repository to load." + "One external targets a remote directory and another one a remote file." 
+ ), + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/externals/", + properties={ + "svn:externals": ( + f"{svn_urljoin(external_repo_url, 'code/hello')} hello\n" + f"{svn_urljoin(external_repo_url, 'foo.sh')} foo.sh" + ) + }, + ), + ], + ) + + # first load + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + # third commit + add_commit( + repo_url, + "Unset svn:externals property on trunk/externals path", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/externals/", + properties={"svn:externals": None}, + ), + ], + ) + + # second load + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_loader_with_invalid_svn_externals(swh_storage, repo_url, tmp_path): + + # first commit + add_commit( + repo_url, + "Create repository structure.", + [ + CommitChange(change_type=CommitChangeType.AddOrUpdate, path="branches/",), + CommitChange(change_type=CommitChangeType.AddOrUpdate, path="tags/",), + CommitChange(change_type=CommitChangeType.AddOrUpdate, path="trunk/",), + ], + ) + + # second commit + add_commit( + repo_url, + ( + "Set svn:externals property on trunk/externals path of repository to load." + "The externals URLs are not valid." 
+ ), + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/externals/", + properties={ + "svn:externals": ( + "file:///tmp/invalid/svn/repo/hello hello\n" + "file:///tmp/invalid/svn/repo/foo.sh foo.sh" + ) + }, + ), + ], + ) + + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_loader_with_valid_externals_modification( + swh_storage, repo_url, external_repo_url, tmp_path +): + # first commit on external + add_commit( + external_repo_url, + "Create some directories and files in an external repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="code/hello/hello-world", + properties={"svn:executable": "*"}, + data=b"#!/bin/bash\necho Hello World !", + ), + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="code/bar/bar.sh", + properties={"svn:executable": "*"}, + data=b"#!/bin/bash\necho bar", + ), + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="foo.sh", + properties={"svn:executable": "*"}, + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + # first commit + add_commit( + repo_url, + ("Set svn:externals property on trunk/externals path of repository to load."), + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/externals/", + properties={ + "svn:externals": ( + f"{svn_urljoin(external_repo_url, 'code/hello')} src/code/hello\n" # noqa + f"{svn_urljoin(external_repo_url, 'foo.sh')} src/foo.sh\n" + ) + }, + ), + ], + ) + + # second commit + add_commit( + repo_url, + ( + "Modify svn:externals property on trunk/externals path of repository to load." 
# noqa + ), + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/externals/", + properties={ + "svn:externals": ( + f"{svn_urljoin(external_repo_url, 'code/bar')} src/code/bar\n" # noqa + f"{svn_urljoin(external_repo_url, 'foo.sh')} src/foo.sh\n" + ) + }, + ), + ], + ) + + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_loader_with_valid_externals_and_versioned_path( + swh_storage, repo_url, external_repo_url, tmp_path +): + # first commit on external + add_commit( + external_repo_url, + "Create a file in an external repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="code/script.sh", + data=b"#!/bin/bash\necho Hello World !", + ), + ], + ) + + # first commit + add_commit( + repo_url, + "Add file with same name but different content in main repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/script.sh", + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + # second commit + add_commit( + repo_url, + "Add externals targeting the versioned file", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={ + "svn:externals": ( + f"{svn_urljoin(external_repo_url, 'code/script.sh')} script.sh" # noqa + ) + }, + ), + ], + ) + + # third commit + add_commit( + repo_url, + "Modify the versioned file", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/script.sh", + data=b"#!/bin/bash\necho bar", + ), + ], + ) + + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_loader_with_invalid_externals_and_versioned_path( + swh_storage, repo_url, tmp_path +): + + 
# first commit + add_commit( + repo_url, + "Add file in main repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/script.sh", + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + # second commit + add_commit( + repo_url, + "Add invalid externals targeting the versioned file", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={ + "svn:externals": ( + "file:///tmp/invalid/svn/repo/code/script.sh script.sh" + ) + }, + ), + ], + ) + + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_loader_set_externals_then_remove_and_add_as_local( + swh_storage, repo_url, external_repo_url, tmp_path +): + # first commit on external + add_commit( + external_repo_url, + "Create a file in an external repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="code/script.sh", + data=b"#!/bin/bash\necho Hello World !", + ), + ], + ) + + # first commit + add_commit( + repo_url, + "Add trunk directory and set externals", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={ + "svn:externals": (f"{svn_urljoin(external_repo_url, 'code')} code") + }, + ), + ], + ) + + # second commit + add_commit( + repo_url, + "Unset externals on trunk and add remote path as local path", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={"svn:externals": None}, + ), + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/code/script.sh", + data=b"#!/bin/bash\necho Hello World !", + ), + ], + ) + + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + 
+def test_loader_set_invalid_externals_then_remove(swh_storage, repo_url, tmp_path): + + # first commit + add_commit( + repo_url, + "Add trunk directory and set invalid external", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={ + "svn:externals": "file:///tmp/invalid/svn/repo/code external/code" + }, + ), + ], + ) + + # second commit + add_commit( + repo_url, + "Unset externals on trunk", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={"svn:externals": None}, + ), + ], + ) + + loader = SvnLoader(swh_storage, repo_url, temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_loader_set_externals_with_versioned_file_overlap( + swh_storage, repo_url, external_repo_url, tmp_path +): + # first commit on external + add_commit( + external_repo_url, + "Create a file in an external repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="code/script.sh", + data=b"#!/bin/bash\necho Hello World !", + ), + ], + ) + + # first commit + add_commit( + repo_url, + "Add file with same name as in the external repository", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/script.sh", + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + # second commit + add_commit( + repo_url, + "Set external on trunk overlapping versioned file", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={ + "svn:externals": ( + f"{svn_urljoin(external_repo_url, 'code/script.sh')} script.sh" + ) + }, + ), + ], + ) + + # third commit + add_commit( + repo_url, + "Unset externals on trunk", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="trunk/", + properties={"svn:externals": None}, + ), + ], + ) + + loader = SvnLoader(swh_storage, repo_url, 
temp_directory=tmp_path, check_revision=1) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + + +def test_dump_loader_relative_externals_detection( + swh_storage, repo_url, external_repo_url, tmp_path +): + + add_commit( + external_repo_url, + "Create a file in external repository.", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="project1/foo.sh", + data=b"#!/bin/bash\necho foo", + ), + ], + ) + + add_commit( + external_repo_url, + "Create another file in repository to load.", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="project2/bar.sh", + data=b"#!/bin/bash\necho bar", + ), + ], + ) + + external_url = f"{external_repo_url.replace('file://', '//')}/project2/bar.sh" + add_commit( + repo_url, + "Set external relative to URL scheme in repository to load", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="project1/", + properties={"svn:externals": (f"{external_url} bar.sh")}, + ), + ], + ) + + loader = SvnLoaderFromRemoteDump( + swh_storage, repo_url, temp_directory=tmp_path, check_revision=1 + ) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + assert loader.svnrepo.has_relative_externals + + add_commit( + repo_url, + "Unset external in repository to load", + [ + CommitChange( + change_type=CommitChangeType.AddOrUpdate, + path="project1/", + properties={"svn:externals": None}, + ), + ], + ) + + loader = SvnLoaderFromRemoteDump( + swh_storage, repo_url, temp_directory=tmp_path, check_revision=1 + ) + assert loader.load() == {"status": "eventful"} + assert_last_visit_matches( + loader.storage, repo_url, status="full", type="svn", + ) + assert not loader.svnrepo.has_relative_externals