# Names needed by the parametrized tests below (swh/loader/svn/tests/test_utils.py).
import pytest

from swh.loader.svn import utils


@pytest.mark.parametrize(
    "base_url, paths_to_join, expected_result",
    [
        (
            "https://svn.example.org",
            ["repos", "test"],
            "https://svn.example.org/repos/test",
        ),
        (
            "https://svn.example.org/",
            ["repos", "test"],
            "https://svn.example.org/repos/test",
        ),
        (
            "https://svn.example.org/foo",
            ["repos", "test"],
            "https://svn.example.org/foo/repos/test",
        ),
        (
            "https://svn.example.org/foo/",
            ["/repos", "test/"],
            "https://svn.example.org/foo/repos/test",
        ),
        ("https://svn.example.org/foo", ["../bar"], "https://svn.example.org/bar"),
    ],
)
def test_svn_urljoin(base_url, paths_to_join, expected_result):
    """Check that svn_urljoin joins and normalizes URL path components."""
    assert utils.svn_urljoin(base_url, *paths_to_join) == expected_result


@pytest.mark.parametrize(
    "external, dir_path, repo_url, expected_result",
    [
        # subversion < 1.5
        (
            "third-party/sounds http://svn.example.com/repos/sounds",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/sounds", "http://svn.example.com/repos/sounds", None),
        ),
        (
            "third-party/skins -r148 http://svn.example.com/skinproj",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/skins", "http://svn.example.com/skinproj", 148),
        ),
        (
            "third-party/skins/toolkit -r21 http://svn.example.com/skin-maker",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/skins/toolkit", "http://svn.example.com/skin-maker", 21),
        ),
        # subversion >= 1.5
        (
            " http://svn.example.com/repos/sounds third-party/sounds",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/sounds", "http://svn.example.com/repos/sounds", None),
        ),
        (
            "-r148 http://svn.example.com/skinproj third-party/skins",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/skins", "http://svn.example.com/skinproj", 148),
        ),
        (
            "-r 21 http://svn.example.com/skin-maker third-party/skins/toolkit",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/skins/toolkit", "http://svn.example.com/skin-maker", 21),
        ),
        (
            "http://svn.example.com/repos/sounds third-party/sounds",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/sounds", "http://svn.example.com/repos/sounds", None),
        ),
        (
            "http://svn.example.com/skinproj@148 third-party/skins",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/skins", "http://svn.example.com/skinproj", 148),
        ),
        (
            "http://anon:anon@svn.example.com/skin-maker@21 third-party/skins/toolkit",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            (
                "third-party/skins/toolkit",
                "http://anon:anon@svn.example.com/skin-maker",
                21,
            ),
        ),
        (
            "-r21 http://anon:anon@svn.example.com/skin-maker third-party/skins/toolkit",  # noqa
            "trunk/externals",
            "http://svn.example.org/repos/test",
            (
                "third-party/skins/toolkit",
                "http://anon:anon@svn.example.com/skin-maker",
                21,
            ),
        ),
        (
            "-r21 http://anon:anon@svn.example.com/skin-maker@21 third-party/skins/toolkit",  # noqa
            "trunk/externals",
            "http://svn.example.org/repos/test",
            (
                "third-party/skins/toolkit",
                "http://anon:anon@svn.example.com/skin-maker",
                21,
            ),
        ),
        # subversion >= 1.5, relative external definitions
        (
            "^/sounds third-party/sounds",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/sounds", "http://svn.example.org/repos/test/sounds", None),
        ),
        (
            "/skinproj@148 third-party/skins",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/skins", "http://svn.example.org/skinproj", 148),
        ),
        (
            "//svn.example.com/skin-maker@21 third-party/skins/toolkit",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("third-party/skins/toolkit", "http://svn.example.com/skin-maker", 21),
        ),
        (
            "../skins skins",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("skins", "http://svn.example.org/repos/test/trunk/skins", None),
        ),
        # several levels of "../" must all be resolved against dir_path
        (
            "../../skins skins",
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("skins", "http://svn.example.org/repos/test/skins", None),
        ),
        # subversion >= 1.6
        (
            'http://svn.thirdparty.com/repos/My%20Project "My Project"',
            "trunk/externals",
            "http://svn.example.org/repos/test",
            ("My Project", "http://svn.thirdparty.com/repos/My%20Project", None),
        ),
        # consecutive spaces inside a quoted path must be preserved
        (
            'http://svn.thirdparty.com/repos/My%20%20%20Project "My   Project"',
            "trunk/externals",
            "http://svn.example.org/repos/test",
            (
                "My   Project",
                "http://svn.thirdparty.com/repos/My%20%20%20Project",
                None,
            ),
        ),
        (
            'http://svn.thirdparty.com/repos/%22Quotes%20Too%22 \\"Quotes\\ Too\\"',
            "trunk/externals",
            "http://svn.example.org/repos/test",
            (
                '"Quotes Too"',
                "http://svn.thirdparty.com/repos/%22Quotes%20Too%22",
                None,
            ),
        ),
        # consecutive escaped spaces must be preserved too
        (
            'http://svn.thirdparty.com/repos/%22Quotes%20%20%20Too%22 \\"Quotes\\ \\ \\ Too\\"',  # noqa
            "trunk/externals",
            "http://svn.example.org/repos/test",
            (
                '"Quotes   Too"',
                "http://svn.thirdparty.com/repos/%22Quotes%20%20%20Too%22",
                None,
            ),
        ),
    ],
)
def test_parse_external_definition(external, dir_path, repo_url, expected_result):
    """Check parsing of svn:externals definitions in all supported syntaxes."""
    assert (
        utils.parse_external_definition(external, dir_path, repo_url) == expected_result
    )
# Names required by the two helpers below. ``Optional`` appears in the return
# annotation of parse_external_definition and must be importable at
# definition time.
import posixpath
import shlex
from typing import Optional, Tuple
from urllib.parse import urlparse


def svn_urljoin(base_url: str, *args: str) -> str:
    """Join a base URL and a list of paths in a SVN way.

    For instance:

        - svn_urljoin("http://example.org", "foo", "bar")
            will return "http://example.org/foo/bar"

        - svn_urljoin("http://example.org/foo", "../bar")
            will return "http://example.org/bar"

    Args:
        base_url: Base URL to join paths with
        args: path components; leading and trailing slashes are ignored

    Returns:
        The joined URL, with "." and ".." segments resolved. Only the scheme,
        network location and path of ``base_url`` are kept.

    """
    parsed_url = urlparse(base_url)
    # URL paths always use forward slashes: use posixpath so the result does
    # not depend on the platform's os.path flavour.
    path = posixpath.normpath(
        posixpath.join(parsed_url.path or "/", *[arg.strip("/") for arg in args])
    )
    return f"{parsed_url.scheme}://{parsed_url.netloc}{path}"


def parse_external_definition(
    external: str, dir_path: str, repo_url: str
) -> Tuple[str, str, Optional[int]]:
    """Parse a subversion external definition.

    Both the pre-1.5 syntax (``DIR [-rN | -r N] URL``) and the newer one
    (``[-rN | -r N] URL[@PEG] DIR``) are handled, as well as relative URLs
    (``^/``, ``//``, ``/``, ``../``) and quoted or backslash-escaped paths
    (subversion >= 1.6).

    Args:
        external: an external definition, extracted from the lines split of a
            svn:externals property value
        dir_path: The path of the directory in the subversion repository where
            the svn:externals property was set
        repo_url: URL of the subversion repository

    Returns:
        A tuple with the following members:

            - path relative to dir_path where the external should be exported
            - URL of the external to export
            - optional revision of the external to export

    Raises:
        ValueError: if the token following ``-r`` (or glued to it) is not an
            integer

    """
    path = ""
    external_url = ""
    revision = None
    prev_part = None
    try:
        # shell-like tokenization: handles "quoted paths", \"escaped quotes\"
        # and escaped\ spaces while preserving inner whitespace
        external_parts = shlex.split(external)
    except ValueError:
        # unbalanced quote or dangling backslash: best effort, split on spaces
        external_parts = external.split()
    for external_part in external_parts:
        if prev_part == "-r":
            # parse revision in the form "-r XXX"
            revision = int(external_part)
        elif external_part == "-r":
            # revision number is the next token; must not be taken for a path
            pass
        elif external_part.startswith("-r"):
            # parse revision in the form "-rXXX"
            revision = int(external_part[2:])
        elif external_part.startswith("^/"):
            # URL relative to the root of the repository in which the
            # svn:externals property is versioned
            external_url = svn_urljoin(repo_url, external_part[2:])
        elif external_part.startswith("//"):
            # URL relative to the scheme of the URL of the directory on which
            # the svn:externals property is set
            scheme = urlparse(repo_url).scheme
            external_url = f"{scheme}:{external_part}"
        elif external_part.startswith("/"):
            # URL relative to the root URL of the server on which the
            # svn:externals property is versioned
            parsed_url = urlparse(repo_url)
            root_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
            external_url = svn_urljoin(root_url, external_part)
        elif external_part.startswith("../"):
            # URL relative to the URL of the directory on which the
            # svn:externals property is set
            external_url = svn_urljoin(repo_url, dir_path, external_part)
        elif external_part.startswith(
            ("file://", "http://", "https://", "svn://", "svn+ssh://")
        ):
            # absolute external URL
            external_url = external_part
        else:
            # path of the external relative to dir_path
            path = external_part
        prev_part = external_part
    if "@" in external_url:
        # try to extract revision number if external URL is in the form
        # http://svn.example.org/repos/test/path@XXX
        url, revision_s = external_url.rsplit("@", maxsplit=1)
        try:
            revision = int(revision_s)
            external_url = url
        except ValueError:
            # handle URL like http://user@svn.example.org/
            pass
    return (path, external_url, revision)