diff --git a/swh/core/github/tests/test_github_utils.py b/swh/core/github/tests/test_github_utils.py --- a/swh/core/github/tests/test_github_utils.py +++ b/swh/core/github/tests/test_github_utils.py @@ -16,6 +16,7 @@ ) KNOWN_GH_REPO = "https://github.com/user/repo" +KNOWN_GH_REPO2 = "https://github.com/user/reposit" @pytest.mark.parametrize( @@ -26,6 +27,11 @@ ("user/repo/", KNOWN_GH_REPO), ("user/repo", KNOWN_GH_REPO), ("user/repo/.git", KNOWN_GH_REPO), + ("user/reposit.git", KNOWN_GH_REPO2), + ("user/reposit.git/", KNOWN_GH_REPO2), + ("user/reposit/", KNOWN_GH_REPO2), + ("user/reposit", KNOWN_GH_REPO2), + ("user/reposit/.git", KNOWN_GH_REPO2), ("unknown/page", None), # unknown gh origin returns None ("user/with/deps", None), # url kind is not dealt with ], diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py --- a/swh/core/github/utils.py +++ b/swh/core/github/utils.py @@ -32,9 +32,14 @@ return f"https://api.github.com/repos/{user_repo}" +_SANITIZATION_RE = re.compile(r"^(.*?)/?(\.git)?/?$") + + def _sanitize_github_url(url: str) -> str: """Sanitize github url.""" - return url.lower().rstrip("/").rstrip(".git").rstrip("/") + m = _SANITIZATION_RE.match(url.lower()) + assert m is not None, url # impossible, but mypy doesn't know it + return m.group(1) def get_canonical_github_origin_url(