diff --git a/swh/core/github/tests/test_github_utils.py b/swh/core/github/tests/test_github_utils.py
--- a/swh/core/github/tests/test_github_utils.py
+++ b/swh/core/github/tests/test_github_utils.py
@@ -18,11 +18,6 @@
 KNOWN_GH_REPO = "https://github.com/user/repo"
 
 
-def _url_github_html(user_repo: str, protocol: str = "https") -> str:
-    """Given the user repo, returns the expected github html url."""
-    return f"{protocol}://github.com/{user_repo}"
-
-
 @pytest.mark.parametrize(
     "user_repo, expected_url",
     [
@@ -39,46 +34,57 @@
     user_repo, expected_url, requests_mock, github_credentials
 ):
     """It should return a canonical github origin when it exists, None otherwise"""
-    for protocol in ["https", "git", "http"]:
-        html_input_url = _url_github_html(user_repo, protocol=protocol)
-        html_url = _url_github_html(user_repo)
-        api_url = _url_github_api(_sanitize_github_url(user_repo))
-
-        if expected_url is not None:
-            status_code = 200
-            response = {"html_url": _sanitize_github_url(html_url)}
-        else:
-            status_code = 404
-            response = {}
-
-        requests_mock.get(api_url, [{"status_code": status_code, "json": response}])
-
-        # anonymous
-        assert get_canonical_github_origin_url(html_input_url) == expected_url
-
-        # with credentials
-        assert (
-            get_canonical_github_origin_url(
-                html_input_url, credentials=github_credentials
+    for separator in ["/", ":"]:
+        for prefix in [
+            "http://",
+            "https://",
+            "git://",
+            "ssh://",
+            "//",
+            "git@",
+            "ssh://git@",
+            "https://${env.GITHUB_TOKEN_USR}:${env.GITHUB_TOKEN_PSW}@",
+            "[fetch=]git@",
+        ]:
+            html_input_url = f"{prefix}github.com{separator}{user_repo}"
+            html_url = f"https://github.com/{user_repo}"
+            api_url = _url_github_api(_sanitize_github_url(user_repo))
+
+            if expected_url is not None:
+                status_code = 200
+                response = {"html_url": _sanitize_github_url(html_url)}
+            else:
+                status_code = 404
+                response = {}
+
+            requests_mock.get(api_url, [{"status_code": status_code, "json": response}])
+
+            # anonymous
+            assert get_canonical_github_origin_url(html_input_url) == expected_url
+
+            # with credentials
+            assert (
+                get_canonical_github_origin_url(
+                    html_input_url, credentials=github_credentials
+                )
+                == expected_url
+            )
+
+            # anonymous
+            assert (
+                GitHubSession(
+                    user_agent="GitHub Session Test",
+                ).get_canonical_url(html_input_url)
+                == expected_url
+            )
+
+            # with credentials
+            assert (
+                GitHubSession(
+                    user_agent="GitHub Session Test", credentials=github_credentials
+                ).get_canonical_url(html_input_url)
+                == expected_url
             )
-            == expected_url
-        )
-
-        # anonymous
-        assert (
-            GitHubSession(
-                user_agent="GitHub Session Test",
-            ).get_canonical_url(html_input_url)
-            == expected_url
-        )
-
-        # with credentials
-        assert (
-            GitHubSession(
-                user_agent="GitHub Session Test", credentials=github_credentials
-            ).get_canonical_url(html_input_url)
-            == expected_url
-        )
 
 
 def test_get_canonical_github_origin_url_not_gh_origin():
diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py
--- a/swh/core/github/utils.py
+++ b/swh/core/github/utils.py
@@ -19,7 +19,9 @@
     wait_exponential,
 )
 
-GITHUB_PATTERN = re.compile(r"(git|https?)://github.com/(?P<user_repo>.*)")
+GITHUB_PATTERN = re.compile(
+    r"(//|git://|git@|git//|https?://|ssh://|.*@)github.com[/:](?P<user_repo>.*)"
+)
 
 logger = logging.getLogger(__name__)
 