diff --git a/swh/core/github/tests/test_github_utils.py b/swh/core/github/tests/test_github_utils.py
--- a/swh/core/github/tests/test_github_utils.py
+++ b/swh/core/github/tests/test_github_utils.py
@@ -32,7 +32,9 @@
         ("user/repo/with/some/deps", None),  # url kind is not dealt with for now
     ],
 )
-def test_get_canonical_github_origin_url(user_repo, expected_url, requests_mock):
+def test_get_canonical_github_origin_url(
+    user_repo, expected_url, requests_mock, github_credentials
+):
     """It should return a canonical github origin when it exists, None otherwise"""
     html_url = _url_github_html(user_repo)
     api_url = _url_github_api(_sanitize_github_url(user_repo))
@@ -46,14 +48,44 @@
 
     requests_mock.get(api_url, [{"status_code": status_code, "json": response}])
 
+    # anonymous
     assert get_canonical_github_origin_url(html_url) == expected_url
 
+    # with credentials
+    assert (
+        get_canonical_github_origin_url(html_url, credentials=github_credentials)
+        == expected_url
+    )
+
+    # anonymous
+    assert (
+        GitHubSession(
+            user_agent="GitHub Session Test",
+        ).get_canonical_url(html_url)
+        == expected_url
+    )
+
+    # with credentials
+    assert (
+        GitHubSession(
+            user_agent="GitHub Session Test", credentials=github_credentials
+        ).get_canonical_url(html_url)
+        == expected_url
+    )
+
 
 def test_get_canonical_github_origin_url_not_gh_origin():
     """It should return the input url when that origin is not a github one"""
     url = "https://example.org"
     assert get_canonical_github_origin_url(url) == url
 
+    assert (
+        GitHubSession(
+            user_agent="GitHub Session Test",
+        ).get_canonical_url(url)
+        == url
+    )
+
 
 def test_github_session_anonymous_session():
     user_agent = ("GitHub Session Test",)
diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py
--- a/swh/core/github/utils.py
+++ b/swh/core/github/utils.py
@@ -40,25 +40,19 @@
     return url.lower().rstrip("/").rstrip(".git").rstrip("/")
 
 
-def get_canonical_github_origin_url(url: str) -> Optional[str]:
+def get_canonical_github_origin_url(
+    url: str, credentials: Optional[List[Dict[str, str]]] = None
+) -> Optional[str]:
     """Retrieve canonical github url out of an url if any or None otherwise.
 
-    This triggers an anonymous http request to the github api url to determine the
-    canonical repository url.
+    This triggers an http request to the github api url to determine the canonical
+    repository url. If no credentials are provided, the request is anonymous. Either
+    way, that request can be rate-limited by github.
 
     """
-    url_ = url.lower()
-
-    match = GITHUB_PATTERN.match(url_)
-    if not match:
-        return url
-
-    user_repo = _sanitize_github_url(match.groupdict()["user_repo"])
-    response = requests.get(_url_github_api(user_repo))
-    if response.status_code != 200:
-        return None
-    data = response.json()
-    return data["html_url"]
+    return GitHubSession(
+        user_agent="SWH core library", credentials=credentials
+    ).get_canonical_url(url)
 
 
 class RateLimited(Exception):
@@ -212,3 +206,25 @@
                     sleep_time,
                 )
                 time.sleep(sleep_time)
+
+    def get_canonical_url(self, url: str) -> Optional[str]:
+        """Retrieve the canonical github url for ``url``.
+
+        A url not matching GITHUB_PATTERN is returned unchanged, without any http
+        request. Otherwise the github api is queried through ``self.request``.
+
+        Returns:
+            The ``html_url`` of the api response, or None if its status is not 200.
+        """
+        url_ = url.lower()
+
+        match = GITHUB_PATTERN.match(url_)
+        if not match:
+            return url
+
+        user_repo = _sanitize_github_url(match.groupdict()["user_repo"])
+        response = self.request(_url_github_api(user_repo))
+        if response.status_code != 200:
+            return None
+        data = response.json()
+        return data["html_url"]