Changeset View
Changeset View
Standalone View
Standalone View
swh/core/github/utils.py
Show All 34 Lines | def _url_github_api(user_repo: str) -> str: | ||||
return f"https://api.github.com/repos/{user_repo}" | return f"https://api.github.com/repos/{user_repo}" | ||||
def _sanitize_github_url(url: str) -> str: | def _sanitize_github_url(url: str) -> str: | ||||
"""Sanitize github url.""" | """Sanitize github url.""" | ||||
return url.lower().rstrip("/").rstrip(".git").rstrip("/") | return url.lower().rstrip("/").rstrip(".git").rstrip("/") | ||||
def get_canonical_github_origin_url(
    url: str, credentials: Optional[List[Dict[str, str]]] = None
) -> Optional[str]:
    """Retrieve the canonical github url out of an url, if any.

    The lookup is delegated to a :class:`GitHubSession` built with the given
    credentials, which triggers an http request to the github api. When no
    credentials are provided, that request is anonymous. Either way, the
    request can be rate-limited by github.

    Args:
        url: the origin url to canonicalize.
        credentials: optional list of credentials forwarded as-is to
            :class:`GitHubSession`.

    Returns:
        Whatever ``GitHubSession.get_canonical_url`` returns for ``url``.

    """
    session = GitHubSession(user_agent="SWH core library", credentials=credentials)
    return session.get_canonical_url(url)
class RateLimited(Exception): | class RateLimited(Exception): | ||||
def __init__(self, response): | def __init__(self, response): | ||||
self.reset_time: Optional[int] | self.reset_time: Optional[int] | ||||
# Figure out how long we need to sleep because of that rate limit | # Figure out how long we need to sleep because of that rate limit | ||||
ratelimit_reset = response.headers.get("X-Ratelimit-Reset") | ratelimit_reset = response.headers.get("X-Ratelimit-Reset") | ||||
▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | def request(self, url) -> requests.Response: | ||||
raise MissingRateLimitReset() | raise MissingRateLimitReset() | ||||
sleep_time = max(reset_times.values()) - time.time() + 1 | sleep_time = max(reset_times.values()) - time.time() + 1 | ||||
logger.info( | logger.info( | ||||
"Rate limits exhausted for all tokens. Sleeping for %f seconds.", | "Rate limits exhausted for all tokens. Sleeping for %f seconds.", | ||||
sleep_time, | sleep_time, | ||||
) | ) | ||||
time.sleep(sleep_time) | time.sleep(sleep_time) | ||||
def get_canonical_url(self, url: str) -> Optional[str]:
    """Resolve the canonical github url of a repository.

    The lowercased url is matched against ``GITHUB_PATTERN``. When it
    matches, the github api is queried through :meth:`request` for the
    sanitized ``user_repo`` part of the url.

    Args:
        url: the url to canonicalize.

    Returns:
        The input url, unchanged, when it does not match ``GITHUB_PATTERN``;
        the ``html_url`` field of the api response when the api answers with
        a 200 status code; None for any other status code.

    """
    matched = GITHUB_PATTERN.match(url.lower())
    if not matched:
        # Not recognized as a github url: hand it back as-is.
        return url

    repo_path = _sanitize_github_url(matched.groupdict()["user_repo"])
    api_response = self.request(_url_github_api(repo_path))
    if api_response.status_code == 200:
        return api_response.json()["html_url"]
    return None