Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8396116
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Subscribers
None
View Options
diff --git a/swh/core/github/__init__.py b/swh/core/github/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/swh/core/github/utils.py b/swh/core/github/utils.py
new file mode 100644
index 0000000..8721401
--- /dev/null
+++ b/swh/core/github/utils.py
@@ -0,0 +1,48 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+import re
+from typing import Optional
+
+import requests
+
+# Matches http(s) github urls; what follows the host is captured as "user_repo".
+# The dot is escaped so that only the literal "github.com" host is accepted.
+GITHUB_PATTERN = re.compile(r"https?://github\.com/(?P<user_repo>.*)")
+
+
+def _url_github_html(user_repo: str) -> str:
+    """Build the github html (web page) url for the given ``user/repo`` path."""
+    html_template = "https://github.com/{}"
+    return html_template.format(user_repo)
+
+
+def _url_github_api(user_repo: str) -> str:
+    """Build the github api url describing the given ``user/repo`` repository."""
+    api_template = "https://api.github.com/repos/{}"
+    return api_template.format(user_repo)
+
+
+def _sanitize_github_url(url: str) -> str:
+    """Sanitize github url.
+
+    The url is lowercased, then any trailing slashes, one trailing ".git" suffix and
+    any slashes that preceded that suffix are removed.
+
+    """
+    sanitized = url.lower().rstrip("/")
+    # str.rstrip(".git") would strip any trailing run of the characters ".", "g", "i"
+    # and "t" (e.g. "user/digit" -> "user/d"), so drop the exact suffix instead.
+    if sanitized.endswith(".git"):
+        sanitized = sanitized[: -len(".git")]
+    return sanitized.rstrip("/")
+
+
+def get_canonical_github_origin_url(url: str) -> Optional[str]:
+    """Retrieve canonical github url out of an url if any.
+
+    This triggers an anonymous http request to the github api url to determine the
+    canonical repository url.
+
+    Args:
+        url: origin url to canonicalize
+
+    Returns:
+        The input url unchanged when it is not a github url, the ``html_url``
+        reported by the github api when the request succeeds (status code 200), or
+        None otherwise.
+
+    """
+    match = GITHUB_PATTERN.match(url.lower())
+    if not match:
+        return url
+
+    user_repo = _sanitize_github_url(match.group("user_repo"))
+    # Bound the request so an unresponsive server cannot block the caller forever
+    response = requests.get(_url_github_api(user_repo), timeout=30)
+    if response.status_code != 200:
+        return None
+    return response.json()["html_url"]
diff --git a/swh/core/tests/test_github_utils.py b/swh/core/tests/test_github_utils.py
new file mode 100644
index 0000000..52a4608
--- /dev/null
+++ b/swh/core/tests/test_github_utils.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.core.github.utils import (
+ _sanitize_github_url,
+ _url_github_api,
+ _url_github_html,
+ get_canonical_github_origin_url,
+)
+
+# Canonical html url expected for every resolvable "user/repo" test input below
+KNOWN_GH_REPO = "https://github.com/user/repo"
+
+
+@pytest.mark.parametrize(
+    "user_repo, expected_url",
+    [
+        ("user/repo.git", KNOWN_GH_REPO),
+        ("user/repo.git/", KNOWN_GH_REPO),
+        ("user/repo/", KNOWN_GH_REPO),
+        ("user/repo", KNOWN_GH_REPO),
+        ("user/repo/.git", KNOWN_GH_REPO),
+        # edge cases
+        ("https://github.com/unknown-page", None),  # unknown gh origin returns None
+        ("user/repo/with/some/deps", None),  # url kind is not dealt with for now
+    ],
+)
+def test_get_canonical_github_origin_url(user_repo, expected_url, requests_mock):
+    """Known repositories resolve to their canonical url, unknown ones to None"""
+    origin_url = _url_github_html(user_repo)
+    mocked_api_url = _url_github_api(_sanitize_github_url(user_repo))
+
+    # The mocked api answers 200 with the canonical url for known repositories and
+    # 404 with an empty payload for the others
+    repo_exists = expected_url is not None
+    mocked_answer = {
+        "status_code": 200 if repo_exists else 404,
+        "json": {"html_url": _sanitize_github_url(origin_url)} if repo_exists else {},
+    }
+    requests_mock.get(mocked_api_url, [mocked_answer])
+
+    assert get_canonical_github_origin_url(origin_url) == expected_url
+
+
+def test_get_canonical_github_origin_url_not_gh_origin():
+    """A url which is not a github one must be handed back unchanged"""
+    non_github_url = "https://example.org"
+    result = get_canonical_github_origin_url(non_github_url)
+    assert result == non_github_url
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jun 4 2025, 7:49 PM (12 w, 16 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3275845
Attached To
rDCORE Foundations and core functionalities
Event Timeline
Log In to Comment