Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8395789
test_github_utils.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
test_github_utils.py
View Options
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import
logging
import
pytest
from
swh.core.github.pytest_plugin
import
HTTP_GITHUB_API_URL
from
swh.core.github.utils
import
(
GitHubSession
,
_sanitize_github_url
,
_url_github_api
,
get_canonical_github_origin_url
,
)
# Canonical origin url expected for every "user/repo" variant used in the
# parametrized test cases of this module.
KNOWN_GH_REPO = "https://github.com/user/repo"
@pytest.mark.parametrize(
    "user_repo, expected_url",
    [
        ("user/repo.git", KNOWN_GH_REPO),
        ("user/repo.git/", KNOWN_GH_REPO),
        ("user/repo/", KNOWN_GH_REPO),
        ("user/repo", KNOWN_GH_REPO),
        ("user/repo/.git", KNOWN_GH_REPO),
        ("unknown/page", None),  # unknown gh origin returns None
        ("user/with/deps", None),  # url kind is not dealt with
    ],
)
def test_get_canonical_github_origin_url(
    user_repo, expected_url, requests_mock, github_credentials
):
    """Canonical url lookup yields the expected origin, or None when unknown.

    Each parametrized ``user_repo`` is combined with every supported url prefix
    and host/path separator. For each resulting input url, a canned API
    response is registered through ``requests_mock`` (200 with an ``html_url``
    payload when an origin is expected, 404 with an empty payload otherwise),
    then the lookup is exercised through both the module-level function and
    ``GitHubSession``, anonymously and with credentials.
    """
    separators = ("/", ":")
    url_prefixes = (
        "http://",
        "https://",
        "git://",
        "ssh://",
        "//",
        "git@",
        "ssh://git@",
        "https://${env.GITHUB_TOKEN_USR}:${env.GITHUB_TOKEN_PSW}@",
        "[fetch=]git@",
    )

    for separator in separators:
        for prefix in url_prefixes:
            html_input_url = f"{prefix}github.com{separator}{user_repo}"
            html_url = f"https://github.com/{user_repo}"
            api_url = _url_github_api(_sanitize_github_url(user_repo))

            # Pick the canned API answer matching the expected outcome
            if expected_url is None:
                status_code = 404
                response = {}
            else:
                status_code = 200
                response = {"html_url": _sanitize_github_url(html_url)}

            mocked_responses = [{"status_code": status_code, "json": response}]
            requests_mock.get(api_url, mocked_responses)

            # module-level function, anonymous
            assert get_canonical_github_origin_url(html_input_url) == expected_url

            # module-level function, with credentials
            canonical_url = get_canonical_github_origin_url(
                html_input_url, credentials=github_credentials
            )
            assert canonical_url == expected_url

            # session object, anonymous
            anonymous_session = GitHubSession(
                user_agent="GitHub Session Test",
            )
            assert anonymous_session.get_canonical_url(html_input_url) == expected_url

            # session object, with credentials
            authenticated_session = GitHubSession(
                user_agent="GitHub Session Test", credentials=github_credentials
            )
            assert (
                authenticated_session.get_canonical_url(html_input_url)
                == expected_url
            )
def test_get_canonical_github_origin_url_not_gh_origin():
    """An origin url that is not a github one must be handed back unchanged.

    Checked through both the module-level function and an anonymous
    ``GitHubSession``.
    """
    non_github_url = "https://example.org"

    assert get_canonical_github_origin_url(non_github_url) == non_github_url

    session = GitHubSession(
        user_agent="GitHub Session Test",
    )
    assert session.get_canonical_url(non_github_url) == non_github_url
def test_github_session_anonymous_session():
    """GitHubSession built without credentials should be anonymous.

    Also checks that the underlying session headers carry the GitHub v3
    ``Accept`` media type and the user agent given at construction time.
    """
    # Must be a plain string: wrapping it as `("...",)` silently builds a
    # one-element tuple, which is not a valid HTTP header value.
    user_agent = "GitHub Session Test"

    github_session = GitHubSession(
        user_agent=user_agent,
    )
    assert github_session.anonymous is True

    actual_headers = github_session.session.headers
    assert actual_headers["Accept"] == "application/vnd.github.v3+json"
    assert actual_headers["User-Agent"] == user_agent
@pytest.mark.parametrize(
    "num_ratelimit", [1]  # return a single rate-limit response, then continue
)
def test_github_session_ratelimit_once_recovery(
    caplog,
    requests_ratelimited,
    num_ratelimit,
    monkeypatch_sleep_calls,
    github_credentials,
):
    """A single rate-limit response must not prevent the request from succeeding.

    The debug log of ``swh.core.github.utils`` is inspected to count how many
    times an authentication token was picked, and the recorded sleep calls
    are compared with the expected one-second pause.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession(
        user_agent="GitHub Session Test", credentials=github_credentials
    )

    response = session.request(f"{HTTP_GITHUB_API_URL}?per_page=1000&since=10")
    assert response.status_code == 200

    # First log argument of every "Using authentication token" record
    token_users = [
        record.args[0]
        for record in caplog.records
        if "Using authentication token" in record.message
    ]

    # check that we used one more token than we saw rate limited requests
    assert len(token_users) == 1 + num_ratelimit

    # check that we slept for one second between our token uses
    assert monkeypatch_sleep_calls == [1]
def test_github_session_authenticated_credentials(
    caplog, github_credentials, all_tokens
):
    """A GitHubSession given credentials should expose an Authorization header.

    The session must not be anonymous, must start at token index 0, must hold
    the provided credentials (compared once sorted by username) and must send
    an ``Authorization: token <t>`` header for one of the known tokens.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession("GitHub Session Test", credentials=github_credentials)

    assert session.anonymous is False
    assert session.token_index == 0

    credentials_by_username = sorted(
        session.credentials, key=lambda cred: cred["username"]
    )
    assert credentials_by_username == github_credentials

    authorization = session.session.headers["Authorization"]
    accepted_values = [f"token {t}" for t in all_tokens]
    assert authorization in accepted_values
@pytest.mark.parametrize(
    # Do 5 successful requests, return 6 ratelimits (to exhaust the credentials) with a
    # set value for X-Ratelimit-Reset, then resume listing successfully.
    "num_before_ratelimit, num_ratelimit, ratelimit_reset",
    [(5, 6, 123456)],
)
def test_github_session_ratelimit_reset_sleep(
    caplog,
    requests_ratelimited,
    monkeypatch_sleep_calls,
    num_before_ratelimit,
    num_ratelimit,
    ratelimit_reset,
    github_credentials,
):
    """Rate-limit handling with authentication tokens should sleep as expected.

    After issuing ``num_ratelimit`` requests, the recorded sleep calls must be
    one second per credential followed by ``ratelimit_reset + 1``, and an INFO
    record mentioning the exhaustion of all tokens must have been logged.
    """
    caplog.set_level(logging.DEBUG, "swh.core.github.utils")

    session = GitHubSession(
        user_agent="GitHub Session Test", credentials=github_credentials
    )

    listing_url = f"{HTTP_GITHUB_API_URL}?per_page=1000&since=10"
    for _ in range(num_ratelimit):
        session.request(listing_url)

    # We sleep 1 second every time we change credentials, then we sleep until
    # ratelimit_reset + 1
    expected_sleep_calls = [1] * len(github_credentials) + [ratelimit_reset + 1]
    assert monkeypatch_sleep_calls == expected_sleep_calls

    found_exhaustion_message = any(
        record.levelname == "INFO"
        and "Rate limits exhausted for all tokens" in record.message
        for record in caplog.records
    )
    assert found_exhaustion_message is True
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jun 4 2025, 7:44 PM (11 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3343812
Attached To
rDCORE Foundations and core functionalities
Event Timeline
Log In to Comment