Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9339698
test_lister.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
test_lister.py
View Options
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from
datetime
import
datetime
import
json
import
os
import
pytest
from
swh.lister.bitbucket.lister
import
BitbucketLister
@pytest.fixture
def bb_api_repositories_page1(datadir):
    """Return the decoded content of ``bb_api_repositories_page1.json``.

    Args:
        datadir: directory containing the test data files.

    Returns:
        The object produced by ``json.load`` for that file.
    """
    data_file_path = os.path.join(datadir, "bb_api_repositories_page1.json")
    # Read explicitly as UTF-8 instead of relying on the platform/locale
    # default encoding, so the fixture loads identically everywhere.
    with open(data_file_path, "r", encoding="utf-8") as data_file:
        return json.load(data_file)
@pytest.fixture
def bb_api_repositories_page2(datadir):
    """Return the decoded content of ``bb_api_repositories_page2.json``.

    Args:
        datadir: directory containing the test data files.

    Returns:
        The object produced by ``json.load`` for that file.
    """
    data_file_path = os.path.join(datadir, "bb_api_repositories_page2.json")
    # Read explicitly as UTF-8 instead of relying on the platform/locale
    # default encoding, so the fixture loads identically everywhere.
    with open(data_file_path, "r", encoding="utf-8") as data_file:
        return json.load(data_file)
def _check_listed_origins(lister_origins, scheduler_origins):
    """Assert that both collections describe the same origins.

    Each origin is reduced to its ``(url, last_update)`` pair and the two
    resulting sets are compared, so ordering and duplicates are ignored.
    """

    def _as_keys(origins):
        # Only the fields relevant to the lister take part in the comparison.
        return set((origin.url, origin.last_update) for origin in origins)

    from_lister = _as_keys(lister_origins)
    from_scheduler = _as_keys(scheduler_origins)
    assert from_lister == from_scheduler
def test_bitbucket_incremental_lister(
    swh_scheduler,
    requests_mock,
    mocker,
    bb_api_repositories_page1,
    bb_api_repositories_page2,
):
    """Run the lister twice and check the second run restarts from saved state.

    The mocked API serves the two fixture pages; the first run is expected to
    report 2 pages and 20 origins and to record ``last_repo_cdate`` in the
    lister state. The second run must issue exactly one request whose
    ``after`` parameter is that recorded date.
    """
    requests_mock.get(
        BitbucketLister.API_URL,
        [{"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2}],
    )

    lister = BitbucketLister(scheduler=swh_scheduler, page_size=10)

    # First listing
    stats = lister.run()

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    assert stats.pages == 2
    assert stats.origins == 20
    assert len(scheduler_origins) == 20

    assert lister.updated
    lister_state = lister.get_state_from_scheduler()
    # Check the attribute exists *before* dereferencing it; done afterwards,
    # this assertion could never fail (the access would raise first).
    assert hasattr(lister_state, "last_repo_cdate")
    last_repo_cdate = lister_state.last_repo_cdate.isoformat()
    assert last_repo_cdate == bb_api_repositories_page2["values"][-1]["created_on"]

    # Second listing, restarting from last state
    lister.session.get = mocker.spy(lister.session, "get")

    lister.run()

    # Build the expected parameters as a copy: mutating ``lister.url_params``
    # in place could make the check below vacuous if the spy recorded that
    # very dict object, and it would also alter the lister from the test.
    expected_params = dict(lister.url_params, after=last_repo_cdate)

    lister.session.get.assert_called_once_with(lister.API_URL, params=expected_params)

    all_origins = (
        bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"]
    )

    _check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins)
def test_bitbucket_lister_rate_limit_hit(
    swh_scheduler,
    requests_mock,
    mocker,
    bb_api_repositories_page1,
    bb_api_repositories_page2,
):
    """List both pages although two HTTP 429 responses sit between them.

    The mocked API answers, in order: page 1 (200), 429, 429, page 2 (200).
    The run is still expected to report 2 pages and 20 origins.
    """
    mocked_responses = [
        {"json": bb_api_repositories_page1, "status_code": 200},
        {"json": None, "status_code": 429},
        {"json": None, "status_code": 429},
        {"json": bb_api_repositories_page2, "status_code": 200},
    ]
    requests_mock.get(BitbucketLister.API_URL, mocked_responses)

    lister = BitbucketLister(scheduler=swh_scheduler, page_size=10)

    # Replace the retry object's ``sleep`` with a mock; presumably this keeps
    # the test from actually waiting between attempts.
    mocker.patch.object(lister.page_request.retry, "sleep")

    run_stats = lister.run()

    assert run_stats.pages == 2
    assert run_stats.origins == 20

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed) == 20
def test_bitbucket_full_lister(
    swh_scheduler,
    requests_mock,
    mocker,
    bb_api_repositories_page1,
    bb_api_repositories_page2,
):
    """Check that a full (non-incremental) run leaves the lister state untouched.

    An incremental run with credentials first establishes a lister state.
    A second, non-incremental run without credentials then lists the same two
    pages, after the last repository's dates have been altered; the stored
    state must be unchanged while the listed origins match the altered data.
    """
    page1_response = {"json": bb_api_repositories_page1}
    page2_response = {"json": bb_api_repositories_page2}
    requests_mock.get(
        BitbucketLister.API_URL,
        [
            page1_response,
            page2_response,
            {"json": bb_api_repositories_page1},
            {"json": bb_api_repositories_page2},
        ],
    )

    credentials = {"bitbucket": {"bitbucket": [{"username": "u", "password": "p"}]}}
    incremental_lister = BitbucketLister(
        scheduler=swh_scheduler,
        page_size=10,
        incremental=True,
        credentials=credentials,
    )
    assert incremental_lister.session.auth is not None

    # First do an incremental run to have an initial lister state
    first_stats = incremental_lister.run()

    last_lister_state = incremental_lister.get_state_from_scheduler()

    assert first_stats.origins == 20

    # Then do the full run and verify the lister state did not change.
    # Modify the dates of the last listed repository to check that they are
    # not saved to the lister state by that run.
    last_page2_repo = bb_api_repositories_page2["values"][-1]
    last_page2_repo["created_on"] = datetime.now().isoformat()
    last_page2_repo["updated_on"] = datetime.now().isoformat()

    full_lister = BitbucketLister(
        scheduler=swh_scheduler, page_size=10, incremental=False
    )
    assert full_lister.session.auth is None

    full_stats = full_lister.run()

    assert full_stats.pages == 2
    assert full_stats.origins == 20

    scheduler_origins = swh_scheduler.get_listed_origins(
        full_lister.lister_obj.id
    ).results

    # 20 because scheduler upserts based on (id, type, url)
    assert len(scheduler_origins) == 20

    # Modification on created_on SHOULD NOT impact lister state
    assert full_lister.get_state_from_scheduler() == last_lister_state

    # The origins recorded in the scheduler SHOULD match the ones computed
    # from the modified pages (presumably updated_on feeds last_update).
    all_origins = (
        bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"]
    )
    _check_listed_origins(
        full_lister.get_origins_from_page(all_origins), scheduler_origins
    )
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jul 4 2025, 9:51 AM (5 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3367619
Attached To
rDLS Listers
Event Timeline
Log In to Comment