Page MenuHomeSoftware Heritage

D6022.id21762.diff
No OneTemporary

D6022.id21762.diff

diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -111,6 +111,7 @@
)
self.incremental = incremental
self.last_page: Optional[str] = None
+ self.per_page = 100
self.session = requests.Session()
self.session.headers.update(
@@ -145,7 +146,25 @@
response.url,
response.content,
)
- response.raise_for_status()
+
+ # The GitLab API can return HTTP 500 errors when listing projects.
+ # https://gitlab.com/gitlab-org/gitlab/-/issues/262629
+ # To avoid ending the listing prematurely, skip the failing URLs and
+ # move on to the next pages.
+ if response.status_code == 500:
+ id_after = _parse_id_after(url)
+ assert id_after is not None
+ while True:
+ next_id_after = id_after + self.per_page
+ url = url.replace(f"id_after={id_after}", f"id_after={next_id_after}")
+ response = self.session.get(url)
+ if response.status_code == 200:
+ break
+ else:
+ id_after = next_id_after
+ else:
+ response.raise_for_status()
+
repositories: Tuple[Repository, ...] = tuple(response.json())
if hasattr(response, "links") and response.links.get("next"):
next_page = response.links["next"]["url"]
@@ -160,7 +179,7 @@
"order_by": "id",
"sort": "asc",
"simple": "true",
- "per_page": "100",
+ "per_page": f"{self.per_page}",
}
if id_after is not None:
parameters["id_after"] = str(id_after)
diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py
--- a/swh/lister/gitlab/tests/test_lister.py
+++ b/swh/lister/gitlab/tests/test_lister.py
@@ -244,6 +244,39 @@
assert_sleep_calls(mocker, mock_sleep, [1])
+def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
+ """Gitlab lister should skip a buggy URL and move on to the next page.
+
+ """
+ instance = "gite.lirmm.fr"
+ url = api_url(instance)
+ lister = GitLabLister(swh_scheduler, url=url, instance=instance)
+
+ url_page1 = lister.page_url()
+ response1 = gitlab_page_response(datadir, instance, 1)
+ url_page2 = lister.page_url(lister.per_page)
+ url_page3 = lister.page_url(2 * lister.per_page)
+ response3 = gitlab_page_response(datadir, instance, 3)
+
+ requests_mock.get(
+ url_page1,
+ [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}],
+ additional_matcher=_match_request,
+ )
+ requests_mock.get(
+ url_page2, [{"status_code": 500},], additional_matcher=_match_request,
+ )
+
+ requests_mock.get(
+ url_page3, [{"json": response3}], additional_matcher=_match_request,
+ )
+
+ listed_result = lister.run()
+
+ expected_nb_origins = len(response1) + len(response3)
+ assert listed_result == ListerStats(pages=2, origins=expected_nb_origins)
+
+
def test_lister_gitlab_credentials(swh_scheduler):
"""Gitlab lister supports credentials configuration

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 6:52 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217188

Event Timeline