Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8394227
D8509.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
D8509.diff
View Options
diff --git a/swh/lister/gitea/tests/test_lister.py b/swh/lister/gitea/tests/test_lister.py
--- a/swh/lister/gitea/tests/test_lister.py
+++ b/swh/lister/gitea/tests/test_lister.py
@@ -9,6 +9,7 @@
import pytest
import requests
+from requests import HTTPError
from swh.lister.gitea.lister import GiteaLister
from swh.lister.gogs.lister import GogsListerPage
@@ -138,16 +139,38 @@
@pytest.mark.parametrize("http_code", [400, 500, 502])
-def test_gitea_list_http_error(swh_scheduler, requests_mock, http_code):
+def test_gitea_list_http_error(
+ swh_scheduler, requests_mock, http_code, trygitea_p1, trygitea_p2
+):
"""Test handling of some HTTP errors commonly encountered"""
lister = GiteaLister(scheduler=swh_scheduler, url=TRYGITEA_URL, page_size=3)
+ p1_text, p1_headers, _, p1_origin_urls = trygitea_p1
+ p3_text, p3_headers, _, p3_origin_urls = trygitea_p2
+
base_url = TRYGITEA_URL + lister.REPO_LIST_PATH
- requests_mock.get(base_url, status_code=http_code)
+ requests_mock.get(
+ base_url,
+ [
+ {"text": p1_text, "headers": p1_headers, "status_code": 200},
+ {"status_code": http_code},
+ {"text": p3_text, "headers": p3_headers, "status_code": 200},
+ ],
+ )
- with pytest.raises(requests.HTTPError):
+ # pages with fatal repositories should be skipped (no error raised)
+ # See T4423 for more details
+ if http_code == 500:
lister.run()
+ else:
+ with pytest.raises(HTTPError):
+ lister.run()
+ # On a 500 error the failing page is skipped: P1 and P3 origins are listed.
+ # On any other error the listing aborts: only P1 origins are listed.
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
- assert len(scheduler_origins) == 0
+ check_listed_origins(
+ (p1_origin_urls + p3_origin_urls) if http_code == 500 else p1_origin_urls,
+ scheduler_origins,
+ )
diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py
--- a/swh/lister/gogs/lister.py
+++ b/swh/lister/gogs/lister.py
@@ -2,12 +2,11 @@
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-
from dataclasses import asdict, dataclass
import logging
import random
-from typing import Any, Dict, Iterator, List, Optional
-from urllib.parse import parse_qs, urljoin, urlparse
+from typing import Any, Dict, Iterator, List, Optional, Tuple
+from urllib.parse import parse_qs, parse_qsl, urlencode, urljoin, urlparse
import iso8601
import requests
@@ -97,8 +96,6 @@
# Raises an error on Gogs, or a warning on Gitea
self.on_anonymous_mode()
- self.max_page_limit = 2
-
self.session = requests.Session()
self.session.headers.update(
{
@@ -120,7 +117,9 @@
return asdict(state)
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
- def page_request(self, url, params) -> requests.Response:
+ def page_request(
+ self, url: str, params: Dict[str, Any]
+ ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
logger.debug("Fetching URL %s with params %s", url, params)
@@ -133,9 +132,20 @@
response.url,
response.content,
)
- response.raise_for_status()
-
- return response
+ if (
+ response.status_code == 500
+ ): # Temporary hack for skipping fatal repos (T4423)
+ url_parts = urlparse(url)
+ query: Dict[str, Any] = dict(parse_qsl(url_parts.query))
+ query.update({"page": _parse_page_id(url) + 1})
+ next_page_link = url_parts._replace(query=urlencode(query)).geturl()
+ body: Dict[str, Any] = {"data": []}
+ links = {"next": {"url": next_page_link}}
+ return body, links
+ else:
+ response.raise_for_status()
+
+ return response.json(), response.links
@classmethod
def extract_repos(cls, body: Dict[str, Any]) -> List[Repo]:
@@ -149,21 +159,24 @@
# base with trailing slash, path without leading slash for urljoin
next_link: Optional[str] = urljoin(self.url, self.REPO_LIST_PATH)
- response = self.page_request(next_link, {**self.query_params, "page": page_id})
+
+ body, links = self.page_request(
+ next_link, {**self.query_params, "page": page_id}
+ )
while next_link is not None:
- repos = self.extract_repos(response.json())
+ repos = self.extract_repos(body)
- assert len(response.links) > 0, "API changed: no Link header found"
- if "next" in response.links:
- next_link = response.links["next"]["url"]
+ assert len(links) > 0, "API changed: no Link header found"
+ if "next" in links:
+ next_link = links["next"]["url"]
else:
next_link = None # Happens for the last page
yield GogsListerPage(repos=repos, next_link=next_link)
if next_link is not None:
- response = self.page_request(next_link, {})
+ body, links = self.page_request(next_link, {})
def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
"""Convert a page of Gogs repositories into a list of ListedOrigins"""
diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py
--- a/swh/lister/gogs/tests/test_lister.py
+++ b/swh/lister/gogs/tests/test_lister.py
@@ -186,7 +186,7 @@
lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret")
p1_text, p1_headers, _, p1_origin_urls = trygogs_p1
- p3_text, p3_headers, _, _ = trygogs_p3_last
+ p3_text, p3_headers, _, p3_origin_urls = trygogs_p3_last
base_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
requests_mock.get(
@@ -198,13 +198,21 @@
],
)
- with pytest.raises(HTTPError):
+ # pages with fatal repositories should be skipped (no error raised)
+ # See T4423 for more details
+ if http_code == 500:
lister.run()
+ else:
+ with pytest.raises(HTTPError):
+ lister.run()
+ # On a 500 error the failing page is skipped: P1 and P3 origins are listed.
+ # On any other error the listing aborts: only P1 origins are listed.
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
check_listed_origins(
- p1_origin_urls, scheduler_origins
- ) # Only the first page is listed
+ (p1_origin_urls + p3_origin_urls) if http_code == 500 else p1_origin_urls,
+ scheduler_origins,
+ )
def test_gogs_incremental_lister(
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Jun 3, 7:22 PM (5 d, 21 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228297
Attached To
D8509: gogs: Skip pages with error 500
Event Timeline
Log In to Comment