Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/cgit/lister.py
Show First 20 Lines • Show All 163 Lines • ▼ Show 20 Lines | def _get_origin_from_repository_url(self, repository_url: str) -> Optional[str]: | ||||
except HTTPError as e: | except HTTPError as e: | ||||
logger.warning( | logger.warning( | ||||
"Unexpected HTTP status code %s on %s", | "Unexpected HTTP status code %s on %s", | ||||
e.response.status_code, | e.response.status_code, | ||||
e.response.url, | e.response.url, | ||||
) | ) | ||||
return None | return None | ||||
# check if we are on the summary tab, if not, go to this tab | |||||
tab = bs.find("table", {"class": "tabs"}) | |||||
if tab: | |||||
summary_a = tab.find("a", string="summary") | |||||
if summary_a: | |||||
summary_url = urljoin(repository_url, summary_a["href"]).strip("/") | |||||
if summary_url != repository_url: | |||||
logger.debug( | |||||
"%s : Active tab is not the summary, trying to load the summary page", | |||||
repository_url, | |||||
) | |||||
return self._get_origin_from_repository_url(summary_url) | |||||
else: | |||||
logger.debug("No summary tab found on %s", repository_url) | |||||
anlambert: I think it would be better to iterate on the tab links and fetch the URL associated to the tab… | |||||
Done Inline Actions Yes, sure, it would be better; I will update the code to do that. From what I saw when I tested, when the summary tab is not /summary, it's because it's the default tab, so it's active. vsellier: yes sure it would be better, I will update to do that.
For what I saw when I tested, when the… | |||||
# origin urls are listed on the repository page | # origin urls are listed on the repository page | ||||
# TODO check if forcing https is better or not ? | # TODO check if forcing https is better or not ? | ||||
# <link rel='vcs-git' href='git://...' title='...'/> | # <link rel='vcs-git' href='git://...' title='...'/> | ||||
# <link rel='vcs-git' href='http://...' title='...'/> | # <link rel='vcs-git' href='http://...' title='...'/> | ||||
# <link rel='vcs-git' href='https://...' title='...'/> | # <link rel='vcs-git' href='https://...' title='...'/> | ||||
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})] | urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})] | ||||
if not urls: | if not urls: | ||||
logger.debug("No git urls found on %s", repository_url) | |||||
return None | return None | ||||
# look for the http/https url, if any, and use it as origin_url | # look for the http/https url, if any, and use it as origin_url | ||||
for url in urls: | for url in urls: | ||||
if urlparse(url).scheme in ("http", "https"): | if urlparse(url).scheme in ("http", "https"): | ||||
origin_url = url | origin_url = url | ||||
break | break | ||||
else: | else: | ||||
Show All 30 Lines |
I think it would be better to iterate over the tab links and fetch the URL associated with the tab whose text is "summary"; this would be more generic.
For instance, in that cgit repository, the summary link does not end with /summary.