diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -169,6 +169,25 @@
)
return None
+ # ensure the active tab is the summary tab:
+ # on some repositories, the default tab can be the "about" tab, which has no clone info
+ if (
+ "summary" not in repository_url
+ ): # avoid infinite loop if the repository doesn't exist
+ tab = bs.find("table", {"class": "tabs"})
+ if tab:
+ active_tab = tab.find("a", {"class": "active"})
+ if active_tab and active_tab.text != "summary":
+ logger.debug(
+ "%s : Active tab is not the summary, trying to load the summary page",
+ repository_url,
+ )
+ return self._get_origin_from_repository_url(
+ f"{repository_url}/summary"
+ )
+ else:
+ logger.debug("No summary tab found on %s", repository_url)
+
# origin urls are listed on the repository page
# TODO check if forcing https is better or not ?
#
@@ -177,6 +196,7 @@
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
if not urls:
+ logger.debug("No git urls found on %s", repository_url)
return None
# look for the http/https url, if any, and use it as origin_url
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
@@ -0,0 +1 @@
+These files are a partial dump of https://git.acdw.net/cgit
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/cgit b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit
@@ -0,0 +1,40 @@
+
+
+
+friendware by acdw
+
+
+
+
+
+
+
+
+
friendware by acdw
+
the real software was the friends we made along the way