diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py --- a/swh/lister/cgit/lister.py +++ b/swh/lister/cgit/lister.py @@ -169,6 +169,22 @@ ) return None + # check if we are on the summary tab, if not, go to this tab + tab = bs.find("table", {"class": "tabs"}) + if tab: + summary_a = tab.find("a", string="summary") + if summary_a: + summary_url = urljoin(repository_url, summary_a["href"]).strip("/") + + if summary_url != repository_url: + logger.debug( + "%s : Active tab is not the summary, trying to load the summary page", + repository_url, + ) + return self._get_origin_from_repository_url(summary_url) + else: + logger.debug("No summary tab found on %s", repository_url) + # origin urls are listed on the repository page # TODO check if forcing https is better or not ? # @@ -177,6 +193,7 @@ urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})] if not urls: + logger.debug("No git urls found on %s", repository_url) return None # look for the http/https url, if any, and use it as origin_url diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md new file mode 100644 --- /dev/null +++ b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md @@ -0,0 +1 @@ +These files are a partial dump of https://git.acdw.net/cgit diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/cgit b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit new file mode 100644 --- /dev/null +++ b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit @@ -0,0 +1,40 @@ + + + +friendware by acdw + + + + + +
+ + + + +
+index
+ + +
+
+ + + + + +
NameDescriptionIdleLinks
sfeedMy sfeed scripts +28 min.summary log tree
fooNon existing repository +28 min.summary log tree
+ +
+ + diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/foo b/swh/lister/cgit/tests/data/https_git.acdw.net/foo new file mode 100644 --- /dev/null +++ b/swh/lister/cgit/tests/data/https_git.acdw.net/foo @@ -0,0 +1,33 @@ + + + + + + + + + +
+ + + + +
+index
+ + +
+
No repositories found
+
+ +
+ + diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary b/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary new file mode 100644 --- /dev/null +++ b/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary @@ -0,0 +1,33 @@ + + + + + + + + + +
+ + + + +
+index
+ + +
+
No repositories found
+
+ +
+ + diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed new file mode 100644 --- /dev/null +++ b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed @@ -0,0 +1,49 @@ + + + +sfeed - My sfeed scripts + + + + + + + + +
+ + + + +
+about summary refs log tree commit diff stats
+ + + +
+
+

sfeed

+

Turns out, sfeed is cool! You can see what this repo generates at https://acdw.casa/planet/.

+
+ +
+ + diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary new file mode 100644 --- /dev/null +++ b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary @@ -0,0 +1,63 @@ + + + +sfeed - My sfeed scripts + + + + + + + + +
+ + + + +
+about summary refs log tree commit diff stats
+ + + +
+
+
+ + + + + + + + + + + + + + + +
BranchCommit messageAuthorAge
mainAdd APODCase Duckworth38 min.
 
 
AgeCommit messageAuthor
38 min.Add APOD HEAD mainCase Duckworth
4 daysChange fresh item colorsCase Duckworth
4 daysIndentationCase Duckworth
5 daysAdd Tab CompletionCase Duckworth
5 daysAdd Lonnie JohnsonCase Duckworth
7 daysAdd miniature calendar; metafilterCase Duckworth
9 daysAdd active listeningCase Duckworth
10 daysAdd tilde.town blogCase Duckworth
12 daysAdd zsergeCase Duckworth
12 daysRemove duplicateCase Duckworth
[...]
 
Clone
https://git.acdw.net/sfeed
+ +
+ + diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py --- a/swh/lister/cgit/tests/test_lister.py +++ b/swh/lister/cgit/tests/test_lister.py @@ -265,3 +265,16 @@ # we should have 16 repos (listed on 3 pages) assert len(repos) == 3 assert len(flattened_repos) == 16 + + +def test_lister_cgit_summary_not_default(requests_mock_datadir, swh_scheduler): + """cgit lister returns git url when the default repository tab is not the summary""" + + url = "https://git.acdw.net/cgit" + + lister_cgit = CGitLister(swh_scheduler, url=url) + + stats = lister_cgit.run() + + expected_nb_origins = 1 + assert stats == ListerStats(pages=1, origins=expected_nb_origins)