Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124255
D8508.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
15 KB
Subscribers
None
D8508.diff
View Options
diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -169,6 +169,22 @@
)
return None
+ # clone urls are only listed on the summary tab: if another tab is active, load the summary page instead
+ tab = bs.find("table", {"class": "tabs"})
+ if tab:
+ summary_a = tab.find("a", string="summary")
+ if summary_a:
+ summary_url = urljoin(repository_url, summary_a["href"]).strip("/")
+
+ if summary_url != repository_url:
+ logger.debug(
+ "%s : Active tab is not the summary, trying to load the summary page",
+ repository_url,
+ )
+ return self._get_origin_from_repository_url(summary_url)
+ else:
+ logger.debug("No summary tab found on %s", repository_url)
+
# origin urls are listed on the repository page
# TODO check if forcing https is better or not ?
# <link rel='vcs-git' href='git://...' title='...'/>
@@ -177,6 +193,7 @@
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
if not urls:
+ logger.debug("No git urls found on %s", repository_url)
return None
# look for the http/https url, if any, and use it as origin_url
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
@@ -0,0 +1 @@
+These files are a partial dump of https://git.acdw.net/
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/cgit b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit
@@ -0,0 +1,40 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>friendware by acdw</title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'>friendware by acdw</td></tr>
+<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><table summary='repository list' class='list nowrap'><tr class='nohover'><th class='left'><a href='/?s=name'>Name</a></th><th class='left'><a href='/?s=desc'>Description</a></th><th class='left'><a href='/?s=idle'>Idle</a></th><th class='left'>Links</th></tr>
+<tr><td class='toplevel-repo'><a title='sfeed' href='/sfeed/'>sfeed</a></td><td><a href='/sfeed/'>My sfeed scripts
+</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/sfeed/summary/'>summary</a> <a class='button' href='/sfeed/log/'>log</a> <a class='button' href='/sfeed/tree/'>tree</a></td></tr>
+
+<tr><td class='toplevel-repo'><a title='foo' href='/foo/'>foo</a></td><td><a href='/foo/'>Non existing repository
+</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/foo/summary/'>summary</a> <a class='button' href='/foo/log/'>log</a> <a class='button' href='/foo/tree/'>tree</a></td></tr>
+
+
+</table></div> <!-- class=content -->
+<div class="footer">
+© 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+<
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+>.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/foo b/swh/lister/cgit/tests/data/https_git.acdw.net/foo
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/foo
@@ -0,0 +1,33 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title></title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'>friendware by acdw</td></tr>
+<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><div class='error'>No repositories found</div>
+</div> <!-- class=content -->
+<div class="footer">
+© 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+<
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+>.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary b/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary
@@ -0,0 +1,33 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title></title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'>friendware by acdw</td></tr>
+<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><div class='error'>No repositories found</div>
+</div> <!-- class=content -->
+<div class="footer">
+© 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+<
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+>.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>sfeed - My sfeed scripts
+</title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
+<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
+<select name='h' onchange='this.form.submit();'>
+<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
+<optgroup label='branches'><option value='main' selected='selected'>main</option>
+</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
+<tr><td class='sub'>My sfeed scripts
+</td><td class='sub right'>Case Duckworth</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/sfeed/'>about</a> <a href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
+<select name='qt'>
+<option value='grep'>log msg</option>
+<option value='author'>author</option>
+<option value='committer'>committer</option>
+<option value='range'>range</option>
+</select>
+<input class='txt' type='search' size='10' name='q' value=''/>
+<input type='submit' value='search'/>
+</form>
+</td></tr></table>
+<div class='content'><div id='summary'><h1 id="sfeed">sfeed</h1>
+<p>Turns out, <a href="https://codemadness.org/sfeed-simple-feed-parser.html">sfeed</a> is cool! You can see what this repo generates at <a href="https://acdw.casa/planet/" class="uri">https://acdw.casa/planet/</a>.</p>
+</div></div> <!-- class=content -->
+<div class="footer">
+© 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+<
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+>.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary
@@ -0,0 +1,63 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>sfeed - My sfeed scripts
+</title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
+<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
+<select name='h' onchange='this.form.submit();'>
+<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
+<optgroup label='branches'><option value='main' selected='selected'>main</option>
+</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
+<tr><td class='sub'>My sfeed scripts
+</td><td class='sub right'>Case Duckworth</td></tr></table>
+<table class='tabs'><tr><td>
+<a href='/sfeed/'>about</a> <a class='active' href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
+<select name='qt'>
+<option value='grep'>log msg</option>
+<option value='author'>author</option>
+<option value='committer'>committer</option>
+<option value='range'>range</option>
+</select>
+<input class='txt' type='search' size='10' name='q' value=''/>
+<input type='submit' value='search'/>
+</form>
+</td></tr></table>
+<div class='content'><table summary='repository info' class='list nowrap'><tr class='nohover'><th class='left'>Branch</th><th class='left'>Commit message</th><th class='left'>Author</th><th class='left' colspan='2'>Age</th></tr>
+<tr><td><a href='/sfeed/log/'>main</a></td><td><a href='/sfeed/commit/'>Add APOD</a></td><td>Case Duckworth</td><td colspan='2'><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td></tr>
+<tr class='nohover'><td colspan='3'> </td></tr><tr class='nohover'><td colspan='3'> </td></tr><tr class='nohover'><th class='left'>Age</th><th class='left'>Commit message</th><th class='left'>Author</th></tr>
+<tr><td><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td><td><a href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>Add APOD</a><span class='decoration'> <a class='deco' href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>HEAD</a> <a class='branch-deco' href='/sfeed/log/'>main</a></span></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-15 14:42:28 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=da8103330e7ec902994d72c6b1a3ec5fbad5c9d3'>Change fresh item colors</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-15 14:38:35 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=4bea5f02d3b13cac53ddaa60d8202083fbe80aeb'>Indentation</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-14 09:53:56 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=d5348fa3f2080bfd0fa217f2311d61aa99e34860'>Add Tab Completion</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-14 08:51:36 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=c3e8226c62e107d9db6dda4d669b3676cfc6cde7'>Add Lonnie Johnson</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-12 17:39:32 -0500'>7 days</span></td><td><a href='/sfeed/commit/?id=3debe199e3dbd5020da1768d297777423652e6fd'>Add miniature calendar; metafilter</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-10 23:20:29 -0500'>9 days</span></td><td><a href='/sfeed/commit/?id=7eed94e4c96f9fbd4a03dd1dc11a67514d64b404'>Add active listening</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-09 20:01:46 -0500'>10 days</span></td><td><a href='/sfeed/commit/?id=c6056e5676b3e9970dbe1fc681d3f5ea447734fc'>Add tilde.town blog</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-07 23:57:26 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=06c33ee36f71f7f92ab7c918f02efd262b83d276'>Add zserge</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-07 23:56:50 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=0cd234e1faeefa14cb44206a3f8776190d35b2e9'>Remove duplicate</a></td><td>Case Duckworth</td></tr>
+<tr class='nohover'><td colspan='3'><a href='/sfeed/log/'>[...]</a></td></tr>
+<tr class='nohover'><td colspan='3'> </td></tr><tr class='nohover'><th class='left' colspan='3'>Clone</th></tr>
+<tr><td colspan='3'><a rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'>https://git.acdw.net/sfeed</a></td></tr>
+</table></div> <!-- class=content -->
+<div class="footer">
+© 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+<
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+>.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py
--- a/swh/lister/cgit/tests/test_lister.py
+++ b/swh/lister/cgit/tests/test_lister.py
@@ -265,3 +265,16 @@
# we should have 16 repos (listed on 3 pages)
assert len(repos) == 3
assert len(flattened_repos) == 16
+
+
+def test_lister_cgit_summary_not_default(requests_mock_datadir, swh_scheduler):
+ """cgit lister returns git url when the default repository tab is not the summary"""
+
+ url = "https://git.acdw.net/cgit"
+
+ lister_cgit = CGitLister(swh_scheduler, url=url)
+
+ stats = lister_cgit.run()
+
+ expected_nb_origins = 1
+ assert stats == ListerStats(pages=1, origins=expected_nb_origins)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 2:55 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219598
Attached To
D8508: cgit: Ensure the clone url is searched on the right tab
Event Timeline
Log In to Comment