Page MenuHomeSoftware Heritage

D8508.diff
No OneTemporary

D8508.diff

diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py
--- a/swh/lister/cgit/lister.py
+++ b/swh/lister/cgit/lister.py
@@ -169,6 +169,22 @@
)
return None
+ # check if we are on the summary tab, if not, go to this tab
+ tab = bs.find("table", {"class": "tabs"})
+ if tab:
+ summary_a = tab.find("a", string="summary")
+ if summary_a:
+ summary_url = urljoin(repository_url, summary_a["href"]).strip("/")
+
+ if summary_url != repository_url:
+ logger.debug(
+ "%s : Active tab is not the summary, trying to load the summary page",
+ repository_url,
+ )
+ return self._get_origin_from_repository_url(summary_url)
+ else:
+ logger.debug("No summary tab found on %s", repository_url)
+
# origin urls are listed on the repository page
# TODO check if forcing https is better or not ?
# <link rel='vcs-git' href='git://...' title='...'/>
@@ -177,6 +193,7 @@
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
if not urls:
+ logger.debug("No git urls found on %s", repository_url)
return None
# look for the http/https url, if any, and use it as origin_url
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
@@ -0,0 +1 @@
+These files are a partial dump of https://git.acdw.net
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/cgit b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/cgit
@@ -0,0 +1,40 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>friendware by acdw</title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'>friendware by acdw</td></tr>
+<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><table summary='repository list' class='list nowrap'><tr class='nohover'><th class='left'><a href='/?s=name'>Name</a></th><th class='left'><a href='/?s=desc'>Description</a></th><th class='left'><a href='/?s=idle'>Idle</a></th><th class='left'>Links</th></tr>
+<tr><td class='toplevel-repo'><a title='sfeed' href='/sfeed/'>sfeed</a></td><td><a href='/sfeed/'>My sfeed scripts
+</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/sfeed/summary/'>summary</a> <a class='button' href='/sfeed/log/'>log</a> <a class='button' href='/sfeed/tree/'>tree</a></td></tr>
+
+<tr><td class='toplevel-repo'><a title='foo' href='/foo/'>foo</a></td><td><a href='/foo/'>Non existing repository
+</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/foo/summary/'>summary</a> <a class='button' href='/foo/log/'>log</a> <a class='button' href='/foo/tree/'>tree</a></td></tr>
+
+
+</table></div> <!-- class=content -->
+<div class="footer">
+&copy; 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+&lt;
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+&gt;.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/foo b/swh/lister/cgit/tests/data/https_git.acdw.net/foo
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/foo
@@ -0,0 +1,33 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title></title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'>friendware by acdw</td></tr>
+<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><div class='error'>No repositories found</div>
+</div> <!-- class=content -->
+<div class="footer">
+&copy; 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+&lt;
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+&gt;.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary b/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary
@@ -0,0 +1,33 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title></title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'>friendware by acdw</td></tr>
+<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
+<input type='search' name='q' size='10' value=''/>
+<input type='submit' value='search'/>
+</form></td></tr></table>
+<div class='content'><div class='error'>No repositories found</div>
+</div> <!-- class=content -->
+<div class="footer">
+&copy; 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+&lt;
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+&gt;.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>sfeed - My sfeed scripts
+</title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
+<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
+<select name='h' onchange='this.form.submit();'>
+<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
+<optgroup label='branches'><option value='main' selected='selected'>main</option>
+</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
+<tr><td class='sub'>My sfeed scripts
+</td><td class='sub right'>Case Duckworth</td></tr></table>
+<table class='tabs'><tr><td>
+<a class='active' href='/sfeed/'>about</a> <a href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
+<select name='qt'>
+<option value='grep'>log msg</option>
+<option value='author'>author</option>
+<option value='committer'>committer</option>
+<option value='range'>range</option>
+</select>
+<input class='txt' type='search' size='10' name='q' value=''/>
+<input type='submit' value='search'/>
+</form>
+</td></tr></table>
+<div class='content'><div id='summary'><h1 id="sfeed">sfeed</h1>
+<p>Turns out, <a href="https://codemadness.org/sfeed-simple-feed-parser.html">sfeed</a> is cool! You can see what this repo generates at <a href="https://acdw.casa/planet/" class="uri">https://acdw.casa/planet/</a>.</p>
+</div></div> <!-- class=content -->
+<div class="footer">
+&copy; 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+&lt;
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+&gt;.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary
new file mode 100644
--- /dev/null
+++ b/swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary
@@ -0,0 +1,63 @@
+<!DOCTYPE html>
+<html lang='en'>
+<head>
+<title>sfeed - My sfeed scripts
+</title>
+<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
+<meta name='robots' content='index, nofollow'/>
+<link rel='stylesheet' type='text/css' href='/cgit.css'/>
+<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
+<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
+</head>
+<body>
+<div id='cgit'><table id='header'>
+<tr>
+<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
+<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
+<select name='h' onchange='this.form.submit();'>
+<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
+<optgroup label='branches'><option value='main' selected='selected'>main</option>
+</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
+<tr><td class='sub'>My sfeed scripts
+</td><td class='sub right'>Case Duckworth</td></tr></table>
+<table class='tabs'><tr><td>
+<a href='/sfeed/'>about</a> <a class='active' href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
+<select name='qt'>
+<option value='grep'>log msg</option>
+<option value='author'>author</option>
+<option value='committer'>committer</option>
+<option value='range'>range</option>
+</select>
+<input class='txt' type='search' size='10' name='q' value=''/>
+<input type='submit' value='search'/>
+</form>
+</td></tr></table>
+<div class='content'><table summary='repository info' class='list nowrap'><tr class='nohover'><th class='left'>Branch</th><th class='left'>Commit message</th><th class='left'>Author</th><th class='left' colspan='2'>Age</th></tr>
+<tr><td><a href='/sfeed/log/'>main</a></td><td><a href='/sfeed/commit/'>Add APOD</a></td><td>Case Duckworth</td><td colspan='2'><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td></tr>
+<tr class='nohover'><td colspan='3'>&nbsp;</td></tr><tr class='nohover'><td colspan='3'>&nbsp;</td></tr><tr class='nohover'><th class='left'>Age</th><th class='left'>Commit message</th><th class='left'>Author</th></tr>
+<tr><td><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td><td><a href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>Add APOD</a><span class='decoration'> <a class='deco' href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>HEAD</a> <a class='branch-deco' href='/sfeed/log/'>main</a></span></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-15 14:42:28 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=da8103330e7ec902994d72c6b1a3ec5fbad5c9d3'>Change fresh item colors</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-15 14:38:35 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=4bea5f02d3b13cac53ddaa60d8202083fbe80aeb'>Indentation</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-14 09:53:56 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=d5348fa3f2080bfd0fa217f2311d61aa99e34860'>Add Tab Completion</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-14 08:51:36 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=c3e8226c62e107d9db6dda4d669b3676cfc6cde7'>Add Lonnie Johnson</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-12 17:39:32 -0500'>7 days</span></td><td><a href='/sfeed/commit/?id=3debe199e3dbd5020da1768d297777423652e6fd'>Add miniature calendar; metafilter</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-10 23:20:29 -0500'>9 days</span></td><td><a href='/sfeed/commit/?id=7eed94e4c96f9fbd4a03dd1dc11a67514d64b404'>Add active listening</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-09 20:01:46 -0500'>10 days</span></td><td><a href='/sfeed/commit/?id=c6056e5676b3e9970dbe1fc681d3f5ea447734fc'>Add tilde.town blog</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-07 23:57:26 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=06c33ee36f71f7f92ab7c918f02efd262b83d276'>Add zserge</a></td><td>Case Duckworth</td></tr>
+<tr><td><span class='age-days' title='2022-09-07 23:56:50 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=0cd234e1faeefa14cb44206a3f8776190d35b2e9'>Remove duplicate</a></td><td>Case Duckworth</td></tr>
+<tr class='nohover'><td colspan='3'><a href='/sfeed/log/'>[...]</a></td></tr>
+<tr class='nohover'><td colspan='3'>&nbsp;</td></tr><tr class='nohover'><th class='left' colspan='3'>Clone</th></tr>
+<tr><td colspan='3'><a rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'>https://git.acdw.net/sfeed</a></td></tr>
+</table></div> <!-- class=content -->
+<div class="footer">
+&copy; 2022 C. Duckworth.
+generated by
+<a href="/cgit" >cgit</a>.
+comments, issues, and patches welcome at
+&lt;
+<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
+&gt;.
+</div>
+</div> <!-- id=cgit -->
+</body>
+</html>
diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py
--- a/swh/lister/cgit/tests/test_lister.py
+++ b/swh/lister/cgit/tests/test_lister.py
@@ -265,3 +265,16 @@
# we should have 16 repos (listed on 3 pages)
assert len(repos) == 3
assert len(flattened_repos) == 16
+
+
+def test_lister_cgit_summary_not_default(requests_mock_datadir, swh_scheduler):
+ """cgit lister returns git url when the default repository tab is not the summary"""
+
+ url = "https://git.acdw.net/cgit"
+
+ lister_cgit = CGitLister(swh_scheduler, url=url)
+
+ stats = lister_cgit.run()
+ # sfeed shows its clone link only on the summary tab; foo is an error page
+ expected_nb_origins = 1
+ assert stats == ListerStats(pages=1, origins=expected_nb_origins)

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 2:55 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219598

Event Timeline