diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -22,6 +22,9 @@ [mypy-lazr.*] ignore_missing_imports = True +[mypy-lxml.*] +ignore_missing_imports = True + [mypy-pkg_resources.*] ignore_missing_imports = True diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ launchpadlib tenacity >= 6.2 xmltodict +lxml diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py --- a/swh/lister/sourceforge/lister.py +++ b/swh/lister/sourceforge/lister.py @@ -1,7 +1,8 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + from dataclasses import dataclass, field import datetime from enum import Enum @@ -12,6 +13,7 @@ from bs4 import BeautifulSoup import iso8601 +import lxml.etree import requests from tenacity.before_sleep import before_sleep_log @@ -172,7 +174,7 @@ r".*\.code\.sf\.net/(?P<namespace>[^/]+)/(?P<project>.+)/.*" ) bzr_url_match = re.compile( - r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzrroot/([^/]+)" + r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzr/([^/]+)" ) cvs_url_match = re.compile( r"rsync://a.cvs.sourceforge.net/cvsroot/(?P<project>.+)/([^/]+)" @@ -410,7 +412,37 @@ # SourceForge has removed support for bzr and only keeps legacy projects # around at a separate (also not https) URL. Bzr projects are very rare # and a lot of them are 404 now. 
- url = f"http://{project}.bzr.sourceforge.net/bzrroot/{project}" + url = f"http://{project}.bzr.sourceforge.net/bzr/{project}" + try: + response = self.page_request(url, params={}) + if "To get this branch, use:" not in response.text: + # If a bzr project has multiple branches, we need to extract their + # names from the repository landing page and create one listed origin + # per branch + parser = lxml.etree.HTMLParser() + tree = lxml.etree.fromstring(response.text, parser) + + # Get all tds with class 'autcell' + tds = tree.xpath(".//td[contains(@class, 'autcell')]") + for td in tds: + branch = td.findtext("a") + # If the td's parent contains <img alt="Branch"> and + # it has non-empty text: + if td.xpath("..//img[@alt='Branch']") and branch: + hits.append( + SourceForgeListerEntry( + vcs=VcsNames(tool_name), + url=f"{url}/{branch}", + last_modified=last_modified, + ) + ) + continue + except requests.HTTPError: + logger.warning( + "Bazaar repository page could not be fetched, skipping project '%s'", + project, + ) + continue entry = SourceForgeListerEntry( vcs=VcsNames(tool_name), url=url, last_modified=last_modified ) diff --git a/swh/lister/sourceforge/tests/data/bzr-repo.json b/swh/lister/sourceforge/tests/data/bzr-repo.json deleted file mode 100644 --- a/swh/lister/sourceforge/tests/data/bzr-repo.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "shortname": "bzr-repo", - "name": "Bazaar repo", - "_id": "4bf3fc291be1ce2f10000052", - "url": "https://sourceforge.net/p/bzr-repo/", - "private": false, - "short_description": "This is an example bzr project", - "creation_date": "2009-10-10", - "summary": "", - "external_homepage": "", - "video_url": "", - "socialnetworks": [], - "status": "active", - "moved_to_url": "", - "preferred_support_tool": "", - "preferred_support_url": "", - "developers": [ - { - "username": "Alphare", - "name": "Raphaël Gomès", - "url": "https://sourceforge.net/u/alphare/" - } - ], - "tools": [ - { - "name": "bzr", - "mount_point": "bzr", - "url": 
"/p/bzr-repo/bazaar/", - "icons": { - "24": "images/code_24.png", - "32": "images/code_32.png", - "48": "images/code_48.png" - }, - "installable": true, - "tool_label": "Bazaar", - "mount_label": "Bazaar" - } - ], - "labels": [], - "categories": { - "audience": [], - "developmentstatus": [], - "environment": [], - "language": [], - "license": [], - "translation": [], - "os": [], - "database": [], - "topic": [] - }, - "icon_url": null, - "screenshots": [] - } diff --git a/swh/lister/sourceforge/tests/data/ocaml-lpd.html b/swh/lister/sourceforge/tests/data/ocaml-lpd.html new file mode 100644 --- /dev/null +++ b/swh/lister/sourceforge/tests/data/ocaml-lpd.html @@ -0,0 +1,106 @@ + + + +SourceForge: Browsing /ocaml-lpd + + + + + + + + + +
+

+Browsing + +(root)/ocaml-lpd + +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FilenameLatest RevLast Changed
+ + +.. +
+ +Folder + + +backup.bzr.~1~
+ +Branch + + +trunk +13 +2011-04-17 22:02:29
+
+
+
+
+
+ +
+Help +
+
+
+ + \ No newline at end of file diff --git a/swh/lister/sourceforge/tests/data/ocaml-lpd.json b/swh/lister/sourceforge/tests/data/ocaml-lpd.json new file mode 100644 --- /dev/null +++ b/swh/lister/sourceforge/tests/data/ocaml-lpd.json @@ -0,0 +1,201 @@ +{ + "shortname": "ocaml-lpd", + "name": "Lpd OCaml library", + "_id": "50c63c70e88f3d0bf07d4c6d", + "url": "https://sourceforge.net/p/ocaml-lpd/", + "private": false, + "short_description": "OCaml Lpd is a Line Printer Daemon (LPD) server library written in OCaml. This project moved to OCamlForge https://forge.ocamlcore.org/projects/lpd/", + "creation_date": "2005-02-23", + "summary": "", + "external_homepage": "http://lpd.forge.ocamlcore.org/", + "video_url": "", + "socialnetworks": [], + "status": "moved", + "moved_to_url": "https://forge.ocamlcore.org/projects/lpd/", + "preferred_support_tool": "", + "preferred_support_url": "", + "developers": [ + { + "username": "chris_77", + "name": "ChriS", + "url": "https://sourceforge.net/u/chris_77/" + } + ], + "tools": [ + { + "name": "files-sf", + "mount_point": "files", + "url": "/p/ocaml-lpd/files/", + "icons": { + "24": "images/downloads_24.png", + "32": "images/downloads_32.png", + "48": "images/downloads_48.png" + }, + "installable": false, + "tool_label": "Files", + "mount_label": "Files" + }, + { + "name": "mailman", + "mount_point": "mailman", + "url": "/p/ocaml-lpd/mailman/", + "icons": { + "24": "images/forums_24.png", + "32": "images/forums_32.png", + "48": "images/forums_48.png" + }, + "installable": false, + "tool_label": "Mailing Lists", + "mount_label": "Mailing Lists" + }, + { + "name": "bzr", + "mount_point": "code", + "url": "/p/ocaml-lpd/code/", + "icons": { + "24": "images/code_24.png", + "32": "images/code_32.png", + "48": "images/code_48.png" + }, + "installable": false, + "tool_label": "BZR", + "mount_label": "Code" + }, + { + "name": "summary", + "mount_point": "summary", + "url": "/p/ocaml-lpd/summary/", + "icons": { + "24": 
"images/sftheme/24x24/blog_24.png", + "32": "images/sftheme/32x32/blog_32.png", + "48": "images/sftheme/48x48/blog_48.png" + }, + "installable": false, + "tool_label": "Summary", + "mount_label": "Summary", + "sourceforge_group_id": 132212 + }, + { + "name": "wiki", + "mount_point": "wiki", + "url": "/p/ocaml-lpd/wiki/", + "icons": { + "24": "images/wiki_24.png", + "32": "images/wiki_32.png", + "48": "images/wiki_48.png" + }, + "installable": true, + "tool_label": "Wiki", + "mount_label": "Wiki" + }, + { + "name": "reviews", + "mount_point": "reviews", + "url": "/p/ocaml-lpd/reviews/", + "icons": { + "24": "images/sftheme/24x24/blog_24.png", + "32": "images/sftheme/32x32/blog_32.png", + "48": "images/sftheme/48x48/blog_48.png" + }, + "installable": false, + "tool_label": "Reviews", + "mount_label": "Reviews" + }, + { + "name": "support", + "mount_point": "support", + "url": "/p/ocaml-lpd/support/", + "icons": { + "24": "images/sftheme/24x24/blog_24.png", + "32": "images/sftheme/32x32/blog_32.png", + "48": "images/sftheme/48x48/blog_48.png" + }, + "installable": false, + "tool_label": "Support", + "mount_label": "Support" + }, + { + "name": "activity", + "mount_point": "activity", + "url": "/p/ocaml-lpd/activity/", + "icons": { + "24": "images/admin_24.png", + "32": "images/admin_32.png", + "48": "images/admin_48.png" + }, + "installable": false, + "tool_label": "Tool", + "mount_label": "Activity" + } + ], + "labels": [], + "categories": { + "audience": [ + { + "id": 3, + "shortname": "developers", + "fullname": "Developers", + "fullpath": "Intended Audience :: by End-User Class :: Developers" + } + ], + "developmentstatus": [ + { + "id": 11, + "shortname": "production", + "fullname": "5 - Production/Stable", + "fullpath": "Development Status :: 5 - Production/Stable" + } + ], + "environment": [ + { + "id": 238, + "shortname": "daemon", + "fullname": "Non-interactive (Daemon)", + "fullpath": "User Interface :: Non-interactive (Daemon)" + } + ], + "language": [ + { + 
"id": 454, + "shortname": "ocaml", + "fullname": "OCaml (Objective Caml)", + "fullpath": "Programming Language :: OCaml (Objective Caml)" + } + ], + "license": [ + { + "id": 16, + "shortname": "lgpl", + "fullname": "GNU Library or Lesser General Public License version 2.0 (LGPLv2)", + "fullpath": "License :: OSI-Approved Open Source :: GNU Library or Lesser General Public License version 2.0 (LGPLv2)" + } + ], + "translation": [ + { + "id": 275, + "shortname": "english", + "fullname": "English", + "fullpath": "Translations :: English" + } + ], + "os": [ + { + "id": 436, + "shortname": "os_portable", + "fullname": "OS Portable (Source code to work with many OS platforms)", + "fullpath": "Operating System :: Grouping and Descriptive Categories :: OS Portable (Source code to work with many OS platforms)" + } + ], + "database": [], + "topic": [ + { + "id": 154, + "shortname": "printing", + "fullname": "Printing", + "fullpath": "Topic :: Printing" + } + ] + }, + "icon_url": null, + "screenshots": [] +} \ No newline at end of file diff --git a/swh/lister/sourceforge/tests/data/subsitemap-1.xml b/swh/lister/sourceforge/tests/data/subsitemap-1.xml --- a/swh/lister/sourceforge/tests/data/subsitemap-1.xml +++ b/swh/lister/sourceforge/tests/data/subsitemap-1.xml @@ -41,8 +41,13 @@ daily - https://sourceforge.net/p/bzr-repo/ - 2021-01-27 + https://sourceforge.net/p/t12eksandbox/ + 2011-02-09 + daily + + + https://sourceforge.net/p/ocaml-lpd/ + 2011-04-17 daily diff --git a/swh/lister/sourceforge/tests/data/t12eksandbox.html b/swh/lister/sourceforge/tests/data/t12eksandbox.html new file mode 100644 --- /dev/null +++ b/swh/lister/sourceforge/tests/data/t12eksandbox.html @@ -0,0 +1,274 @@ + + + + + +/t12eksandbox : changes + + + + + + + + + +
+ +
+ + +
+ +
+ + +RSS + + +
+
+ + + +
+
+ +

+ + + +(root)/t12eksandbox + + + +: changes + +from revision +4 + + + +

+ +
+
+To get this branch, use:
+bzr branch +http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox +
+ + + + +

+expand all expand all +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Rev SummaryAuthorsDateDiffFiles
+
+ + + +
+
+
+ +Commit! +
+ +
ctsai at sourceforge +2011-02-09 + +Diff +Files +
+
+ + + +
+
+
+ +fdsa +
+ +
ctsai at sourceforge +2010-02-03 + +Diff +Files +
+
+ + + +
+
+
+ +fdsa +
+ +
ctsai at sourceforge +2009-10-12 + +Diff +Files +
+
+ + + +
+
+ + +ctsai at sourceforge +2009-10-12 + +Diff +Files +
+ +
+

Loggerhead 1.18.1 is a web-based interface for Bazaar branches

+
+
+
+
+
+ +
+Help +
+
+
+ + \ No newline at end of file diff --git a/swh/lister/sourceforge/tests/data/t12eksandbox.json b/swh/lister/sourceforge/tests/data/t12eksandbox.json new file mode 100644 --- /dev/null +++ b/swh/lister/sourceforge/tests/data/t12eksandbox.json @@ -0,0 +1,292 @@ +{ + "shortname": "t12eksandbox", + "name": "t12ek sandbox", + "_id": "5304cd2634309d109fc1dec5", + "url": "https://sourceforge.net/p/t12eksandbox/", + "private": false, + "short_description": "Sandboxes are for playing in... Note: this is an SF.net staff's test project. Don't expect to find real files here. Update test!\r\nLine 2!\r\nupdate 2012-06-05", + "creation_date": "2009-07-14", + "summary": "", + "external_homepage": "http://t12eksandbox.sourceforge.net", + "video_url": "", + "socialnetworks": [], + "status": "active", + "moved_to_url": "", + "preferred_support_tool": "_url", + "preferred_support_url": "http://sourceforge.net/tracker/?func=add&group_id=269579&atid=1146768", + "developers": [ + { + "username": "sillygoose", + "name": "sillygoose", + "url": "https://sourceforge.net/u/sillygoose/" + }, + { + "username": "thimsmith", + "name": "Tim Siegel", + "url": "https://sourceforge.net/u/thimsmith/" + } + ], + "tools": [ + { + "name": "reviews", + "mount_point": "reviews", + "url": "/p/t12eksandbox/reviews/", + "icons": { + "24": "images/sftheme/24x24/blog_24.png", + "32": "images/sftheme/32x32/blog_32.png", + "48": "images/sftheme/48x48/blog_48.png" + }, + "installable": false, + "tool_label": "Reviews", + "mount_label": "Reviews" + }, + { + "name": "summary", + "mount_point": "summary", + "url": "/p/t12eksandbox/summary/", + "icons": { + "24": "images/sftheme/24x24/blog_24.png", + "32": "images/sftheme/32x32/blog_32.png", + "48": "images/sftheme/48x48/blog_48.png" + }, + "installable": false, + "tool_label": "Summary", + "mount_label": "Summary", + "sourceforge_group_id": 269579 + }, + { + "name": "mailman", + "mount_point": "mailman", + "url": "/p/t12eksandbox/mailman/", + "icons": { + "24": 
"images/forums_24.png", + "32": "images/forums_32.png", + "48": "images/forums_48.png" + }, + "installable": false, + "tool_label": "Mailing Lists", + "mount_label": "Mailing Lists" + }, + { + "name": "support", + "mount_point": "support", + "url": "/p/t12eksandbox/support/", + "icons": { + "24": "images/sftheme/24x24/blog_24.png", + "32": "images/sftheme/32x32/blog_32.png", + "48": "images/sftheme/48x48/blog_48.png" + }, + "installable": false, + "tool_label": "Support", + "mount_label": "Support" + }, + { + "name": "files-sf", + "mount_point": "files", + "url": "/p/t12eksandbox/files/", + "icons": { + "24": "images/downloads_24.png", + "32": "images/downloads_32.png", + "48": "images/downloads_48.png" + }, + "installable": false, + "tool_label": "Files", + "mount_label": "Files" + }, + { + "name": "wiki", + "mount_point": "wiki", + "url": "/p/t12eksandbox/wiki/", + "icons": { + "24": "images/wiki_24.png", + "32": "images/wiki_32.png", + "48": "images/wiki_48.png" + }, + "installable": true, + "tool_label": "Wiki", + "mount_label": "Wiki" + }, + { + "name": "blog", + "mount_point": "news", + "url": "/p/t12eksandbox/news/", + "icons": { + "24": "images/blog_24.png", + "32": "images/blog_32.png", + "48": "images/blog_48.png" + }, + "installable": true, + "tool_label": "Blog", + "mount_label": "News" + }, + { + "name": "bzr", + "mount_point": "bazaar", + "url": "/p/t12eksandbox/bazaar/", + "icons": { + "24": "images/code_24.png", + "32": "images/code_32.png", + "48": "images/code_48.png" + }, + "installable": false, + "tool_label": "BZR", + "mount_label": "Bazaar" + }, + { + "name": "discussion", + "mount_point": "discussion", + "url": "/p/t12eksandbox/discussion/", + "icons": { + "24": "images/forums_24.png", + "32": "images/forums_32.png", + "48": "images/forums_48.png" + }, + "installable": true, + "tool_label": "Discussion", + "mount_label": "Discussion" + }, + { + "name": "tickets", + "mount_point": "support-requests", + "url": 
"/p/t12eksandbox/support-requests/", + "icons": { + "24": "images/tickets_24.png", + "32": "images/tickets_32.png", + "48": "images/tickets_48.png" + }, + "installable": true, + "tool_label": "Tickets", + "mount_label": "Support Requests" + }, + { + "name": "tickets", + "mount_point": "feature-requests", + "url": "/p/t12eksandbox/feature-requests/", + "icons": { + "24": "images/tickets_24.png", + "32": "images/tickets_32.png", + "48": "images/tickets_48.png" + }, + "installable": true, + "tool_label": "Tickets", + "mount_label": "Feature Requests" + }, + { + "name": "link", + "mount_point": "donate", + "url": "/p/t12eksandbox/donate/", + "icons": { + "24": "images/ext_24.png", + "32": "images/ext_32.png", + "48": "images/ext_48.png" + }, + "installable": true, + "tool_label": "External Link", + "mount_label": "Donate" + }, + { + "name": "tickets", + "mount_point": "patches", + "url": "/p/t12eksandbox/patches/", + "icons": { + "24": "images/tickets_24.png", + "32": "images/tickets_32.png", + "48": "images/tickets_48.png" + }, + "installable": true, + "tool_label": "Tickets", + "mount_label": "Patches" + }, + { + "name": "tickets", + "mount_point": "bugs", + "url": "/p/t12eksandbox/bugs/", + "icons": { + "24": "images/tickets_24.png", + "32": "images/tickets_32.png", + "48": "images/tickets_48.png" + }, + "installable": true, + "tool_label": "Tickets", + "mount_label": "Bugs" + }, + { + "name": "activity", + "mount_point": "activity", + "url": "/p/t12eksandbox/activity/", + "icons": { + "24": "images/admin_24.png", + "32": "images/admin_32.png", + "48": "images/admin_48.png" + }, + "installable": false, + "tool_label": "Tool", + "mount_label": "Activity" + } + ], + "labels": [], + "categories": { + "audience": [], + "developmentstatus": [ + { + "id": 10, + "shortname": "beta", + "fullname": "4 - Beta", + "fullpath": "Development Status :: 4 - Beta" + }, + { + "id": 7, + "shortname": "planning", + "fullname": "1 - Planning", + "fullpath": "Development Status :: 1 - 
Planning" + } + ], + "environment": [], + "language": [], + "license": [ + { + "id": 196, + "shortname": "other", + "fullname": "Other License", + "fullpath": "License :: Other License" + } + ], + "translation": [], + "os": [], + "database": [ + { + "id": 524, + "shortname": "db_net_mysql", + "fullname": "MySQL", + "fullpath": "Database Environment :: Network-based DBMS :: MySQL" + } + ], + "topic": [ + { + "id": 575, + "shortname": "testing", + "fullname": "Testing", + "fullpath": "Topic :: Software Development :: Testing" + }, + { + "id": 97, + "shortname": "scientific", + "fullname": "Scientific/Engineering", + "fullpath": "Topic :: Scientific/Engineering" + } + ] + }, + "icon_url": null, + "screenshots": [ + { + "url": "https://sourceforge.net/p/t12eksandbox/screenshot/224498.jpg", + "thumbnail_url": "https://sourceforge.net/p/t12eksandbox/screenshot/224498.jpg/thumb", + "caption": "aimage2" + }, + { + "url": "https://sourceforge.net/p/t12eksandbox/screenshot/224496.jpg", + "thumbnail_url": "https://sourceforge.net/p/t12eksandbox/screenshot/224496.jpg/thumb", + "caption": "3Kimage3" + } + ] +} \ No newline at end of file diff --git a/swh/lister/sourceforge/tests/test_lister.py b/swh/lister/sourceforge/tests/test_lister.py --- a/swh/lister/sourceforge/tests/test_lister.py +++ b/swh/lister/sourceforge/tests/test_lister.py @@ -35,6 +35,8 @@ "mramm": "p", "os3dmodels": "p", "random-mercurial": "p", + "t12eksandbox": "p", + "ocaml-lpd": "p", } URLS_MATCHER = { @@ -67,6 +69,10 @@ return Path(datadir, "aaron.html").read_text() +def get_bzr_repo_page(datadir, repo_name): + return Path(datadir, f"{repo_name}.html").read_text() + + def _check_request_headers(request): return request.headers.get("User-Agent") == USER_AGENT @@ -85,7 +91,14 @@ "https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"), "https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"), "http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"), - 
"http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo": ("bzr", "2021-01-27"), + "http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": ( + "bzr", + "2011-02-09", + ), + "http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": ( + "bzr", + "2011-04-17", + ), "rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron": ("cvs", "2013-03-07"), "rsync://a.cvs.sourceforge.net/cvsroot/aaron/www": ("cvs", "2013-03-07"), } @@ -126,6 +139,16 @@ text=get_cvs_info_page(datadir), additional_matcher=_check_request_headers, ) + requests_mock.get( + re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), + text=get_bzr_repo_page(datadir, "t12eksandbox"), + additional_matcher=_check_request_headers, + ) + requests_mock.get( + re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), + text=get_bzr_repo_page(datadir, "ocaml-lpd"), + additional_matcher=_check_request_headers, + ) stats = lister.run() # - os3dmodels (2 repos), @@ -133,10 +156,11 @@ # - mojunk (3 repos), # - backapps/website (1 repo), # - random-mercurial (1 repo). - # - bzr-repo (1 repo). + # - t12eksandbox (1 repo). + # - ocaml-lpd (1 repo). # adobe and backapps itself have no repos. 
- assert stats.pages == 7 - assert stats.origins == 13 + assert stats.pages == 8 + assert stats.origins == 14 expected_state = { "subsitemap_last_modified": { "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18", @@ -196,6 +220,18 @@ additional_matcher=_check_request_headers, ) + requests_mock.get( + re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), + text=get_bzr_repo_page(datadir, "t12eksandbox"), + additional_matcher=_check_request_headers, + ) + + requests_mock.get( + re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), + text=get_bzr_repo_page(datadir, "ocaml-lpd"), + additional_matcher=_check_request_headers, + ) + faked_listed_origins = [ # mramm: changed ListedOrigin( @@ -263,8 +299,14 @@ ListedOrigin( lister_id=lister.lister_obj.id, visit_type="bzr", - url="http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo", - last_update=iso8601.parse_date("2021-01-27"), + url="http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox", + last_update=iso8601.parse_date("2011-02-09"), + ), + ListedOrigin( + lister_id=lister.lister_obj.id, + visit_type="bzr", + url="http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk", + last_update=iso8601.parse_date("2011-04-17"), ), ListedOrigin( lister_id=lister.lister_obj.id, @@ -359,16 +401,29 @@ additional_matcher=_check_request_headers, ) + requests_mock.get( + re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), + text=get_bzr_repo_page(datadir, "t12eksandbox"), + additional_matcher=_check_request_headers, + ) + + requests_mock.get( + re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), + text=get_bzr_repo_page(datadir, "ocaml-lpd"), + additional_matcher=_check_request_headers, + ) + stats = lister.run() # - os3dmodels (2 repos), # - mramm (3 repos), # - mojunk (3 repos), # - backapps/website (1 repo), # - random-mercurial (1 repo). - # - bzr-repo (1 repo). + # - t12eksandbox (1 repo). + # - ocaml-lpd (1 repo). 
# adobe and backapps itself have no repos. - assert stats.pages == 7 - assert stats.origins == 13 + assert stats.pages == 8 + assert stats.origins == 14 _check_listed_origins(lister, swh_scheduler) @@ -426,6 +481,16 @@ json=functools.partial(get_project_json, datadir), additional_matcher=_check_request_headers, ) + requests_mock.get( + re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"), + text=get_bzr_repo_page(datadir, "t12eksandbox"), + additional_matcher=_check_request_headers, + ) + requests_mock.get( + re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"), + text=get_bzr_repo_page(datadir, "ocaml-lpd"), + additional_matcher=_check_request_headers, + ) # Make all `mramm` requests fail # `mramm` is in subsitemap 0, which ensures we keep listing after an error. requests_mock.get( @@ -442,11 +507,12 @@ # - mojunk (3 repos), # - backapps/website (1 repo), # - random-mercurial (1 repo). - # - bzr-repo (1 repo). + # - t12eksandbox (1 repo). + # - ocaml-lpd (1 repo). # adobe and backapps itself have no repos. # Did *not* list mramm - assert stats.pages == 5 - assert stats.origins == 8 + assert stats.pages == 6 + assert stats.origins == 9 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results res = {o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins} @@ -459,5 +525,12 @@ "https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"), "https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"), "http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"), - "http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo": ("bzr", "2021-01-27"), + "http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": ( + "bzr", + "2011-02-09", + ), + "http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": ( + "bzr", + "2011-04-17", + ), }