diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py --- a/swh/lister/sourceforge/lister.py +++ b/swh/lister/sourceforge/lister.py @@ -177,17 +177,25 @@ bzr_url_match = re.compile( r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzrroot/([^/]+)" ) + cvs_url_match = re.compile( + r"rsync://a.cvs.sourceforge.net/cvsroot/(?P<project>.+)/([^/]+)" + ) for origin in stream: url = origin.url match = url_match.match(url) if match is None: - # Should be a bzr special endpoint - match = bzr_url_match.match(url) - assert match is not None - matches = match.groupdict() + # Could be a bzr or cvs special endpoint + bzr_match = bzr_url_match.match(url) + cvs_match = cvs_url_match.match(url) + matches = None + if bzr_match is not None: + matches = bzr_match.groupdict() + elif cvs_match is not None: + matches = cvs_match.groupdict() + assert matches project = matches["project"] - namespace = "p" # no special namespacing for bzr projects + namespace = "p" # no special namespacing for bzr and cvs projects else: matches = match.groupdict() namespace = matches["namespace"] diff --git a/swh/lister/sourceforge/tests/test_lister.py b/swh/lister/sourceforge/tests/test_lister.py --- a/swh/lister/sourceforge/tests/test_lister.py +++ b/swh/lister/sourceforge/tests/test_lister.py @@ -266,6 +266,18 @@ url="http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo", last_update=iso8601.parse_date("2021-01-27"), ), + ListedOrigin( + lister_id=lister.lister_obj.id, + visit_type="cvs", + url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron", + last_update=iso8601.parse_date("2013-03-07"), + ), + ListedOrigin( + lister_id=lister.lister_obj.id, + visit_type="cvs", + url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/www", + last_update=iso8601.parse_date("2013-03-07"), + ), ] swh_scheduler.record_listed_origins(faked_listed_origins) @@ -289,9 +301,10 @@ lister.state = faked_state stats = lister.run() + # - mramm (3 repos), # changed - assert stats.pages == 2 - assert stats.origins == 5 + assert 
stats.pages == 1 + assert stats.origins == 3 expected_state = { "subsitemap_last_modified": { "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",