diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py --- a/swh/lister/launchpad/lister.py +++ b/swh/lister/launchpad/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,7 +6,7 @@ from dataclasses import dataclass from datetime import datetime import logging -from typing import Any, Dict, Iterator, Optional +from typing import Any, Dict, Iterator, Optional, Tuple import iso8601 from launchpadlib.launchpad import Launchpad @@ -19,20 +19,28 @@ logger = logging.getLogger(__name__) -LaunchpadPageType = Iterator[Collection] +VcsType = str +LaunchpadPageType = Tuple[VcsType, Collection] @dataclass class LaunchpadListerState: """State of Launchpad lister""" - date_last_modified: Optional[datetime] = None - """modification date of last updated repository since last listing""" + git_date_last_modified: Optional[datetime] = None + """modification date of last updated git repository since last listing""" + bzr_date_last_modified: Optional[datetime] = None + """modification date of last updated bzr repository since last listing""" + + +def origin(vcs_type: str, repo: Any) -> str: + """Determine the origin url out of a repository with a given vcs_type""" + return repo.git_https_url if vcs_type == "git" else repo.web_link class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): """ - List git repositories from Launchpad. + List repositories from Launchpad (git or bzr). Args: scheduler: instance of SchedulerInterface @@ -56,36 +64,53 @@ credentials=credentials, ) self.incremental = incremental - self.date_last_modified = None + self.date_last_modified: Dict[str, Optional[datetime]] = { + "git": None, + "bzr": None, + } def state_from_dict(self, d: Dict[str, Any]) -> LaunchpadListerState: - date_last_modified = d.get("date_last_modified") - if date_last_modified is not None: - d["date_last_modified"] = iso8601.parse_date(date_last_modified) + for vcs_type in ["git", "bzr"]: + key = f"{vcs_type}_date_last_modified" + date_last_modified = d.get(key) + if date_last_modified is not None: + d[key] = iso8601.parse_date(date_last_modified) + return LaunchpadListerState(**d) def state_to_dict(self, state: LaunchpadListerState) -> Dict[str, Any]: - d: Dict[str, Optional[str]] = {"date_last_modified": None} - date_last_modified = state.date_last_modified - if date_last_modified is not None: - d["date_last_modified"] = date_last_modified.isoformat() + d: Dict[str, Optional[str]] = {} + for vcs_type in ["git", "bzr"]: + attribute_name = f"{vcs_type}_date_last_modified" + d[attribute_name] = None + + if hasattr(state, attribute_name): + date_last_modified = getattr(state, attribute_name) + if date_last_modified is not None: + d[attribute_name] = date_last_modified.isoformat() return d def get_pages(self) -> Iterator[LaunchpadPageType]: """ - Yields an iterator on all git repositories hosted on Launchpad sorted + Yields an iterator on all git/bzr repositories hosted on Launchpad sorted by last modification date in ascending order. """ launchpad = Launchpad.login_anonymously( "softwareheritage", "production", version="devel" ) - date_last_modified = None if self.incremental: - date_last_modified = self.state.date_last_modified - get_repos = launchpad.git_repositories.getRepositories - yield get_repos( - order_by="most neglected first", modified_since_date=date_last_modified - ) + self.date_last_modified = { + "git": self.state.git_date_last_modified, + "bzr": self.state.bzr_date_last_modified, + } + for vcs_type, get_vcs_fn in [ + ("git", launchpad.git_repositories.getRepositories), + ("bzr", launchpad.branches.getBranches), + ]: + yield vcs_type, get_vcs_fn( + order_by="most neglected first", + modified_since_date=self.date_last_modified[vcs_type], + ) def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]: """ @@ -93,40 +118,65 @@ """ assert self.lister_obj.id is not None - prev_origin_url = None + prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None} + + vcs_type, repos = page - for repo in page: + assert vcs_type in {"git", "bzr"} - origin_url = repo.git_https_url + for repo in repos: + origin_url = origin(vcs_type, repo) # filter out origins with invalid URL or origin previously listed # (last modified repository will be listed twice by launchpadlib) - if not origin_url.startswith("https://") or origin_url == prev_origin_url: + if ( + not origin_url.startswith("https://") + or origin_url == prev_origin_url[vcs_type] + ): continue last_update = repo.date_last_modified - self.date_last_modified = last_update + self.date_last_modified[vcs_type] = last_update - logger.debug("Found origin %s last updated on %s", origin_url, last_update) + logger.debug( + "Found origin %s with type %s last updated on %s", + origin_url, + vcs_type, + last_update, + ) - prev_origin_url = origin_url + prev_origin_url[vcs_type] = origin_url yield ListedOrigin( lister_id=self.lister_obj.id, - visit_type="git", + visit_type=vcs_type, url=origin_url, last_update=last_update, ) def finalize(self) -> None: - if self.date_last_modified is None: + git_date_last_modified = self.date_last_modified["git"] + bzr_date_last_modified = self.date_last_modified["bzr"] + if git_date_last_modified is None and bzr_date_last_modified is None: return if self.incremental and ( - self.state.date_last_modified is None - or self.date_last_modified > self.state.date_last_modified + self.state.git_date_last_modified is None + or ( + git_date_last_modified is not None + and git_date_last_modified > self.state.git_date_last_modified + ) + ): + self.state.git_date_last_modified = git_date_last_modified + + if self.incremental and ( + self.state.bzr_date_last_modified is None + or ( + bzr_date_last_modified is not None + and bzr_date_last_modified > self.state.bzr_date_last_modified + ) ): - self.state.date_last_modified = self.date_last_modified + self.state.bzr_date_last_modified = self.date_last_modified["bzr"] self.updated = True diff --git a/swh/lister/launchpad/tests/data/launchpad_bzr_response.json b/swh/lister/launchpad/tests/data/launchpad_bzr_response.json new file mode 100644 --- /dev/null +++ b/swh/lister/launchpad/tests/data/launchpad_bzr_response.json @@ -0,0 +1,126 @@ +[ + { + "self_link": "https://api.launchpad.net/1.0/fourbar", + "web_link": "https://launchpad.net/fourbar", + "resource_type_link": "https://api.launchpad.net/1.0/#project", + "official_answers": true, + "official_blueprints": true, + "official_codehosting": true, + "official_bugs": true, + "information_type": "Public", + "active": true, + "bug_reporting_guidelines": null, + "bug_reported_acknowledgement": null, + "official_bug_tags": [], + "recipes_collection_link": "https://api.launchpad.net/1.0/fourbar/recipes", + "active_milestones_collection_link": "https://api.launchpad.net/1.0/fourbar/active_milestones", + "all_milestones_collection_link": "https://api.launchpad.net/1.0/fourbar/all_milestones", + "bug_supervisor_link": null, + "qualifies_for_free_hosting": true, + "reviewer_whiteboard": "tag:launchpad.net:2008:redacted", + "is_permitted": "tag:launchpad.net:2008:redacted", + "project_reviewed": "tag:launchpad.net:2008:redacted", + "license_approved": "tag:launchpad.net:2008:redacted", + "private": false, + "display_name": "fourBar", + "icon_link": "https://api.launchpad.net/1.0/fourbar/icon", + "logo_link": "https://api.launchpad.net/1.0/fourbar/logo", + "name": "fourbar", + "owner_link": "https://api.launchpad.net/1.0/~sorivenul", + "project_group_link": null, + "title": "fourBar", + "registrant_link": "https://api.launchpad.net/1.0/~sorivenul", + "driver_link": null, + "summary": "fourBar is a minimal application launcher for POSIX systems. It launches four commonly used applications (terminal, file browser, editor, and web browser by default). It is written in Python/Tkinter. Documentation on simple customization is included. ", + "description": "If you wish to help with the development of fourBar, download a branch, test, report bugs and propose features. There is still work to be done.", + "date_created": "2008-11-03T07:03:00.872230+00:00", + "homepage_url": null, + "wiki_url": null, + "screenshots_url": null, + "download_url": "http://downloads.sourceforge.net/fourbar/fourbar-1.0.0.tar.gz?modtime=1224102066&big_mirror=0", + "programming_language": "Python", + "sourceforge_project": "fourBar", + "freshmeat_project": null, + "brand_link": "https://api.launchpad.net/1.0/fourbar/brand", + "private_bugs": false, + "licenses": [ + "GNU GPL v3" + ], + "license_info": null, + "bug_tracker_link": null, + "date_next_suggest_packaging": null, + "series_collection_link": "https://api.launchpad.net/1.0/fourbar/series", + "development_focus_link": "https://api.launchpad.net/1.0/fourbar/trunk", + "releases_collection_link": "https://api.launchpad.net/1.0/fourbar/releases", + "translation_focus_link": null, + "commercial_subscription_link": null, + "commercial_subscription_is_due": false, + "remote_product": "242408&1119369", + "security_contact": null, + "vcs": "Bazaar", + "http_etag": "\"e3685b989bd2609f9a84bd2d90bef380c6f3c92b-13a47c4e8b4688c8fc042bf7eede3a2f4c14a9d6\"", + "date_last_modified":"2016-05-19T16:05:23.706734+00:00" + }, + { + "self_link": "https://api.launchpad.net/1.0/gekkoware", + "web_link": "https://launchpad.net/gekkoware", + "resource_type_link": "https://api.launchpad.net/1.0/#project", + "official_answers": false, + "official_blueprints": false, + "official_codehosting": false, + "official_bugs": false, + "information_type": "Public", + "active": true, + "bug_reporting_guidelines": null, + "bug_reported_acknowledgement": null, + "official_bug_tags": [], + "recipes_collection_link": "https://api.launchpad.net/1.0/gekkoware/recipes", + "active_milestones_collection_link": "https://api.launchpad.net/1.0/gekkoware/active_milestones", + "all_milestones_collection_link": "https://api.launchpad.net/1.0/gekkoware/all_milestones", + "bug_supervisor_link": null, + "qualifies_for_free_hosting": true, + "reviewer_whiteboard": "tag:launchpad.net:2008:redacted", + "is_permitted": "tag:launchpad.net:2008:redacted", + "project_reviewed": "tag:launchpad.net:2008:redacted", + "license_approved": "tag:launchpad.net:2008:redacted", + "private": false, + "display_name": "gekkoware", + "icon_link": "https://api.launchpad.net/1.0/gekkoware/icon", + "logo_link": "https://api.launchpad.net/1.0/gekkoware/logo", + "name": "gekkoware", + "owner_link": "https://api.launchpad.net/1.0/~compermisos", + "project_group_link": null, + "title": "gekkoware", + "registrant_link": "https://api.launchpad.net/1.0/~compermisos", + "driver_link": null, + "summary": "A port of gekko to ubuntu", + "description": null, + "date_created": "2007-10-21T03:02:22.186775+00:00", + "homepage_url": "http://gekkoware.org", + "wiki_url": null, + "screenshots_url": null, + "download_url": null, + "programming_language": "php", + "sourceforge_project": "gekkoware", + "freshmeat_project": null, + "brand_link": "https://api.launchpad.net/1.0/gekkoware/brand", + "private_bugs": false, + "licenses": [ + "GNU GPL v2" + ], + "license_info": null, + "bug_tracker_link": null, + "date_next_suggest_packaging": null, + "series_collection_link": "https://api.launchpad.net/1.0/gekkoware/series", + "development_focus_link": "https://api.launchpad.net/1.0/gekkoware/trunk", + "releases_collection_link": "https://api.launchpad.net/1.0/gekkoware/releases", + "translation_focus_link": null, + "commercial_subscription_link": null, + "commercial_subscription_is_due": false, + "remote_product": "117004&676653", + "security_contact": null, + "vcs": "Bazaar", + "http_etag": "\"b9802efcebb5afdd87c8ee10f8473040340bcead-159127be59c12e7cbb161eee4cae2ade72353c0d\"", + "date_last_modified":"2017-03-15T16:03:22.706432+00:00" + } +] diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py --- a/swh/lister/launchpad/tests/test_lister.py +++ b/swh/lister/launchpad/tests/test_lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 The Software Heritage developers +# Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -10,7 +10,7 @@ import pytest -from ..lister import LaunchpadLister +from ..lister import LaunchpadLister, origin class _Repo: @@ -49,25 +49,35 @@ return _launchpad_response(datadir, "launchpad_response2.json") -def _mock_getRepositories(mocker, launchpad_response): +@pytest.fixture +def launchpad_bzr_response(datadir): + return _launchpad_response(datadir, "launchpad_bzr_response.json") + + +def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None): mock_launchpad = mocker.patch("swh.lister.launchpad.lister.Launchpad") mock_getRepositories = mock_launchpad.git_repositories.getRepositories mock_getRepositories.return_value = launchpad_response + mock_getBranches = mock_launchpad.branches.getBranches + mock_getBranches.return_value = ( + [] if launchpad_bzr_response is None else launchpad_bzr_response + ) mock_launchpad.login_anonymously.return_value = mock_launchpad - return mock_getRepositories + return mock_getRepositories, mock_getBranches -def _check_listed_origins(scheduler_origins, launchpad_response): - for origin in launchpad_response: +def _check_listed_origins(scheduler_origins, launchpad_response, vcs_type="git"): + for repo in launchpad_response: filtered_origins = [ - o for o in scheduler_origins if o.url == origin.git_https_url + o for o in scheduler_origins if o.url == origin(vcs_type, repo) ] assert len(filtered_origins) == 1 - assert filtered_origins[0].last_update == origin.date_last_modified + assert filtered_origins[0].last_update == repo.date_last_modified + assert filtered_origins[0].visit_type == vcs_type def test_lister_from_configfile(swh_scheduler_config, mocker): @@ -81,65 +91,95 @@ assert lister.credentials is not None -def test_launchpad_full_lister(swh_scheduler, mocker, launchpad_response1): - mock_getRepositories = _mock_getRepositories(mocker, launchpad_response1) +def test_launchpad_full_lister( + swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response +): + mock_getRepositories, mock_getBranches = _mock_launchpad( + mocker, launchpad_response1, launchpad_bzr_response + ) lister = LaunchpadLister(scheduler=swh_scheduler) stats = lister.run() assert not lister.incremental assert lister.updated - assert stats.pages == 1 - assert stats.origins == len(launchpad_response1) + assert stats.pages == 1 + 1, "Expects 1 page for git origins, another for bzr ones" + assert stats.origins == len(launchpad_response1) + len(launchpad_bzr_response) mock_getRepositories.assert_called_once_with( order_by="most neglected first", modified_since_date=None ) + mock_getBranches.assert_called_once_with( + order_by="most neglected first", modified_since_date=None + ) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results - assert len(scheduler_origins) == len(launchpad_response1) + assert len(scheduler_origins) == len(launchpad_response1) + len( + launchpad_bzr_response + ) _check_listed_origins(scheduler_origins, launchpad_response1) + _check_listed_origins(scheduler_origins, launchpad_bzr_response, vcs_type="bzr") def test_launchpad_incremental_lister( - swh_scheduler, mocker, launchpad_response1, launchpad_response2 + swh_scheduler, + mocker, + launchpad_response1, + launchpad_response2, + launchpad_bzr_response, ): - mock_getRepositories = _mock_getRepositories(mocker, launchpad_response1) + mock_getRepositories, mock_getBranches = _mock_launchpad( + mocker, launchpad_response1, launchpad_bzr_response + ) lister = LaunchpadLister(scheduler=swh_scheduler, incremental=True) stats = lister.run() assert lister.incremental assert lister.updated - assert stats.pages == 1 - assert stats.origins == len(launchpad_response1) + assert stats.pages == 1 + 1, "Expects 1 page for git origins, another for bzr ones" + len_first_runs = len(launchpad_response1) + len(launchpad_bzr_response) + assert stats.origins == len_first_runs mock_getRepositories.assert_called_once_with( order_by="most neglected first", modified_since_date=None ) + mock_getBranches.assert_called_once_with( + order_by="most neglected first", modified_since_date=None + ) lister_state = lister.get_state_from_scheduler() - assert lister_state.date_last_modified == launchpad_response1[-1].date_last_modified + assert ( + lister_state.git_date_last_modified + == launchpad_response1[-1].date_last_modified + ) + assert ( + lister_state.bzr_date_last_modified + == launchpad_bzr_response[-1].date_last_modified + ) - mock_getRepositories = _mock_getRepositories(mocker, launchpad_response2) + mock_getRepositories, mock_getBranches = _mock_launchpad( + mocker, launchpad_response2 + ) lister = LaunchpadLister(scheduler=swh_scheduler, incremental=True) stats = lister.run() assert lister.incremental assert lister.updated - assert stats.pages == 1 + assert stats.pages == 2, "Empty bzr response still accounts for 1 page" assert stats.origins == len(launchpad_response2) mock_getRepositories.assert_called_once_with( order_by="most neglected first", - modified_since_date=lister_state.date_last_modified, + modified_since_date=lister_state.git_date_last_modified, ) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results - assert len(scheduler_origins) == len(launchpad_response1) + len(launchpad_response2) + assert len(scheduler_origins) == len_first_runs + len(launchpad_response2) _check_listed_origins(scheduler_origins, launchpad_response1) + _check_listed_origins(scheduler_origins, launchpad_bzr_response, vcs_type="bzr") _check_listed_origins(scheduler_origins, launchpad_response2) @@ -147,12 +187,12 @@ swh_scheduler, mocker, ): invalid_origin = [_Repo({"git_https_url": "tag:launchpad.net:2008:redacted",})] - _mock_getRepositories(mocker, invalid_origin) + _mock_launchpad(mocker, invalid_origin) lister = LaunchpadLister(scheduler=swh_scheduler) stats = lister.run() assert not lister.updated - assert stats.pages == 1 + assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)" assert stats.origins == 0 @@ -166,10 +206,10 @@ } ) origins = [origin, origin] - _mock_getRepositories(mocker, origins) + _mock_launchpad(mocker, origins) lister = LaunchpadLister(scheduler=swh_scheduler) stats = lister.run() assert lister.updated - assert stats.pages == 1 + assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)" assert stats.origins == 1