Page Menu | Home | Software Heritage

D7193.id26070.diff
No One | Temporary

D7193.id26070.diff

diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py
--- a/swh/lister/launchpad/lister.py
+++ b/swh/lister/launchpad/lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 The Software Heritage developers
+# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,7 +6,7 @@
from dataclasses import dataclass
from datetime import datetime
import logging
-from typing import Any, Dict, Iterator, Optional
+from typing import Any, Dict, Iterator, Optional, Tuple
import iso8601
from launchpadlib.launchpad import Launchpad
@@ -19,20 +19,28 @@
logger = logging.getLogger(__name__)
-LaunchpadPageType = Iterator[Collection]
+VcsType = str
+LaunchpadPageType = Tuple[VcsType, Collection]
@dataclass
class LaunchpadListerState:
"""State of Launchpad lister"""
- date_last_modified: Optional[datetime] = None
- """modification date of last updated repository since last listing"""
+ git_date_last_modified: Optional[datetime] = None
+ """modification date of last updated git repository since last listing"""
+ bzr_date_last_modified: Optional[datetime] = None
+ """modification date of last updated bzr repository since last listing"""
+
+
+def origin(vcs_type: str, repo: Any) -> str:
+ """Determine the origin url out of a repository with a given vcs_type"""
+ return repo.git_https_url if vcs_type == "git" else repo.web_link
class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
"""
- List git repositories from Launchpad.
+ List repositories from Launchpad (git or bzr).
Args:
scheduler: instance of SchedulerInterface
@@ -56,36 +64,53 @@
credentials=credentials,
)
self.incremental = incremental
- self.date_last_modified = None
+ self.date_last_modified: Dict[str, Optional[datetime]] = {
+ "git": None,
+ "bzr": None,
+ }
def state_from_dict(self, d: Dict[str, Any]) -> LaunchpadListerState:
- date_last_modified = d.get("date_last_modified")
- if date_last_modified is not None:
- d["date_last_modified"] = iso8601.parse_date(date_last_modified)
+ for vcs_type in ["git", "bzr"]:
+ key = f"{vcs_type}_date_last_modified"
+ date_last_modified = d.get(key)
+ if date_last_modified is not None:
+ d[key] = iso8601.parse_date(date_last_modified)
+
return LaunchpadListerState(**d)
def state_to_dict(self, state: LaunchpadListerState) -> Dict[str, Any]:
- d: Dict[str, Optional[str]] = {"date_last_modified": None}
- date_last_modified = state.date_last_modified
- if date_last_modified is not None:
- d["date_last_modified"] = date_last_modified.isoformat()
+ d: Dict[str, Optional[str]] = {}
+ for vcs_type in ["git", "bzr"]:
+ attribute_name = f"{vcs_type}_date_last_modified"
+ d[attribute_name] = None
+
+ if hasattr(state, attribute_name):
+ date_last_modified = getattr(state, attribute_name)
+ if date_last_modified is not None:
+ d[attribute_name] = date_last_modified.isoformat()
return d
def get_pages(self) -> Iterator[LaunchpadPageType]:
"""
- Yields an iterator on all git repositories hosted on Launchpad sorted
+ Yields an iterator on all git/bzr repositories hosted on Launchpad sorted
by last modification date in ascending order.
"""
launchpad = Launchpad.login_anonymously(
"softwareheritage", "production", version="devel"
)
- date_last_modified = None
if self.incremental:
- date_last_modified = self.state.date_last_modified
- get_repos = launchpad.git_repositories.getRepositories
- yield get_repos(
- order_by="most neglected first", modified_since_date=date_last_modified
- )
+ self.date_last_modified = {
+ "git": self.state.git_date_last_modified,
+ "bzr": self.state.bzr_date_last_modified,
+ }
+ for vcs_type, get_vcs_fn in [
+ ("git", launchpad.git_repositories.getRepositories),
+ ("bzr", launchpad.branches.getBranches),
+ ]:
+ yield vcs_type, get_vcs_fn(
+ order_by="most neglected first",
+ modified_since_date=self.date_last_modified[vcs_type],
+ )
def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]:
"""
@@ -93,40 +118,65 @@
"""
assert self.lister_obj.id is not None
- prev_origin_url = None
+ prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
+
+ vcs_type, repos = page
- for repo in page:
+ assert vcs_type in {"git", "bzr"}
- origin_url = repo.git_https_url
+ for repo in repos:
+ origin_url = origin(vcs_type, repo)
# filter out origins with invalid URL or origin previously listed
# (last modified repository will be listed twice by launchpadlib)
- if not origin_url.startswith("https://") or origin_url == prev_origin_url:
+ if (
+ not origin_url.startswith("https://")
+ or origin_url == prev_origin_url[vcs_type]
+ ):
continue
last_update = repo.date_last_modified
- self.date_last_modified = last_update
+ self.date_last_modified[vcs_type] = last_update
- logger.debug("Found origin %s last updated on %s", origin_url, last_update)
+ logger.debug(
+ "Found origin %s with type %s last updated on %s",
+ origin_url,
+ vcs_type,
+ last_update,
+ )
- prev_origin_url = origin_url
+ prev_origin_url[vcs_type] = origin_url
yield ListedOrigin(
lister_id=self.lister_obj.id,
- visit_type="git",
+ visit_type=vcs_type,
url=origin_url,
last_update=last_update,
)
def finalize(self) -> None:
- if self.date_last_modified is None:
+ git_date_last_modified = self.date_last_modified["git"]
+ bzr_date_last_modified = self.date_last_modified["bzr"]
+ if git_date_last_modified is None and bzr_date_last_modified is None:
return
if self.incremental and (
- self.state.date_last_modified is None
- or self.date_last_modified > self.state.date_last_modified
+ self.state.git_date_last_modified is None
+ or (
+ git_date_last_modified is not None
+ and git_date_last_modified > self.state.git_date_last_modified
+ )
+ ):
+ self.state.git_date_last_modified = git_date_last_modified
+
+ if self.incremental and (
+ self.state.bzr_date_last_modified is None
+ or (
+ bzr_date_last_modified is not None
+ and bzr_date_last_modified > self.state.bzr_date_last_modified
+ )
):
- self.state.date_last_modified = self.date_last_modified
+ self.state.bzr_date_last_modified = self.date_last_modified["bzr"]
self.updated = True
diff --git a/swh/lister/launchpad/tests/data/launchpad_bzr_response.json b/swh/lister/launchpad/tests/data/launchpad_bzr_response.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/launchpad/tests/data/launchpad_bzr_response.json
@@ -0,0 +1,126 @@
+[
+ {
+ "self_link": "https://api.launchpad.net/1.0/fourbar",
+ "web_link": "https://launchpad.net/fourbar",
+ "resource_type_link": "https://api.launchpad.net/1.0/#project",
+ "official_answers": true,
+ "official_blueprints": true,
+ "official_codehosting": true,
+ "official_bugs": true,
+ "information_type": "Public",
+ "active": true,
+ "bug_reporting_guidelines": null,
+ "bug_reported_acknowledgement": null,
+ "official_bug_tags": [],
+ "recipes_collection_link": "https://api.launchpad.net/1.0/fourbar/recipes",
+ "active_milestones_collection_link": "https://api.launchpad.net/1.0/fourbar/active_milestones",
+ "all_milestones_collection_link": "https://api.launchpad.net/1.0/fourbar/all_milestones",
+ "bug_supervisor_link": null,
+ "qualifies_for_free_hosting": true,
+ "reviewer_whiteboard": "tag:launchpad.net:2008:redacted",
+ "is_permitted": "tag:launchpad.net:2008:redacted",
+ "project_reviewed": "tag:launchpad.net:2008:redacted",
+ "license_approved": "tag:launchpad.net:2008:redacted",
+ "private": false,
+ "display_name": "fourBar",
+ "icon_link": "https://api.launchpad.net/1.0/fourbar/icon",
+ "logo_link": "https://api.launchpad.net/1.0/fourbar/logo",
+ "name": "fourbar",
+ "owner_link": "https://api.launchpad.net/1.0/~sorivenul",
+ "project_group_link": null,
+ "title": "fourBar",
+ "registrant_link": "https://api.launchpad.net/1.0/~sorivenul",
+ "driver_link": null,
+ "summary": "fourBar is a minimal application launcher for POSIX systems. It launches four commonly used applications (terminal, file browser, editor, and web browser by default). It is written in Python/Tkinter. Documentation on simple customization is included. ",
+ "description": "If you wish to help with the development of fourBar, download a branch, test, report bugs and propose features. There is still work to be done.",
+ "date_created": "2008-11-03T07:03:00.872230+00:00",
+ "homepage_url": null,
+ "wiki_url": null,
+ "screenshots_url": null,
+ "download_url": "http://downloads.sourceforge.net/fourbar/fourbar-1.0.0.tar.gz?modtime=1224102066&big_mirror=0",
+ "programming_language": "Python",
+ "sourceforge_project": "fourBar",
+ "freshmeat_project": null,
+ "brand_link": "https://api.launchpad.net/1.0/fourbar/brand",
+ "private_bugs": false,
+ "licenses": [
+ "GNU GPL v3"
+ ],
+ "license_info": null,
+ "bug_tracker_link": null,
+ "date_next_suggest_packaging": null,
+ "series_collection_link": "https://api.launchpad.net/1.0/fourbar/series",
+ "development_focus_link": "https://api.launchpad.net/1.0/fourbar/trunk",
+ "releases_collection_link": "https://api.launchpad.net/1.0/fourbar/releases",
+ "translation_focus_link": null,
+ "commercial_subscription_link": null,
+ "commercial_subscription_is_due": false,
+ "remote_product": "242408&1119369",
+ "security_contact": null,
+ "vcs": "Bazaar",
+ "http_etag": "\"e3685b989bd2609f9a84bd2d90bef380c6f3c92b-13a47c4e8b4688c8fc042bf7eede3a2f4c14a9d6\"",
+ "date_last_modified":"2016-05-19T16:05:23.706734+00:00"
+ },
+ {
+ "self_link": "https://api.launchpad.net/1.0/gekkoware",
+ "web_link": "https://launchpad.net/gekkoware",
+ "resource_type_link": "https://api.launchpad.net/1.0/#project",
+ "official_answers": false,
+ "official_blueprints": false,
+ "official_codehosting": false,
+ "official_bugs": false,
+ "information_type": "Public",
+ "active": true,
+ "bug_reporting_guidelines": null,
+ "bug_reported_acknowledgement": null,
+ "official_bug_tags": [],
+ "recipes_collection_link": "https://api.launchpad.net/1.0/gekkoware/recipes",
+ "active_milestones_collection_link": "https://api.launchpad.net/1.0/gekkoware/active_milestones",
+ "all_milestones_collection_link": "https://api.launchpad.net/1.0/gekkoware/all_milestones",
+ "bug_supervisor_link": null,
+ "qualifies_for_free_hosting": true,
+ "reviewer_whiteboard": "tag:launchpad.net:2008:redacted",
+ "is_permitted": "tag:launchpad.net:2008:redacted",
+ "project_reviewed": "tag:launchpad.net:2008:redacted",
+ "license_approved": "tag:launchpad.net:2008:redacted",
+ "private": false,
+ "display_name": "gekkoware",
+ "icon_link": "https://api.launchpad.net/1.0/gekkoware/icon",
+ "logo_link": "https://api.launchpad.net/1.0/gekkoware/logo",
+ "name": "gekkoware",
+ "owner_link": "https://api.launchpad.net/1.0/~compermisos",
+ "project_group_link": null,
+ "title": "gekkoware",
+ "registrant_link": "https://api.launchpad.net/1.0/~compermisos",
+ "driver_link": null,
+ "summary": "A port of gekko to ubuntu",
+ "description": null,
+ "date_created": "2007-10-21T03:02:22.186775+00:00",
+ "homepage_url": "http://gekkoware.org",
+ "wiki_url": null,
+ "screenshots_url": null,
+ "download_url": null,
+ "programming_language": "php",
+ "sourceforge_project": "gekkoware",
+ "freshmeat_project": null,
+ "brand_link": "https://api.launchpad.net/1.0/gekkoware/brand",
+ "private_bugs": false,
+ "licenses": [
+ "GNU GPL v2"
+ ],
+ "license_info": null,
+ "bug_tracker_link": null,
+ "date_next_suggest_packaging": null,
+ "series_collection_link": "https://api.launchpad.net/1.0/gekkoware/series",
+ "development_focus_link": "https://api.launchpad.net/1.0/gekkoware/trunk",
+ "releases_collection_link": "https://api.launchpad.net/1.0/gekkoware/releases",
+ "translation_focus_link": null,
+ "commercial_subscription_link": null,
+ "commercial_subscription_is_due": false,
+ "remote_product": "117004&676653",
+ "security_contact": null,
+ "vcs": "Bazaar",
+ "http_etag": "\"b9802efcebb5afdd87c8ee10f8473040340bcead-159127be59c12e7cbb161eee4cae2ade72353c0d\"",
+ "date_last_modified":"2017-03-15T16:03:22.706432+00:00"
+ }
+]
diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py
--- a/swh/lister/launchpad/tests/test_lister.py
+++ b/swh/lister/launchpad/tests/test_lister.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2021 The Software Heritage developers
+# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -10,7 +10,7 @@
import pytest
-from ..lister import LaunchpadLister
+from ..lister import LaunchpadLister, origin
class _Repo:
@@ -49,25 +49,35 @@
return _launchpad_response(datadir, "launchpad_response2.json")
-def _mock_getRepositories(mocker, launchpad_response):
+@pytest.fixture
+def launchpad_bzr_response(datadir):
+ return _launchpad_response(datadir, "launchpad_bzr_response.json")
+
+
+def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None):
mock_launchpad = mocker.patch("swh.lister.launchpad.lister.Launchpad")
mock_getRepositories = mock_launchpad.git_repositories.getRepositories
mock_getRepositories.return_value = launchpad_response
+ mock_getBranches = mock_launchpad.branches.getBranches
+ mock_getBranches.return_value = (
+ [] if launchpad_bzr_response is None else launchpad_bzr_response
+ )
mock_launchpad.login_anonymously.return_value = mock_launchpad
- return mock_getRepositories
+ return mock_getRepositories, mock_getBranches
-def _check_listed_origins(scheduler_origins, launchpad_response):
- for origin in launchpad_response:
+def _check_listed_origins(scheduler_origins, launchpad_response, vcs_type="git"):
+ for repo in launchpad_response:
filtered_origins = [
- o for o in scheduler_origins if o.url == origin.git_https_url
+ o for o in scheduler_origins if o.url == origin(vcs_type, repo)
]
assert len(filtered_origins) == 1
- assert filtered_origins[0].last_update == origin.date_last_modified
+ assert filtered_origins[0].last_update == repo.date_last_modified
+ assert filtered_origins[0].visit_type == vcs_type
def test_lister_from_configfile(swh_scheduler_config, mocker):
@@ -81,65 +91,95 @@
assert lister.credentials is not None
-def test_launchpad_full_lister(swh_scheduler, mocker, launchpad_response1):
- mock_getRepositories = _mock_getRepositories(mocker, launchpad_response1)
+def test_launchpad_full_lister(
+ swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response
+):
+ mock_getRepositories, mock_getBranches = _mock_launchpad(
+ mocker, launchpad_response1, launchpad_bzr_response
+ )
lister = LaunchpadLister(scheduler=swh_scheduler)
stats = lister.run()
assert not lister.incremental
assert lister.updated
- assert stats.pages == 1
- assert stats.origins == len(launchpad_response1)
+ assert stats.pages == 1 + 1, "Expects 1 page for git origins, another for bzr ones"
+ assert stats.origins == len(launchpad_response1) + len(launchpad_bzr_response)
mock_getRepositories.assert_called_once_with(
order_by="most neglected first", modified_since_date=None
)
+ mock_getBranches.assert_called_once_with(
+ order_by="most neglected first", modified_since_date=None
+ )
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
- assert len(scheduler_origins) == len(launchpad_response1)
+ assert len(scheduler_origins) == len(launchpad_response1) + len(
+ launchpad_bzr_response
+ )
_check_listed_origins(scheduler_origins, launchpad_response1)
+ _check_listed_origins(scheduler_origins, launchpad_bzr_response, vcs_type="bzr")
def test_launchpad_incremental_lister(
- swh_scheduler, mocker, launchpad_response1, launchpad_response2
+ swh_scheduler,
+ mocker,
+ launchpad_response1,
+ launchpad_response2,
+ launchpad_bzr_response,
):
- mock_getRepositories = _mock_getRepositories(mocker, launchpad_response1)
+ mock_getRepositories, mock_getBranches = _mock_launchpad(
+ mocker, launchpad_response1, launchpad_bzr_response
+ )
lister = LaunchpadLister(scheduler=swh_scheduler, incremental=True)
stats = lister.run()
assert lister.incremental
assert lister.updated
- assert stats.pages == 1
- assert stats.origins == len(launchpad_response1)
+ assert stats.pages == 1 + 1, "Expects 1 page for git origins, another for bzr ones"
+ len_first_runs = len(launchpad_response1) + len(launchpad_bzr_response)
+ assert stats.origins == len_first_runs
mock_getRepositories.assert_called_once_with(
order_by="most neglected first", modified_since_date=None
)
+ mock_getBranches.assert_called_once_with(
+ order_by="most neglected first", modified_since_date=None
+ )
lister_state = lister.get_state_from_scheduler()
- assert lister_state.date_last_modified == launchpad_response1[-1].date_last_modified
+ assert (
+ lister_state.git_date_last_modified
+ == launchpad_response1[-1].date_last_modified
+ )
+ assert (
+ lister_state.bzr_date_last_modified
+ == launchpad_bzr_response[-1].date_last_modified
+ )
- mock_getRepositories = _mock_getRepositories(mocker, launchpad_response2)
+ mock_getRepositories, mock_getBranches = _mock_launchpad(
+ mocker, launchpad_response2
+ )
lister = LaunchpadLister(scheduler=swh_scheduler, incremental=True)
stats = lister.run()
assert lister.incremental
assert lister.updated
- assert stats.pages == 1
+ assert stats.pages == 2, "Empty bzr response still accounts for 1 page"
assert stats.origins == len(launchpad_response2)
mock_getRepositories.assert_called_once_with(
order_by="most neglected first",
- modified_since_date=lister_state.date_last_modified,
+ modified_since_date=lister_state.git_date_last_modified,
)
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
- assert len(scheduler_origins) == len(launchpad_response1) + len(launchpad_response2)
+ assert len(scheduler_origins) == len_first_runs + len(launchpad_response2)
_check_listed_origins(scheduler_origins, launchpad_response1)
+ _check_listed_origins(scheduler_origins, launchpad_bzr_response, vcs_type="bzr")
_check_listed_origins(scheduler_origins, launchpad_response2)
@@ -147,12 +187,12 @@
swh_scheduler, mocker,
):
invalid_origin = [_Repo({"git_https_url": "tag:launchpad.net:2008:redacted",})]
- _mock_getRepositories(mocker, invalid_origin)
+ _mock_launchpad(mocker, invalid_origin)
lister = LaunchpadLister(scheduler=swh_scheduler)
stats = lister.run()
assert not lister.updated
- assert stats.pages == 1
+ assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)"
assert stats.origins == 0
@@ -166,10 +206,10 @@
}
)
origins = [origin, origin]
- _mock_getRepositories(mocker, origins)
+ _mock_launchpad(mocker, origins)
lister = LaunchpadLister(scheduler=swh_scheduler)
stats = lister.run()
assert lister.updated
- assert stats.pages == 1
+ assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)"
assert stats.origins == 1

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 3:48 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227496

Event Timeline