diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -23,6 +23,9 @@ logger = logging.getLogger(__name__) +IGNORED_DVCS = ("hg_git",) + + @dataclass class GitLabListerState: """State of the GitLabLister""" @@ -203,10 +206,17 @@ repositories = page_result.repositories if page_result.repositories else [] for repo in repositories: + visit_type = repo.get("vcs_type", "git") + url = repo["http_url_to_repo"] + if visit_type in IGNORED_DVCS: + logger.warning( + "Ignoring origin %s with type '%s'", url, visit_type, + ) + continue yield ListedOrigin( lister_id=self.lister_obj.id, - url=repo["http_url_to_repo"], - visit_type="git", + url=url, + visit_type=visit_type, last_update=iso8601.parse_date(repo["last_activity_at"]), ) diff --git a/swh/lister/gitlab/tests/data/https_foss.heptapod.net/api_response_page1.json b/swh/lister/gitlab/tests/data/https_foss.heptapod.net/api_response_page1.json new file mode 100644 --- /dev/null +++ b/swh/lister/gitlab/tests/data/https_foss.heptapod.net/api_response_page1.json @@ -0,0 +1,320 @@ +[ + { + "id": 1, + "description": "Slides for a Heptapod presentation at Mercurial Conference - Paris 2019", + "vcs_type": "hg_git", + "name": "2019-hg-paris", + "name_with_namespace": "heptapod / slides / 2019-hg-paris", + "path": "2019-hg-paris", + "path_with_namespace": "heptapod/slides/2019-hg-paris", + "created_at": "2019-05-28T00:53:04.064Z", + "default_branch": "branch/default", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/slides/2019-hg-paris", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/slides/2019-hg-paris", + "web_url": "https://foss.heptapod.net/heptapod/slides/2019-hg-paris", + "readme_url": null, + "avatar_url": null, + "forks_count": 0, + "star_count": 0, + "last_activity_at": "2019-06-11T16:39:49.827Z", + "namespace": { + "id": 4, + "name": "slides", + "path": "slides", + "kind": "group", + "full_path": "heptapod/slides", + "parent_id": 3, + "avatar_url": null, + "web_url": "https://foss.heptapod.net/groups/heptapod/slides" + } + }, + { + "id": 3, + "description": "Obsolete fork of omnibus-gitlab, predating the creation of the separate heptapod-docker project done for Heptapod *0.6.1*", + "vcs_type": "hg_git", + "name": "omnibus", + "name_with_namespace": "heptapod / omnibus", + "path": "omnibus", + "path_with_namespace": "heptapod/omnibus", + "created_at": "2019-06-01T17:15:28.005Z", + "default_branch": "branch/heptapod", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/omnibus", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/omnibus", + "web_url": "https://foss.heptapod.net/heptapod/omnibus", + "readme_url": "https://foss.heptapod.net/heptapod/omnibus/-/blob/branch/heptapod/README.md", + "avatar_url": null, + "forks_count": 0, + "star_count": 0, + "last_activity_at": "2020-04-24T13:57:28.102Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + }, + { + "id": 5, + "description": "GitLab CE Rails application, converted to a Mercurial repository and modified for Mercurial support in a Mercurial branch called \"Heptapod\".", + "vcs_type": "hg_git", + "name": "heptapod", + "name_with_namespace": "heptapod / heptapod", + "path": "heptapod", + "path_with_namespace": "heptapod/heptapod", + "created_at": "2019-06-02T10:49:49.250Z", + "default_branch": "branch/heptapod", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/heptapod", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/heptapod", + "web_url": "https://foss.heptapod.net/heptapod/heptapod", + "readme_url": "https://foss.heptapod.net/heptapod/heptapod/-/blob/branch/heptapod/README.md", + "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/5/heptapod.png", + "forks_count": 0, + "star_count": 17, + "last_activity_at": "2021-09-13T16:11:42.053Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + }, + { + "id": 7, + "description": "Mercurial-Git bridge, modified for the needs of Heptapod in a branch called \"heptapod\"", + "vcs_type": "hg_git", + "name": "hg-git", + "name_with_namespace": "heptapod / hg-git", + "path": "hg-git", + "path_with_namespace": "heptapod/hg-git", + "created_at": "2019-06-02T14:40:36.730Z", + "default_branch": "branch/heptapod-0-8", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/hg-git", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/hg-git", + "web_url": "https://foss.heptapod.net/heptapod/hg-git", + "readme_url": "https://foss.heptapod.net/heptapod/hg-git/-/blob/branch/heptapod-0-8/README.md", + "avatar_url": null, + "forks_count": 0, + "star_count": 1, + "last_activity_at": "2020-05-06T13:29:51.900Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + }, + { + "id": 9, + "description": "A Mercurial extension to provide logs via the `logging` module of the Python standard library.", + "vcs_type": "hg", + "name": "hgext-loggingmod", + "name_with_namespace": "heptapod / hgext-loggingmod", + "path": "hgext-loggingmod", + "path_with_namespace": "heptapod/hgext-loggingmod", + "created_at": "2019-07-05T17:48:54.928Z", + "default_branch": "branch/default", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/hgext-loggingmod", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/hgext-loggingmod", + "web_url": "https://foss.heptapod.net/heptapod/hgext-loggingmod", + "readme_url": "https://foss.heptapod.net/heptapod/hgext-loggingmod/-/blob/branch/default/README.md", + "avatar_url": null, + "forks_count": 0, + "star_count": 0, + "last_activity_at": "2020-08-03T11:37:39.413Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + }, + { + "id": 11, + "description": "Source for the statically generated website at https://heptapod.net", + "vcs_type": "hg_git", + "name": "website", + "name_with_namespace": "heptapod / website", + "path": "website", + "path_with_namespace": "heptapod/website", + "created_at": "2019-07-17T15:14:17.576Z", + "default_branch": "branch/default", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/website", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/website", + "web_url": "https://foss.heptapod.net/heptapod/website", + "readme_url": "https://foss.heptapod.net/heptapod/website/-/blob/branch/default/README.rst", + "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/11/logo-heptapod-www.png", + "forks_count": 0, + "star_count": 0, + "last_activity_at": "2021-08-25T08:21:51.036Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + }, + { + "id": 12, + "description": "A suite of functional / API tests written with Selenium and the Python requests library (for the API part)", + "vcs_type": "hg_git", + "name": "heptapod-tests", + "name_with_namespace": "heptapod / heptapod-tests", + "path": "heptapod-tests", + "path_with_namespace": "heptapod/heptapod-tests", + "created_at": "2019-07-19T14:51:15.657Z", + "default_branch": "branch/default", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/heptapod-tests", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/heptapod-tests", + "web_url": "https://foss.heptapod.net/heptapod/heptapod-tests", + "readme_url": "https://foss.heptapod.net/heptapod/heptapod-tests/-/blob/branch/default/README.md", + "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/12/selenium-logo.png", + "forks_count": 0, + "star_count": 0, + "last_activity_at": "2021-09-08T13:10:15.911Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + }, + { + "id": 14, + "description": "Generic Docker images to serve Mercurial content over HTTP, and notably to act as a mirror.\r\nThese are published on [Docker Hub](https://hub.docker.com/u/octobus)", + "vcs_type": "hg_git", + "name": "mercurial-mirror", + "name_with_namespace": "heptapod / mercurial-mirror", + "path": "mercurial-mirror", + "path_with_namespace": "heptapod/mercurial-mirror", + "created_at": "2019-08-21T13:10:30.330Z", + "default_branch": "branch/default", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/mercurial-mirror", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/mercurial-mirror", + "web_url": "https://foss.heptapod.net/heptapod/mercurial-mirror", + "readme_url": null, + "avatar_url": null, + "forks_count": 0, + "star_count": 0, + "last_activity_at": "2019-08-21T13:10:30.330Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + }, + { + "id": 15, + "description": "This is the development repository for the evolve extension.\r\n\r\nOfficial repository at: https://mercurial-scm.org/repo/evolve/\r\n\r\nOfficial bug tracker: https://bz.mercurial-scm.org/ (component, \"evolution\")\r\n", + "vcs_type": "hg_git", + "name": "evolve", + "name_with_namespace": "mercurial / evolve", + "path": "evolve", + "path_with_namespace": "mercurial/evolve", + "created_at": "2019-08-31T07:34:31.812Z", + "default_branch": "branch/default", + "tag_list": [ + "extension", + "history-rewriting", + "mercurial" + ], + "topics": [ + "extension", + "history-rewriting", + "mercurial" + ], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/mercurial/evolve", + "http_url_to_repo": "https://foss.heptapod.net/mercurial/evolve", + "web_url": "https://foss.heptapod.net/mercurial/evolve", + "readme_url": "https://foss.heptapod.net/mercurial/evolve/-/blob/branch/default/README.rst", + "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/15/ammonoid.png", + "forks_count": 0, + "star_count": 11, + "last_activity_at": "2021-09-12T18:01:37.794Z", + "namespace": { + "id": 18, + "name": "mercurial", + "path": "mercurial", + "kind": "group", + "full_path": "mercurial", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/18/droplets-100.png", + "web_url": "https://foss.heptapod.net/groups/mercurial" + } + }, + { + "id": 21, + "description": "This project is deprecated in favour of Omnibus Heptapod Docker build capability, and will be archived once Heptapod 0.17 becomes the new stable series.", + "vcs_type": "hg", + "name": "heptapod-docker", + "name_with_namespace": "heptapod / heptapod-docker", + "path": "heptapod-docker", + "path_with_namespace": "heptapod/heptapod-docker", + "created_at": "2019-09-17T17:06:28.678Z", + "default_branch": "branch/default", + "tag_list": [], + "topics": [], + "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/heptapod-docker", + "http_url_to_repo": "https://foss.heptapod.net/heptapod/heptapod-docker", + "web_url": "https://foss.heptapod.net/heptapod/heptapod-docker", + "readme_url": "https://foss.heptapod.net/heptapod/heptapod-docker/-/blob/branch/default/README.md", + "avatar_url": null, + "forks_count": 0, + "star_count": 0, + "last_activity_at": "2021-06-21T15:14:34.070Z", + "namespace": { + "id": 3, + "name": "heptapod", + "path": "heptapod", + "kind": "group", + "full_path": "heptapod", + "parent_id": null, + "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png", + "web_url": "https://foss.heptapod.net/groups/heptapod" + } + } +] diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -12,7 +12,7 @@ from requests.status_codes import codes from swh.lister import USER_AGENT -from swh.lister.gitlab.lister import GitLabLister, _parse_id_after +from swh.lister.gitlab.lister import IGNORED_DVCS, GitLabLister, _parse_id_after from swh.lister.pattern import ListerStats from swh.lister.tests.test_utils import assert_sleep_calls from swh.lister.utils import WAIT_EXP_BASE @@ -56,6 +56,38 @@ assert listed_origin.last_update is not None +def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock): + """Gitlab lister ignores some vcs_type + + """ + instance = "foss.heptapod.net" + lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance) + response = gitlab_page_response(datadir, instance, 1) + + requests_mock.get( + lister.page_url(), [{"json": response}], additional_matcher=_match_request, + ) + + listed_result = lister.run() + expected_nb_origins = 0 + for entry in response: + if entry["vcs_type"] in IGNORED_DVCS: + continue + expected_nb_origins += 1 + + assert listed_result == ListerStats(pages=1, origins=expected_nb_origins) + + scheduler_origins = lister.scheduler.get_listed_origins( + lister.lister_obj.id + ).results + assert len(scheduler_origins) == expected_nb_origins + + for listed_origin in scheduler_origins: + assert listed_origin.visit_type == "hg" + assert listed_origin.url.startswith(f"https://{instance}") + assert listed_origin.last_update is not None + + def gitlab_page_response(datadir, instance: str, id_after: int) -> List[Dict]: """Return list of repositories (out of test dataset)""" datapath = Path(datadir, f"https_{instance}", f"api_response_page{id_after}.json")