Page Menu | Home | Software Heritage

D6285.diff
No One | Temporary

D6285.diff

diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py
--- a/swh/lister/gitlab/lister.py
+++ b/swh/lister/gitlab/lister.py
@@ -23,6 +23,9 @@
logger = logging.getLogger(__name__)
+IGNORED_DVCS = ("hg_git",)
+
+
@dataclass
class GitLabListerState:
"""State of the GitLabLister"""
@@ -203,10 +206,17 @@
repositories = page_result.repositories if page_result.repositories else []
for repo in repositories:
+ visit_type = repo.get("vcs_type", "git")
+ url = repo["http_url_to_repo"]
+ if visit_type in IGNORED_DVCS:
+ logger.warning(
+ "Ignoring origin %s with type '%s'", url, visit_type,
+ )
+ continue
yield ListedOrigin(
lister_id=self.lister_obj.id,
- url=repo["http_url_to_repo"],
- visit_type="git",
+ url=url,
+ visit_type=visit_type,
last_update=iso8601.parse_date(repo["last_activity_at"]),
)
diff --git a/swh/lister/gitlab/tests/data/https_foss.heptapod.net/api_response_page1.json b/swh/lister/gitlab/tests/data/https_foss.heptapod.net/api_response_page1.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/gitlab/tests/data/https_foss.heptapod.net/api_response_page1.json
@@ -0,0 +1,320 @@
+[
+ {
+ "id": 1,
+ "description": "Slides for a Heptapod presentation at Mercurial Conference - Paris 2019",
+ "vcs_type": "hg_git",
+ "name": "2019-hg-paris",
+ "name_with_namespace": "heptapod / slides / 2019-hg-paris",
+ "path": "2019-hg-paris",
+ "path_with_namespace": "heptapod/slides/2019-hg-paris",
+ "created_at": "2019-05-28T00:53:04.064Z",
+ "default_branch": "branch/default",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/slides/2019-hg-paris",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/slides/2019-hg-paris",
+ "web_url": "https://foss.heptapod.net/heptapod/slides/2019-hg-paris",
+ "readme_url": null,
+ "avatar_url": null,
+ "forks_count": 0,
+ "star_count": 0,
+ "last_activity_at": "2019-06-11T16:39:49.827Z",
+ "namespace": {
+ "id": 4,
+ "name": "slides",
+ "path": "slides",
+ "kind": "group",
+ "full_path": "heptapod/slides",
+ "parent_id": 3,
+ "avatar_url": null,
+ "web_url": "https://foss.heptapod.net/groups/heptapod/slides"
+ }
+ },
+ {
+ "id": 3,
+ "description": "Obsolete fork of omnibus-gitlab, predating the creation of the separate heptapod-docker project done for Heptapod *0.6.1*",
+ "vcs_type": "hg_git",
+ "name": "omnibus",
+ "name_with_namespace": "heptapod / omnibus",
+ "path": "omnibus",
+ "path_with_namespace": "heptapod/omnibus",
+ "created_at": "2019-06-01T17:15:28.005Z",
+ "default_branch": "branch/heptapod",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/omnibus",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/omnibus",
+ "web_url": "https://foss.heptapod.net/heptapod/omnibus",
+ "readme_url": "https://foss.heptapod.net/heptapod/omnibus/-/blob/branch/heptapod/README.md",
+ "avatar_url": null,
+ "forks_count": 0,
+ "star_count": 0,
+ "last_activity_at": "2020-04-24T13:57:28.102Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ },
+ {
+ "id": 5,
+ "description": "GitLab CE Rails application, converted to a Mercurial repository and modified for Mercurial support in a Mercurial branch called \"Heptapod\".",
+ "vcs_type": "hg_git",
+ "name": "heptapod",
+ "name_with_namespace": "heptapod / heptapod",
+ "path": "heptapod",
+ "path_with_namespace": "heptapod/heptapod",
+ "created_at": "2019-06-02T10:49:49.250Z",
+ "default_branch": "branch/heptapod",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/heptapod",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/heptapod",
+ "web_url": "https://foss.heptapod.net/heptapod/heptapod",
+ "readme_url": "https://foss.heptapod.net/heptapod/heptapod/-/blob/branch/heptapod/README.md",
+ "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/5/heptapod.png",
+ "forks_count": 0,
+ "star_count": 17,
+ "last_activity_at": "2021-09-13T16:11:42.053Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ },
+ {
+ "id": 7,
+ "description": "Mercurial-Git bridge, modified for the needs of Heptapod in a branch called \"heptapod\"",
+ "vcs_type": "hg_git",
+ "name": "hg-git",
+ "name_with_namespace": "heptapod / hg-git",
+ "path": "hg-git",
+ "path_with_namespace": "heptapod/hg-git",
+ "created_at": "2019-06-02T14:40:36.730Z",
+ "default_branch": "branch/heptapod-0-8",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/hg-git",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/hg-git",
+ "web_url": "https://foss.heptapod.net/heptapod/hg-git",
+ "readme_url": "https://foss.heptapod.net/heptapod/hg-git/-/blob/branch/heptapod-0-8/README.md",
+ "avatar_url": null,
+ "forks_count": 0,
+ "star_count": 1,
+ "last_activity_at": "2020-05-06T13:29:51.900Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ },
+ {
+ "id": 9,
+ "description": "A Mercurial extension to provide logs via the `logging` module of the Python standard library.",
+ "vcs_type": "hg",
+ "name": "hgext-loggingmod",
+ "name_with_namespace": "heptapod / hgext-loggingmod",
+ "path": "hgext-loggingmod",
+ "path_with_namespace": "heptapod/hgext-loggingmod",
+ "created_at": "2019-07-05T17:48:54.928Z",
+ "default_branch": "branch/default",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/hgext-loggingmod",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/hgext-loggingmod",
+ "web_url": "https://foss.heptapod.net/heptapod/hgext-loggingmod",
+ "readme_url": "https://foss.heptapod.net/heptapod/hgext-loggingmod/-/blob/branch/default/README.md",
+ "avatar_url": null,
+ "forks_count": 0,
+ "star_count": 0,
+ "last_activity_at": "2020-08-03T11:37:39.413Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ },
+ {
+ "id": 11,
+ "description": "Source for the statically generated website at https://heptapod.net",
+ "vcs_type": "hg_git",
+ "name": "website",
+ "name_with_namespace": "heptapod / website",
+ "path": "website",
+ "path_with_namespace": "heptapod/website",
+ "created_at": "2019-07-17T15:14:17.576Z",
+ "default_branch": "branch/default",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/website",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/website",
+ "web_url": "https://foss.heptapod.net/heptapod/website",
+ "readme_url": "https://foss.heptapod.net/heptapod/website/-/blob/branch/default/README.rst",
+ "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/11/logo-heptapod-www.png",
+ "forks_count": 0,
+ "star_count": 0,
+ "last_activity_at": "2021-08-25T08:21:51.036Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ },
+ {
+ "id": 12,
+ "description": "A suite of functional / API tests written with Selenium and the Python requests library (for the API part)",
+ "vcs_type": "hg_git",
+ "name": "heptapod-tests",
+ "name_with_namespace": "heptapod / heptapod-tests",
+ "path": "heptapod-tests",
+ "path_with_namespace": "heptapod/heptapod-tests",
+ "created_at": "2019-07-19T14:51:15.657Z",
+ "default_branch": "branch/default",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/heptapod-tests",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/heptapod-tests",
+ "web_url": "https://foss.heptapod.net/heptapod/heptapod-tests",
+ "readme_url": "https://foss.heptapod.net/heptapod/heptapod-tests/-/blob/branch/default/README.md",
+ "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/12/selenium-logo.png",
+ "forks_count": 0,
+ "star_count": 0,
+ "last_activity_at": "2021-09-08T13:10:15.911Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ },
+ {
+ "id": 14,
+ "description": "Generic Docker images to serve Mercurial content over HTTP, and notably to act as a mirror.\r\nThese are published on [Docker Hub](https://hub.docker.com/u/octobus)",
+ "vcs_type": "hg_git",
+ "name": "mercurial-mirror",
+ "name_with_namespace": "heptapod / mercurial-mirror",
+ "path": "mercurial-mirror",
+ "path_with_namespace": "heptapod/mercurial-mirror",
+ "created_at": "2019-08-21T13:10:30.330Z",
+ "default_branch": "branch/default",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/mercurial-mirror",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/mercurial-mirror",
+ "web_url": "https://foss.heptapod.net/heptapod/mercurial-mirror",
+ "readme_url": null,
+ "avatar_url": null,
+ "forks_count": 0,
+ "star_count": 0,
+ "last_activity_at": "2019-08-21T13:10:30.330Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ },
+ {
+ "id": 15,
+ "description": "This is the development repository for the evolve extension.\r\n\r\nOfficial repository at: https://mercurial-scm.org/repo/evolve/\r\n\r\nOfficial bug tracker: https://bz.mercurial-scm.org/ (component, \"evolution\")\r\n",
+ "vcs_type": "hg_git",
+ "name": "evolve",
+ "name_with_namespace": "mercurial / evolve",
+ "path": "evolve",
+ "path_with_namespace": "mercurial/evolve",
+ "created_at": "2019-08-31T07:34:31.812Z",
+ "default_branch": "branch/default",
+ "tag_list": [
+ "extension",
+ "history-rewriting",
+ "mercurial"
+ ],
+ "topics": [
+ "extension",
+ "history-rewriting",
+ "mercurial"
+ ],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/mercurial/evolve",
+ "http_url_to_repo": "https://foss.heptapod.net/mercurial/evolve",
+ "web_url": "https://foss.heptapod.net/mercurial/evolve",
+ "readme_url": "https://foss.heptapod.net/mercurial/evolve/-/blob/branch/default/README.rst",
+ "avatar_url": "https://foss.heptapod.net/uploads/-/system/project/avatar/15/ammonoid.png",
+ "forks_count": 0,
+ "star_count": 11,
+ "last_activity_at": "2021-09-12T18:01:37.794Z",
+ "namespace": {
+ "id": 18,
+ "name": "mercurial",
+ "path": "mercurial",
+ "kind": "group",
+ "full_path": "mercurial",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/18/droplets-100.png",
+ "web_url": "https://foss.heptapod.net/groups/mercurial"
+ }
+ },
+ {
+ "id": 21,
+ "description": "This project is deprecated in favour of Omnibus Heptapod Docker build capability, and will be archived once Heptapod 0.17 becomes the new stable series.",
+ "vcs_type": "hg",
+ "name": "heptapod-docker",
+ "name_with_namespace": "heptapod / heptapod-docker",
+ "path": "heptapod-docker",
+ "path_with_namespace": "heptapod/heptapod-docker",
+ "created_at": "2019-09-17T17:06:28.678Z",
+ "default_branch": "branch/default",
+ "tag_list": [],
+ "topics": [],
+ "ssh_url_to_repo": "ssh://hg@foss.heptapod.net/heptapod/heptapod-docker",
+ "http_url_to_repo": "https://foss.heptapod.net/heptapod/heptapod-docker",
+ "web_url": "https://foss.heptapod.net/heptapod/heptapod-docker",
+ "readme_url": "https://foss.heptapod.net/heptapod/heptapod-docker/-/blob/branch/default/README.md",
+ "avatar_url": null,
+ "forks_count": 0,
+ "star_count": 0,
+ "last_activity_at": "2021-06-21T15:14:34.070Z",
+ "namespace": {
+ "id": 3,
+ "name": "heptapod",
+ "path": "heptapod",
+ "kind": "group",
+ "full_path": "heptapod",
+ "parent_id": null,
+ "avatar_url": "/uploads/-/system/group/avatar/3/heptapod.png",
+ "web_url": "https://foss.heptapod.net/groups/heptapod"
+ }
+ }
+]
diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py
--- a/swh/lister/gitlab/tests/test_lister.py
+++ b/swh/lister/gitlab/tests/test_lister.py
@@ -12,7 +12,7 @@
from requests.status_codes import codes
from swh.lister import USER_AGENT
-from swh.lister.gitlab.lister import GitLabLister, _parse_id_after
+from swh.lister.gitlab.lister import IGNORED_DVCS, GitLabLister, _parse_id_after
from swh.lister.pattern import ListerStats
from swh.lister.tests.test_utils import assert_sleep_calls
from swh.lister.utils import WAIT_EXP_BASE
@@ -56,6 +56,38 @@
assert listed_origin.last_update is not None
+def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock):
+ """Gitlab lister ignores some vcs_type
+
+ """
+ instance = "foss.heptapod.net"
+ lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance)
+ response = gitlab_page_response(datadir, instance, 1)
+
+ requests_mock.get(
+ lister.page_url(), [{"json": response}], additional_matcher=_match_request,
+ )
+
+ listed_result = lister.run()
+ expected_nb_origins = 0
+ for entry in response:
+ if entry["vcs_type"] in IGNORED_DVCS:
+ continue
+ expected_nb_origins += 1
+
+ assert listed_result == ListerStats(pages=1, origins=expected_nb_origins)
+
+ scheduler_origins = lister.scheduler.get_listed_origins(
+ lister.lister_obj.id
+ ).results
+ assert len(scheduler_origins) == expected_nb_origins
+
+ for listed_origin in scheduler_origins:
+ assert listed_origin.visit_type == "hg"
+ assert listed_origin.url.startswith(f"https://{instance}")
+ assert listed_origin.last_update is not None
+
+
def gitlab_page_response(datadir, instance: str, id_after: int) -> List[Dict]:
"""Return list of repositories (out of test dataset)"""
datapath = Path(datadir, f"https_{instance}", f"api_response_page{id_after}.json")

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 12:01 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217409

Event Timeline