class BitbucketOriginSaveWebhookReceiver(OriginSaveWebhookReceiver):
    """Webhook receiver handling push events sent by Bitbucket."""

    FORGE_TYPE = "Bitbucket"
    WEBHOOK_GUIDE_URL = (
        "https://support.atlassian.com/bitbucket-cloud/docs/manage-webhooks/"
    )
    REPO_TYPES = "git"

    def is_forge_request(self, request: Request) -> bool:
        """Tell whether the request carries the Bitbucket webhook user agent
        prefix and an ``X-Event-Key`` header."""
        user_agent = request.headers.get("User-Agent", "")
        if not user_agent.startswith(f"{self.FORGE_TYPE}-Webhooks/"):
            return False
        return "X-Event-Key" in request.headers

    def is_push_event(self, request: Request) -> bool:
        """Tell whether the event key header equals ``repo:push``."""
        event_key = request.headers["X-Event-Key"]
        return event_key == "repo:push"

    def extract_repo_info(self, request: Request) -> Tuple[str, str, bool]:
        """Return ``(repository_url, visit_type, private)`` read from the payload.

        The URL is taken from ``repository.links.html.href`` and suffixed with
        ``.git``; it stays empty when that field is missing. The private flag
        comes from ``repository.is_private`` and defaults to ``False``.
        """
        repository = request.data.get("repository", {})
        html_href = repository.get("links", {}).get("html", {}).get("href", "")
        clone_url = html_href + ".git" if html_href else html_href
        is_private = repository.get("is_private", False)
        return clone_url, "git", is_private


api_origin_save_webhook_bitbucket = BitbucketOriginSaveWebhookReceiver()
class OriginSaveWebhookReceiver(abc.ABC):
    """Base class for forge webhook receivers that create save origin requests.

    Subclasses set the three class attributes below and implement the abstract
    hooks. Instantiating a subclass passes the instance through ``api_doc`` and
    ``api_route`` to register a POST endpoint in ``webhooks_api_urls``; calling
    the instance with a request processes the webhook payload.
    """

    # Forge display name; its lowercase form is used in the endpoint URL,
    # the route name and the view name.
    FORGE_TYPE: str
    # Link to the forge documentation on webhooks, embedded in the endpoint doc.
    WEBHOOK_GUIDE_URL: str
    # Human readable repository type(s), embedded in the endpoint doc.
    REPO_TYPES: str

    @abc.abstractmethod
    def is_forge_request(self, request: Request) -> bool:
        """Tell whether the request was sent by a webhook of the forge."""
        ...

    def is_ping_event(self, request: Request) -> bool:
        """Tell whether the request is a ping event; ``False`` unless overridden."""
        return False

    @abc.abstractmethod
    def is_push_event(self, request: Request) -> bool:
        """Tell whether the request notifies a push to a repository."""
        ...

    @abc.abstractmethod
    def extract_repo_info(self, request: Request) -> Tuple[str, str, bool]:
        """Extract and return a tuple (repository_url, visit_type, private) from
        the forge webhook payload."""
        ...

    def __init__(self):
        """Build the endpoint documentation and register the receiver route."""
        self.__doc__ = f"""
        .. http:post:: /api/1/origin/save/webhook/{self.FORGE_TYPE.lower()}/

            Webhook receiver for {self.FORGE_TYPE} to request or update the archival of
            a repository when new commits are pushed to it.

            To add such webhook to one of your {self.REPO_TYPES} repository hosted on
            {self.FORGE_TYPE}, please follow `{self.FORGE_TYPE}'s webhooks guide
            <{self.WEBHOOK_GUIDE_URL}>`_.

            The expected content type for the webhook payload must be ``application/json``.

            :>json string origin_url: the url of the origin to save
            :>json string visit_type: the type of visit to perform
            :>json string save_request_date: the date (in iso format) the save
                request was issued
            :>json string save_request_status: the status of the save request,
                either **accepted**, **rejected** or **pending**

            :statuscode 200: save request for repository has been successfully created
                from the webhook payload.
            :statuscode 400: no save request has been created due to invalid POST
                request or missing data in webhook payload
        """

        # The "f" prefix was missing here, so every receiver ended up with the
        # same literal, un-interpolated name.
        self.__name__ = f"api_origin_save_webhook_{self.FORGE_TYPE.lower()}"

        api_doc(
            f"/origin/save/webhook/{self.FORGE_TYPE.lower()}/",
            category="Request archival",
        )(self)

        api_route(
            f"/origin/save/webhook/{self.FORGE_TYPE.lower()}/",
            f"api-1-origin-save-webhook-{self.FORGE_TYPE.lower()}",
            methods=["POST"],
            api_urls=webhooks_api_urls,
        )(self)

    def __call__(
        self,
        request: Request,
    ) -> Dict[str, Any]:
        """Process a webhook request and create a save origin request.

        Returns:
            ``{"message": "pong"}`` for a ping event, otherwise the origin URL,
            visit type, date and status of the created save request.

        Raises:
            BadInputExc: the request was not sent by the forge, is not a push
                event, has a non JSON content type, lacks a repository URL or
                visit type, or targets a private repository.
        """
        if not self.is_forge_request(request):
            raise BadInputExc(
                f"POST request was not sent by a {self.FORGE_TYPE} webhook and "
                "has not been processed."
            )

        # Ping events are answered before any payload validation.
        if self.is_ping_event(request):
            return {"message": "pong"}

        if not self.is_push_event(request):
            raise BadInputExc(
                f"Event sent by {self.FORGE_TYPE} webhook is not a push one, request "
                "has not been processed."
            )

        # A missing Content-Type header is tolerated, a wrong one is rejected.
        content_type = request.headers.get("Content-Type")
        if content_type and not content_type.startswith("application/json"):
            raise BadInputExc(
                f"Invalid content type '{content_type}' for the POST request sent by "
                f"{self.FORGE_TYPE} webhook, it should be 'application/json'."
            )

        repo_url, visit_type, private = self.extract_repo_info(request)
        if not repo_url:
            raise BadInputExc(
                f"Repository URL could not be extracted from {self.FORGE_TYPE} webhook "
                f"payload."
            )
        if not visit_type:
            raise BadInputExc(
                f"Visit type could not be determined for repository {repo_url}."
            )
        if private:
            raise BadInputExc(
                f"Repository {repo_url} is private and cannot be cloned "
                "without authentication."
            )

        save_request = create_save_origin_request(
            visit_type=visit_type, origin_url=repo_url
        )

        return {
            "origin_url": save_request["origin_url"],
            "visit_type": save_request["visit_type"],
            "save_request_date": save_request["save_request_date"],
            "save_request_status": save_request["save_request_status"],
        }
class GiteaOriginSaveWebhookReceiver(OriginSaveWebhookReceiver):
    """Webhook receiver handling push events sent by Gitea."""

    FORGE_TYPE = "Gitea"
    WEBHOOK_GUIDE_URL = "https://docs.gitea.io/en-us/webhooks/"
    REPO_TYPES = "git"

    def is_forge_request(self, request: Request) -> bool:
        """Tell whether the request carries the ``X-Gitea-Event`` header."""
        event_header = f"X-{self.FORGE_TYPE}-Event"
        return event_header in request.headers

    def is_push_event(self, request: Request) -> bool:
        """Tell whether the event header equals ``push``."""
        event_header = f"X-{self.FORGE_TYPE}-Event"
        return request.headers[event_header] == "push"

    def extract_repo_info(self, request: Request) -> Tuple[str, str, bool]:
        """Return ``(repository_url, visit_type, private)`` read from the payload.

        The URL comes from ``repository.clone_url`` (empty when missing) and the
        private flag from ``repository.private`` (``False`` when missing).
        """
        repository = request.data.get("repository", {})
        clone_url = repository.get("clone_url", "")
        is_private = repository.get("private", False)
        return clone_url, "git", is_private


api_origin_save_webhook_gitea = GiteaOriginSaveWebhookReceiver()
class GitHubOriginSaveWebhookReceiver(OriginSaveWebhookReceiver):
    """Webhook receiver handling ping and push events sent by GitHub."""

    FORGE_TYPE = "GitHub"
    WEBHOOK_GUIDE_URL = (
        "https://docs.github.com/en/developers/webhooks-and-events/"
        "webhooks/creating-webhooks#setting-up-a-webhook"
    )
    REPO_TYPES = "git"

    def is_forge_request(self, request: Request) -> bool:
        """Tell whether the request carries the GitHub hookshot user agent
        prefix and the ``X-GitHub-Event`` header."""
        user_agent = request.headers.get("User-Agent", "")
        if not user_agent.startswith(f"{self.FORGE_TYPE}-Hookshot/"):
            return False
        return f"X-{self.FORGE_TYPE}-Event" in request.headers

    def is_ping_event(self, request: Request) -> bool:
        """Tell whether the event header equals ``ping``."""
        event = request.headers[f"X-{self.FORGE_TYPE}-Event"]
        return event == "ping"

    def is_push_event(self, request: Request) -> bool:
        """Tell whether the event header equals ``push``."""
        event = request.headers[f"X-{self.FORGE_TYPE}-Event"]
        return event == "push"

    def extract_repo_info(self, request: Request) -> Tuple[str, str, bool]:
        """Return ``(repository_url, visit_type, private)`` read from the payload.

        The URL comes from ``repository.html_url`` (empty when missing) and the
        private flag from ``repository.private`` (``False`` when missing).
        """
        repository = request.data.get("repository", {})
        html_url = repository.get("html_url", "")
        is_private = repository.get("private", False)
        return html_url, "git", is_private


api_origin_save_webhook_github = GitHubOriginSaveWebhookReceiver()
class GitlabOriginSaveWebhookReceiver(OriginSaveWebhookReceiver):
    """Webhook receiver handling push events sent by GitLab."""

    FORGE_TYPE = "GitLab"
    WEBHOOK_GUIDE_URL = (
        "https://docs.gitlab.com/ee/user/project/integrations/"
        "webhooks.html#configure-a-webhook-in-gitlab"
    )
    REPO_TYPES = "git"

    def is_forge_request(self, request: Request) -> bool:
        """Tell whether the request carries the GitLab user agent prefix and
        the ``X-Gitlab-Event`` header."""
        user_agent = request.headers.get("User-Agent", "")
        if not user_agent.startswith(f"{self.FORGE_TYPE}/"):
            return False
        return "X-Gitlab-Event" in request.headers

    def is_push_event(self, request: Request) -> bool:
        """Tell whether the event header equals ``Push Hook``."""
        event = request.headers["X-Gitlab-Event"]
        return event == "Push Hook"

    def extract_repo_info(self, request: Request) -> Tuple[str, str, bool]:
        """Return ``(repository_url, visit_type, private)`` read from the payload.

        The URL comes from ``repository.git_http_url``. Any visibility level
        other than public is reported as private; a missing level is treated
        as public.
        """
        repository = request.data.get("repository", {})
        http_url = repository.get("git_http_url", "")
        # visibility_level values: 0 = private, 10 = internal, 20 = public
        level = repository.get("visibility_level", 20)
        is_public = level == 20
        return http_url, "git", not is_public


api_origin_save_webhook_gitlab = GitlabOriginSaveWebhookReceiver()
class SourceforgeOriginSaveWebhookReceiver(OriginSaveWebhookReceiver):
    """Webhook receiver handling push notifications sent by SourceForge.

    The webhook payload only names the repository, so the clone URL, visit
    type and private flag are looked up through the SourceForge REST API.
    """

    FORGE_TYPE = "SourceForge"
    WEBHOOK_GUIDE_URL = (
        "https://sourceforge.net/blog/"
        "how-to-use-webhooks-for-git-mercurial-and-svn-repositories/"
    )
    REPO_TYPES = "git, hg or svn"

    SOURCE_FORGE_API_PROJECT_URL_PATTERN = (
        "https://sourceforge.net/rest/p/{project_name}"
    )
    # Seconds to wait for the REST API so a stalled connection cannot block
    # the webhook request indefinitely.
    SOURCE_FORGE_API_TIMEOUT = 10

    def is_forge_request(self, request: Request) -> bool:
        """Tell whether the request carries the Allura webhook user agent."""
        return (
            request.headers.get("User-Agent", "")
            == "Allura Webhook (https://allura.apache.org/)"
        )

    def is_push_event(self, request: Request) -> bool:
        """Always true: every SourceForge webhook call is a push event."""
        # SourceForge only support webhooks for push events
        return True

    def extract_repo_info(self, request: Request) -> Tuple[str, str, bool]:
        """Return ``(repository_url, visit_type, private)`` for the pushed repo.

        The project name is the third ``/``-separated component of
        ``repository.full_name``. The project is then fetched from the REST
        API and the tool mounted on ``code`` whose URL ends with the full name
        provides the clone URL and the visit type.

        Empty strings are returned for the URL and visit type when the payload
        is missing or malformed, the API call fails or no tool matches.
        """
        repo_url = ""
        visit_type = ""
        private = False

        project_full_name = request.data.get("repository", {}).get("full_name")
        # The payload is untrusted: reject anything that is not a string
        # instead of failing on the split below.
        if not isinstance(project_full_name, str):
            return repo_url, visit_type, private

        # presumably shaped like "/p/<project>/<mount point>" -- TODO confirm;
        # a shorter path has no project component to query.
        path_parts = project_full_name.split("/")
        if len(path_parts) < 3 or not path_parts[2]:
            return repo_url, visit_type, private

        project_api_url = self.SOURCE_FORGE_API_PROJECT_URL_PATTERN.format(
            project_name=path_parts[2]
        )
        response = requests.get(project_api_url, timeout=self.SOURCE_FORGE_API_TIMEOUT)
        if not response.ok:
            return repo_url, visit_type, private

        project_data = response.json()
        private = project_data.get("private", False)
        for tool in project_data.get("tools", []):
            if tool.get("mount_point") == "code" and tool.get("url", "").endswith(
                project_full_name
            ):
                # Prefer the anonymous HTTPS clone URL, fall back to the
                # read-only one.
                repo_url = tool.get(
                    "clone_url_https_anon", tool.get("clone_url_ro", "")
                )
                visit_type = tool.get("name", "")

        return repo_url, visit_type, private


api_origin_save_webhook_sourceforge = SourceforgeOriginSaveWebhookReceiver()
_BITBUCKET_PAYLOAD_FILE = "bitbucket_webhook_payload.json"
_BITBUCKET_REPO_URL = "https://bitbucket.org/johndoe/webhook-test.git"


def _bitbucket_headers(event_key):
    """Return a fresh headers dict for a Bitbucket webhook with that event key."""
    return {
        "User-Agent": "Bitbucket-Webhooks/2.0",
        "X-Event-Key": event_key,
    }


def _bitbucket_payload(datadir):
    """Load the sample Bitbucket webhook payload from the test data directory."""
    with open(os.path.join(datadir, _BITBUCKET_PAYLOAD_FILE), "rb") as stream:
        return json.load(stream)


@pytest.mark.django_db
def test_origin_save_bitbucket_webhook_receiver(api_client, swh_scheduler, datadir):
    # nominal case: a push event creates a save request for the repository
    origin_save_webhook_receiver_test(
        forge_type="Bitbucket",
        http_headers=_bitbucket_headers("repo:push"),
        payload=_bitbucket_payload(datadir),
        expected_origin_url=_BITBUCKET_REPO_URL,
        expected_visit_type="git",
        api_client=api_client,
        swh_scheduler=swh_scheduler,
    )


def test_origin_save_bitbucket_webhook_receiver_invalid_request(
    api_client,
):
    # no forge specific headers at all
    origin_save_webhook_receiver_invalid_request_test(
        forge_type="Bitbucket",
        http_headers={},
        payload={},
        api_client=api_client,
    )


def test_origin_save_bitbucket_webhook_receiver_invalid_event(
    api_client,
):
    # event other than a push
    origin_save_webhook_receiver_invalid_event_test(
        forge_type="Bitbucket",
        http_headers=_bitbucket_headers("repo:fork"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_bitbucket_webhook_receiver_invalid_content_type(
    api_client,
):
    origin_save_webhook_receiver_invalid_content_type_test(
        forge_type="Bitbucket",
        http_headers=_bitbucket_headers("repo:push"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_bitbucket_webhook_receiver_no_repo_url(api_client, datadir):
    payload = _bitbucket_payload(datadir)
    del payload["repository"]
    origin_save_webhook_receiver_no_repo_url_test(
        forge_type="Bitbucket",
        http_headers=_bitbucket_headers("repo:push"),
        payload=payload,
        api_client=api_client,
    )


def test_origin_save_bitbucket_webhook_receiver_private_repo(api_client, datadir):
    payload = _bitbucket_payload(datadir)
    payload["repository"]["is_private"] = True
    origin_save_webhook_receiver_private_repo_test(
        forge_type="Bitbucket",
        http_headers=_bitbucket_headers("repo:push"),
        payload=payload,
        expected_origin_url=_BITBUCKET_REPO_URL,
        api_client=api_client,
    )
_GITEA_PAYLOAD_FILE = "gitea_webhook_payload.json"
_GITEA_REPO_URL = "https://try.gitea.io/johndoe/webhook-test.git"


def _gitea_headers(event):
    """Return a fresh headers dict for a Gitea webhook with that event name."""
    return {"X-Gitea-Event": event}


def _gitea_payload(datadir):
    """Load the sample Gitea webhook payload from the test data directory."""
    with open(os.path.join(datadir, _GITEA_PAYLOAD_FILE), "rb") as stream:
        return json.load(stream)


@pytest.mark.django_db
def test_origin_save_gitea_webhook_receiver(api_client, swh_scheduler, datadir):
    # register the Gitea instance as an authorized origin before posting
    SaveAuthorizedOrigin.objects.create(url="https://try.gitea.io/")
    origin_save_webhook_receiver_test(
        forge_type="Gitea",
        http_headers=_gitea_headers("push"),
        payload=_gitea_payload(datadir),
        expected_origin_url=_GITEA_REPO_URL,
        expected_visit_type="git",
        api_client=api_client,
        swh_scheduler=swh_scheduler,
    )


def test_origin_save_gitea_webhook_receiver_invalid_request(
    api_client,
):
    origin_save_webhook_receiver_invalid_request_test(
        forge_type="Gitea",
        http_headers={},
        payload={},
        api_client=api_client,
    )


def test_origin_save_gitea_webhook_receiver_invalid_event(
    api_client,
):
    origin_save_webhook_receiver_invalid_event_test(
        forge_type="Gitea",
        http_headers=_gitea_headers("issues"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_gitea_webhook_receiver_invalid_content_type(
    api_client,
):
    origin_save_webhook_receiver_invalid_content_type_test(
        forge_type="Gitea",
        http_headers=_gitea_headers("push"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_gitea_webhook_receiver_no_repo_url(api_client, datadir):
    payload = _gitea_payload(datadir)
    del payload["repository"]
    origin_save_webhook_receiver_no_repo_url_test(
        forge_type="Gitea",
        http_headers=_gitea_headers("push"),
        payload=payload,
        api_client=api_client,
    )


def test_origin_save_gitea_webhook_receiver_private_repo(api_client, datadir):
    payload = _gitea_payload(datadir)
    payload["repository"]["private"] = True
    origin_save_webhook_receiver_private_repo_test(
        forge_type="Gitea",
        http_headers=_gitea_headers("push"),
        payload=payload,
        api_client=api_client,
        expected_origin_url=_GITEA_REPO_URL,
    )
_GITHUB_PAYLOAD_FILE = "github_webhook_payload.json"
_GITHUB_REPO_URL = "https://github.com/johndoe/webhook-test"


def _github_headers(event):
    """Return a fresh headers dict for a GitHub webhook with that event name."""
    return {
        "User-Agent": "GitHub-Hookshot/ede37db",
        "X-GitHub-Event": event,
    }


def _github_payload(datadir):
    """Load the sample GitHub webhook payload from the test data directory."""
    with open(os.path.join(datadir, _GITHUB_PAYLOAD_FILE), "rb") as stream:
        return json.load(stream)


@pytest.mark.django_db
def test_origin_save_github_webhook_receiver(api_client, swh_scheduler, datadir):
    origin_save_webhook_receiver_test(
        forge_type="GitHub",
        http_headers=_github_headers("push"),
        payload=_github_payload(datadir),
        expected_origin_url=_GITHUB_REPO_URL,
        expected_visit_type="git",
        api_client=api_client,
        swh_scheduler=swh_scheduler,
    )


def test_origin_save_github_webhook_receiver_invalid_request(
    api_client,
):
    origin_save_webhook_receiver_invalid_request_test(
        forge_type="GitHub",
        http_headers={},
        payload={},
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_invalid_event(
    api_client,
):
    origin_save_webhook_receiver_invalid_event_test(
        forge_type="GitHub",
        http_headers=_github_headers("issues"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_invalid_content_type(
    api_client,
):
    origin_save_webhook_receiver_invalid_content_type_test(
        forge_type="GitHub",
        http_headers=_github_headers("push"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_no_repo_url(api_client, datadir):
    payload = _github_payload(datadir)
    del payload["repository"]
    origin_save_webhook_receiver_no_repo_url_test(
        forge_type="GitHub",
        http_headers=_github_headers("push"),
        payload=payload,
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_ping_event(api_client):
    # a ping event is answered with a pong and no save request is attempted
    ping_headers = django_http_headers(_github_headers("ping"))
    resp = check_api_post_responses(
        api_client,
        reverse("api-1-origin-save-webhook-github"),
        status_code=200,
        **ping_headers,
    )

    assert resp.data == {"message": "pong"}


def test_origin_save_github_webhook_receiver_private_repo(api_client, datadir):
    payload = _github_payload(datadir)
    payload["repository"]["private"] = True
    origin_save_webhook_receiver_private_repo_test(
        forge_type="GitHub",
        http_headers=_github_headers("push"),
        payload=payload,
        api_client=api_client,
        expected_origin_url=_GITHUB_REPO_URL,
    )
_GITLAB_PAYLOAD_FILE = "gitlab_webhook_payload.json"
_GITLAB_REPO_URL = "https://gitlab.com/johndoe/test.git"


def _gitlab_headers(event):
    """Return a fresh headers dict for a GitLab webhook with that event name."""
    return {
        "User-Agent": "GitLab/15.6.0-pre",
        "X-Gitlab-Event": event,
    }


def _gitlab_payload(datadir):
    """Load the sample GitLab webhook payload from the test data directory."""
    with open(os.path.join(datadir, _GITLAB_PAYLOAD_FILE), "rb") as stream:
        return json.load(stream)


@pytest.mark.django_db
def test_origin_save_gitlab_webhook_receiver(api_client, swh_scheduler, datadir):
    origin_save_webhook_receiver_test(
        forge_type="GitLab",
        http_headers=_gitlab_headers("Push Hook"),
        payload=_gitlab_payload(datadir),
        expected_origin_url=_GITLAB_REPO_URL,
        expected_visit_type="git",
        api_client=api_client,
        swh_scheduler=swh_scheduler,
    )


def test_origin_save_gitlab_webhook_receiver_invalid_request(
    api_client,
):
    origin_save_webhook_receiver_invalid_request_test(
        forge_type="GitLab",
        http_headers={},
        payload={},
        api_client=api_client,
    )


def test_origin_save_gitlab_webhook_receiver_invalid_event(
    api_client,
):
    origin_save_webhook_receiver_invalid_event_test(
        forge_type="GitLab",
        http_headers=_gitlab_headers("Issue Hook"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_gitlab_webhook_receiver_invalid_content_type(
    api_client,
):
    origin_save_webhook_receiver_invalid_content_type_test(
        forge_type="GitLab",
        http_headers=_gitlab_headers("Push Hook"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_gitlab_webhook_receiver_no_repo_url(api_client, datadir):
    payload = _gitlab_payload(datadir)
    del payload["repository"]
    origin_save_webhook_receiver_no_repo_url_test(
        forge_type="GitLab",
        http_headers=_gitlab_headers("Push Hook"),
        payload=payload,
        api_client=api_client,
    )


def test_origin_save_gitlab_webhook_receiver_private_repo(api_client, datadir):
    payload = _gitlab_payload(datadir)
    # visibility level 0 marks the repository as private
    payload["repository"]["visibility_level"] = 0
    origin_save_webhook_receiver_private_repo_test(
        forge_type="GitLab",
        http_headers=_gitlab_headers("Push Hook"),
        payload=payload,
        api_client=api_client,
        expected_origin_url=_GITLAB_REPO_URL,
    )
# (payload file, repository URL, visit type) for each supported repository type
_SOURCEFORGE_CASES = [
    (
        "sourceforge_webhook_payload_hg.json",
        "http://hg.code.sf.net/p/webhook-test-hg/code",
        "hg",
    ),
    (
        "sourceforge_webhook_payload_git.json",
        "https://git.code.sf.net/p/webhook-test-git/code",
        "git",
    ),
    (
        "sourceforge_webhook_payload_svn.json",
        "https://svn.code.sf.net/p/webhook-test-svn/code/",
        "svn",
    ),
]


def _sourceforge_headers():
    """Return a fresh headers dict mimicking a SourceForge (Allura) webhook."""
    return {"User-Agent": "Allura Webhook (https://allura.apache.org/)"}


def _sourceforge_payload(datadir, payload_file):
    """Load a sample SourceForge webhook payload from the test data directory."""
    with open(os.path.join(datadir, payload_file), "rb") as stream:
        return json.load(stream)


@pytest.mark.django_db
@pytest.mark.parametrize(
    "payload_file,expected_origin_url,expected_visit_type",
    _SOURCEFORGE_CASES,
)
def test_origin_save_sourceforge_webhook_receiver(
    api_client,
    swh_scheduler,
    datadir,
    requests_mock_datadir,
    payload_file,
    expected_origin_url,
    expected_visit_type,
):
    origin_save_webhook_receiver_test(
        forge_type="SourceForge",
        http_headers=_sourceforge_headers(),
        payload=_sourceforge_payload(datadir, payload_file),
        expected_origin_url=expected_origin_url,
        expected_visit_type=expected_visit_type,
        api_client=api_client,
        swh_scheduler=swh_scheduler,
    )


def test_origin_save_sourceforge_webhook_receiver_invalid_request(
    api_client,
):
    origin_save_webhook_receiver_invalid_request_test(
        forge_type="SourceForge",
        http_headers={},
        payload={},
        api_client=api_client,
    )


def test_origin_save_sourceforge_webhook_receiver_invalid_content_type(
    api_client,
):
    origin_save_webhook_receiver_invalid_content_type_test(
        forge_type="SourceForge",
        http_headers=_sourceforge_headers(),
        payload={},
        api_client=api_client,
    )


def test_origin_save_sourceforge_webhook_receiver_no_repo_url(api_client, datadir):
    payload = _sourceforge_payload(datadir, "sourceforge_webhook_payload_git.json")
    del payload["repository"]
    origin_save_webhook_receiver_no_repo_url_test(
        forge_type="SourceForge",
        http_headers=_sourceforge_headers(),
        payload=payload,
        api_client=api_client,
    )


@pytest.mark.parametrize(
    "payload_file,origin_url,visit_type",
    _SOURCEFORGE_CASES,
)
def test_origin_save_sourceforge_webhook_receiver_private_repo(
    api_client,
    datadir,
    requests_mock_datadir,
    requests_mock,
    payload_file,
    origin_url,
    visit_type,
):
    # override sourceforge REST API response
    repo_data_url = f"https://sourceforge.net/rest/p/webhook-test-{visit_type}"
    private_repo_data = requests.get(repo_data_url).json()
    private_repo_data["private"] = True
    requests_mock.get(repo_data_url, json=private_repo_data)

    origin_save_webhook_receiver_private_repo_test(
        forge_type="SourceForge",
        http_headers=_sourceforge_headers(),
        payload=_sourceforge_payload(datadir, payload_file),
        expected_origin_url=origin_url,
        api_client=api_client,
    )
def django_http_headers(http_headers: Dict[str, Any]) -> Dict[str, Any]:
    """Convert HTTP header names to the ``HTTP_*`` keyword form.

    Each name is upper-cased, its dashes are replaced by underscores and it is
    prefixed with ``HTTP_``; values are left untouched.
    """
    return {f"HTTP_{k.upper().replace('-', '_')}": v for k, v in http_headers.items()}


def _post_to_receiver(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
    status_code: int,
):
    """POST the payload to the forge webhook receiver and check the status code."""
    url = reverse(f"api-1-origin-save-webhook-{forge_type.lower()}")
    return check_api_post_responses(
        api_client,
        url,
        status_code=status_code,
        data=payload,
        **django_http_headers(http_headers),
    )


def _assert_bad_input(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
    reason: str,
):
    """Check the receiver rejects the request with a 400 and the given reason."""
    resp = _post_to_receiver(
        forge_type, http_headers, payload, api_client, status_code=400
    )
    assert resp.data == {"exception": "BadInputExc", "reason": reason}


def origin_save_webhook_receiver_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    expected_origin_url: str,
    expected_visit_type: str,
    api_client,
    swh_scheduler,
):
    """Check a valid webhook call creates a save request and a loading task."""
    resp = _post_to_receiver(
        forge_type, http_headers, payload, api_client, status_code=200
    )

    assert resp.data["origin_url"] == expected_origin_url
    assert resp.data["visit_type"] == expected_visit_type

    # A loading task for the origin must have been scheduled.
    tasks = swh_scheduler.search_tasks(task_type=f"load-{expected_visit_type}")
    assert tasks
    task = dict(tasks[0].items())
    assert task["arguments"]["kwargs"]["url"] == expected_origin_url


def origin_save_webhook_receiver_invalid_request_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check a request not sent by the forge webhook is rejected."""
    _assert_bad_input(
        forge_type,
        http_headers,
        payload,
        api_client,
        reason=(
            f"POST request was not sent by a {forge_type} webhook "
            "and has not been processed."
        ),
    )


def origin_save_webhook_receiver_invalid_event_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check an event that is not a push one is rejected."""
    _assert_bad_input(
        forge_type,
        http_headers,
        payload,
        api_client,
        reason=(
            f"Event sent by {forge_type} webhook is not a push one, request has "
            "not been processed."
        ),
    )


def origin_save_webhook_receiver_invalid_content_type_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check a request with a non JSON content type is rejected."""
    bad_content_type = "application/x-www-form-urlencoded"
    # Work on a copy so the caller's headers dict is not modified.
    headers = {**http_headers, "Content-Type": bad_content_type}
    _assert_bad_input(
        forge_type,
        headers,
        payload,
        api_client,
        reason=(
            f"Invalid content type '{bad_content_type}' for the POST request sent by "
            f"{forge_type} webhook, it should be 'application/json'."
        ),
    )


def origin_save_webhook_receiver_no_repo_url_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check a payload without repository URL is rejected."""
    _assert_bad_input(
        forge_type,
        http_headers,
        payload,
        api_client,
        reason=(
            f"Repository URL could not be extracted from {forge_type} webhook payload."
        ),
    )


def origin_save_webhook_receiver_private_repo_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
    expected_origin_url: str,
):
    """Check a push to a private repository is rejected."""
    _assert_bad_input(
        forge_type,
        http_headers,
        payload,
        api_client,
        reason=(
            f"Repository {expected_origin_url} is private and cannot be cloned "
            "without authentication."
        ),
    )