# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

# NOTE: this listing is the post-patch content of two files; each section is
# introduced by a banner giving the path of the file it belongs to.

# ---------------------------------------------------------------------------
# swh/web/save_origin_webhooks/generic_receiver.py
# ---------------------------------------------------------------------------

import abc
from typing import Any, Dict, Tuple

from rest_framework.request import Request

from swh.web.api.apidoc import api_doc
from swh.web.api.apiurls import APIUrls, api_route
from swh.web.save_code_now.origin_save import create_save_origin_request
from swh.web.utils.exc import BadInputExc

# URL registry every receiver instance adds its route to (see ``__init__``).
webhooks_api_urls = APIUrls()


class OriginSaveWebhookReceiver(abc.ABC):
    """Callable base class turning a forge webhook POST into a save request.

    Instantiating a concrete subclass registers the instance itself as the
    handler of ``/origin/save/webhook/<forge type, lowercased>/`` through
    ``api_doc`` and ``api_route``. Subclasses provide the three class
    attributes below and implement the abstract predicates; they may also
    override :meth:`is_ping_event`.
    """

    FORGE_TYPE: str  # forge name; lowercased to build the route and its name
    WEBHOOK_GUIDE_URL: str  # link inserted in the generated documentation
    REPO_TYPES: str  # repository type(s) named in the generated documentation

    @abc.abstractmethod
    def is_forge_request(self, request: Request) -> bool:
        """Tell whether ``request`` was sent by a webhook of this forge."""
        ...

    def is_ping_event(self, request: Request) -> bool:
        """Tell whether ``request`` is a ping event rather than a real one.

        The default is ``False`` so that receivers for forges without a ping
        event need not override anything.
        """
        return False

    @abc.abstractmethod
    def is_push_event(self, request: Request) -> bool:
        """Tell whether ``request`` notifies a push to the repository."""
        ...

    @abc.abstractmethod
    def extract_repo_url_and_visit_type(self, request: Request) -> Tuple[str, str]:
        """Return ``(repository URL, visit type)`` read from the payload.

        A falsy element makes :meth:`__call__` reject the request.
        """
        ...

    def __init__(self):
        """Build the endpoint documentation and register the route."""
        forge_slug = self.FORGE_TYPE.lower()
        endpoint = f"/origin/save/webhook/{forge_slug}/"

        # NOTE(review): the line breaks and indentation of this template were
        # lost in the flattened patch and are reconstructed here; confirm the
        # exact layout against the repository before merging.
        self.__doc__ = f"""
        .. http:post:: /api/1/origin/save/webhook/{forge_slug}/

            Webhook receiver for {self.FORGE_TYPE} to request or update the
            archival of a repository when new commits are pushed to it.

            To add such webhook to one of your {self.REPO_TYPES} repository
            hosted on {self.FORGE_TYPE}, please follow
            `{self.FORGE_TYPE}'s webhooks guide <{self.WEBHOOK_GUIDE_URL}>`_.

            The expected content type for the webhook payload must be
            ``application/json``.

            :>json string origin_url: the url of the origin to save
            :>json string visit_type: the type of visit to perform
            :>json string save_request_date: the date (in iso format) the save
                request was issued
            :>json string save_request_status: the status of the save request,
                either **accepted**, **rejected** or **pending**

            :statuscode 200: save request for repository has been successfully
                created from the webhook payload.
            :statuscode 400: no save request has been created due to invalid
                POST request or missing data in webhook payload
        """

        # Fix: the original literal lacked the ``f`` prefix, so every receiver
        # was named with the raw "{self.FORGE_TYPE.lower()}" placeholder.
        self.__name__ = f"api_origin_save_webhook_{forge_slug}"

        # The instance is the view: both decorators are applied to ``self``.
        api_doc(
            endpoint,
            category="Request archival",
        )(self)

        api_route(
            endpoint,
            f"api-1-origin-save-webhook-{forge_slug}",
            methods=["POST"],
            api_urls=webhooks_api_urls,
        )(self)

    def __call__(
        self,
        request: Request,
    ) -> Dict[str, Any]:
        """Process a webhook POST request.

        Returns:
            ``{"message": "pong"}`` for a ping event, otherwise the
            ``origin_url``, ``visit_type``, ``save_request_date`` and
            ``save_request_status`` of the created save request.

        Raises:
            BadInputExc: the request does not come from the forge, is not a
                push event, has a content type other than
                ``application/json``, or lacks a repository URL / visit type.
        """
        if not self.is_forge_request(request):
            raise BadInputExc(
                f"POST request was not sent by a {self.FORGE_TYPE} webhook and "
                "has not been processed."
            )

        # Pings are acknowledged before any push-specific validation.
        if self.is_ping_event(request):
            return {"message": "pong"}

        if not self.is_push_event(request):
            raise BadInputExc(
                f"Event sent by {self.FORGE_TYPE} webhook is not a push one, request "
                "has not been processed."
            )

        # A missing Content-Type header is tolerated; only a wrong one is not.
        content_type = request.headers.get("Content-Type")
        if content_type and not content_type.startswith("application/json"):
            raise BadInputExc(
                f"Invalid content type '{content_type}' for the POST request sent by "
                f"{self.FORGE_TYPE} webhook, it should be 'application/json'."
            )

        repo_url, visit_type = self.extract_repo_url_and_visit_type(request)
        if not repo_url:
            raise BadInputExc(
                f"Repository URL could not be extracted from {self.FORGE_TYPE} webhook "
                f"payload."
            )
        if not visit_type:
            raise BadInputExc(
                f"Visit type could not be determined for repository {repo_url}."
            )

        save_request = create_save_origin_request(
            visit_type=visit_type, origin_url=repo_url
        )

        # Expose only the fields documented for the endpoint.
        return {
            "origin_url": save_request["origin_url"],
            "visit_type": save_request["visit_type"],
            "save_request_date": save_request["save_request_date"],
            "save_request_status": save_request["save_request_status"],
        }


# ---------------------------------------------------------------------------
# swh/web/save_origin_webhooks/github.py
# ---------------------------------------------------------------------------

from typing import Tuple

from rest_framework.request import Request

from swh.web.save_origin_webhooks.generic_receiver import OriginSaveWebhookReceiver


class GitHubOriginSaveWebhookReceiver(OriginSaveWebhookReceiver):
    """Webhook receiver for GitHub, which always yields ``git`` visits."""

    FORGE_TYPE = "GitHub"
    WEBHOOK_GUIDE_URL = (
        "https://docs.github.com/en/developers/webhooks-and-events/"
        "webhooks/creating-webhooks#setting-up-a-webhook"
    )
    REPO_TYPES = "git"

    def _event_type(self, request: Request) -> str:
        """Return the ``X-GitHub-Event`` header value, or ``""`` if absent."""
        # ``.get`` keeps the event predicates from raising KeyError when they
        # are called without ``is_forge_request`` having been checked first.
        return request.headers.get(f"X-{self.FORGE_TYPE}-Event", "")

    def is_forge_request(self, request: Request) -> bool:
        """Require the Hookshot user agent prefix and the event header."""
        return (
            request.headers.get("User-Agent", "").startswith(
                f"{self.FORGE_TYPE}-Hookshot/"
            )
            and f"X-{self.FORGE_TYPE}-Event" in request.headers
        )

    def is_ping_event(self, request: Request) -> bool:
        """Tell whether the event header is ``ping``."""
        return self._event_type(request) == "ping"

    def is_push_event(self, request: Request) -> bool:
        """Tell whether the event header is ``push``."""
        return self._event_type(request) == "push"

    def extract_repo_url_and_visit_type(self, request: Request) -> Tuple[str, str]:
        """Return the payload's ``repository.html_url`` and ``"git"``.

        The URL is ``""`` when the payload has no ``repository`` entry.
        """
        # ``or {}`` also covers an explicit ``"repository": null`` payload.
        repository = request.data.get("repository") or {}
        return repository.get("html_url", ""), "git"


# Instantiation registers the GitHub route (see the base class ``__init__``).
api_origin_save_webhook_github = GitHubOriginSaveWebhookReceiver()
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

# NOTE: this listing is the post-patch content of two files; each section is
# introduced by a banner giving the path of the file it belongs to.

# ---------------------------------------------------------------------------
# swh/web/save_origin_webhooks/tests/test_github.py
# ---------------------------------------------------------------------------

import json
import os
from typing import Any, Dict

import pytest

from swh.web.tests.helpers import check_api_post_responses
from swh.web.utils import reverse

from .utils import (
    django_http_headers,
    origin_save_webhook_receiver_invalid_content_type_test,
    origin_save_webhook_receiver_invalid_event_test,
    origin_save_webhook_receiver_invalid_request_test,
    origin_save_webhook_receiver_no_repo_url_test,
    origin_save_webhook_receiver_test,
)


def _github_headers(event: str) -> Dict[str, str]:
    """Return a fresh set of GitHub webhook headers for ``event``."""
    return {
        "User-Agent": "GitHub-Hookshot/ede37db",
        "X-GitHub-Event": event,
    }


def _load_github_payload(datadir) -> Dict[str, Any]:
    """Load the GitHub push payload fixture stored under ``datadir``."""
    with open(os.path.join(datadir, "github_webhook_payload.json"), "rb") as f:
        return json.load(f)


@pytest.mark.django_db
def test_origin_save_github_webhook_receiver(api_client, swh_scheduler, datadir):
    """A push event creates a git save request for the pushed repository."""
    origin_save_webhook_receiver_test(
        forge_type="GitHub",
        http_headers=_github_headers("push"),
        payload=_load_github_payload(datadir),
        expected_origin_url="https://github.com/johndoe/webhook-test",
        expected_visit_type="git",
        api_client=api_client,
        swh_scheduler=swh_scheduler,
    )


def test_origin_save_github_webhook_receiver_invalid_request(
    api_client,
):
    """A request without GitHub headers is rejected."""
    origin_save_webhook_receiver_invalid_request_test(
        forge_type="GitHub",
        http_headers={},
        payload={},
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_invalid_event(
    api_client,
):
    """An event that is neither ping nor push is rejected."""
    origin_save_webhook_receiver_invalid_event_test(
        forge_type="GitHub",
        http_headers=_github_headers("issues"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_invalid_content_type(
    api_client,
):
    """A push event with a non JSON content type is rejected."""
    origin_save_webhook_receiver_invalid_content_type_test(
        forge_type="GitHub",
        http_headers=_github_headers("push"),
        payload={},
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_no_repo_url(api_client, datadir):
    """A push payload without ``repository`` is rejected."""
    payload = _load_github_payload(datadir)
    del payload["repository"]

    origin_save_webhook_receiver_no_repo_url_test(
        forge_type="GitHub",
        http_headers=_github_headers("push"),
        payload=payload,
        api_client=api_client,
    )


def test_origin_save_github_webhook_receiver_ping_event(api_client):
    """A ping event is answered with a pong; no payload is posted."""
    url = reverse("api-1-origin-save-webhook-github")

    resp = check_api_post_responses(
        api_client,
        url,
        status_code=200,
        **django_http_headers(_github_headers("ping")),
    )

    assert resp.data == {"message": "pong"}


# ---------------------------------------------------------------------------
# swh/web/save_origin_webhooks/tests/utils.py
# ---------------------------------------------------------------------------

from typing import Any, Dict

from swh.web.tests.helpers import check_api_post_responses
from swh.web.utils import reverse


def django_http_headers(http_headers: Dict[str, Any]):
    """Map header names to the ``HTTP_*`` keyword form (upper case, ``_``)."""
    return {f"HTTP_{k.upper().replace('-', '_')}": v for k, v in http_headers.items()}


def _post_to_webhook(
    api_client,
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    status_code: int,
):
    """POST ``payload`` to the forge's webhook route and check the status."""
    url = reverse(f"api-1-origin-save-webhook-{forge_type.lower()}")
    return check_api_post_responses(
        api_client,
        url,
        status_code=status_code,
        data=payload,
        **django_http_headers(http_headers),
    )


def _assert_bad_input(resp, reason: str) -> None:
    """Check that ``resp`` is a ``BadInputExc`` error carrying ``reason``."""
    assert resp.data == {
        "exception": "BadInputExc",
        "reason": reason,
    }


def origin_save_webhook_receiver_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    expected_origin_url: str,
    expected_visit_type: str,
    api_client,
    swh_scheduler,
):
    """Check a valid webhook request is accepted and a load task is found."""
    resp = _post_to_webhook(
        api_client, forge_type, http_headers, payload, status_code=200
    )

    assert resp.data["origin_url"] == expected_origin_url
    assert resp.data["visit_type"] == expected_visit_type

    tasks = swh_scheduler.search_tasks(task_type=f"load-{expected_visit_type}")
    assert tasks
    task = dict(tasks[0].items())
    assert task["arguments"]["kwargs"]["url"] == expected_origin_url


def origin_save_webhook_receiver_invalid_request_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check a request not identified as coming from the forge gets a 400."""
    resp = _post_to_webhook(
        api_client, forge_type, http_headers, payload, status_code=400
    )

    _assert_bad_input(
        resp,
        f"POST request was not sent by a {forge_type} webhook "
        "and has not been processed.",
    )


def origin_save_webhook_receiver_invalid_event_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check an event other than a push one gets a 400."""
    resp = _post_to_webhook(
        api_client, forge_type, http_headers, payload, status_code=400
    )

    _assert_bad_input(
        resp,
        f"Event sent by {forge_type} webhook is not a push one, request has "
        "not been processed.",
    )


def origin_save_webhook_receiver_invalid_content_type_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check a form-encoded content type gets a 400."""
    bad_content_type = "application/x-www-form-urlencoded"
    # Fix: build a copy instead of adding the header to the caller's dict.
    headers = {**http_headers, "Content-Type": bad_content_type}

    resp = _post_to_webhook(api_client, forge_type, headers, payload, status_code=400)

    _assert_bad_input(
        resp,
        f"Invalid content type '{bad_content_type}' for the POST request sent by "
        f"{forge_type} webhook, it should be 'application/json'.",
    )


def origin_save_webhook_receiver_no_repo_url_test(
    forge_type: str,
    http_headers: Dict[str, Any],
    payload: Dict[str, Any],
    api_client,
):
    """Check a payload without a repository URL gets a 400."""
    resp = _post_to_webhook(
        api_client, forge_type, http_headers, payload, status_code=400
    )

    _assert_bad_input(
        resp,
        f"Repository URL could not be extracted from {forge_type} webhook payload.",
    )