diff --git a/swh/web/inbound_email/__init__.py b/swh/web/inbound_email/__init__.py
new file mode 100644
diff --git a/swh/web/inbound_email/apps.py b/swh/web/inbound_email/apps.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/apps.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.apps import AppConfig
+
+
+class InboundEmailConfig(AppConfig):
+    """Django application configuration for the inbound email app."""
+
+    default_auto_field = "django.db.models.BigAutoField"
+    # Django expects the full dotted path of the application package here; it must
+    # match the "swh.web.inbound_email" entry added to INSTALLED_APPS.
+    name = "swh.web.inbound_email"
diff --git a/swh/web/inbound_email/management/commands/process_inbound_email.py b/swh/web/inbound_email/management/commands/process_inbound_email.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/management/commands/process_inbound_email.py
@@ -0,0 +1,83 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import email
+import email.message
+import email.policy
+import logging
+import sys
+from typing import Callable
+
+import sentry_sdk
+
+from django.core.management.base import BaseCommand
+
+from swh.web.inbound_email import signals
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """Read one raw email from standard input and dispatch it to signal receivers."""
+
+    help = "Process a new inbound email"
+
+    def handle(self, *args, **options):
+        """Parse the message read from stdin and send the ``email_received`` signal.
+
+        Receivers that raise an exception or return ``FAILED`` are reported. When no
+        receiver returns ``PROCESSED``, the message is reported as unhandled.
+        """
+        raw_message = sys.stdin.buffer.read()
+        try:
+            message = email.message_from_bytes(raw_message, policy=email.policy.default)
+        except Exception as exc:
+            sentry_sdk.capture_exception(exc)
+            self.handle_failed_message(raw_message)
+            # XXX make sure having logging doesn't make postfix unhappy
+            logger.exception("Could not convert email from bytes")
+            return
+
+        responses = signals.email_received.send_robust(
+            sender=self.__class__, message=message
+        )
+
+        handled = False
+        for receiver, response in responses:
+            if isinstance(response, Exception):
+                sentry_sdk.capture_exception(response)
+                self.handle_failing_receiver(message, receiver)
+                logger.error(
+                    "Receiver produced the following exception", exc_info=response
+                )
+            elif response is signals.EmailProcessingStatus.FAILED:
+                self.handle_failing_receiver(message, receiver)
+            elif response is signals.EmailProcessingStatus.PROCESSED:
+                handled = True
+
+        if not handled:
+            self.handle_unhandled_message(message)
+
+    def handle_failed_message(self, raw_message: bytes):
+        """Log a raw message that could not be parsed."""
+        # TODO: forward email as attachment for inspection
+        logger.error("Failed message: %s", raw_message.decode("ascii", "replace"))
+
+    def handle_failing_receiver(
+        self, message: email.message.EmailMessage, receiver: Callable
+    ):
+        """Log a message along with the receiver that failed to process it."""
+        # TODO: forward email for inspection
+        logger.error(
+            "Failed receiver %s:%s; message: %s",
+            receiver.__module__,
+            receiver.__qualname__,
+            str(message),
+        )
+
+    def handle_unhandled_message(self, message: email.message.EmailMessage):
+        """Log a message that no receiver reported as processed."""
+        # TODO: pass email through to a fallback alias?
+        logger.error("Unhandled message: %s", str(message))
diff --git a/swh/web/inbound_email/signals.py b/swh/web/inbound_email/signals.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/signals.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from enum import Enum, auto
+
+import django.dispatch
+
+# No ``providing_args``: it was purely documentational, deprecated in Django 3.1
+# and removed in Django 4.0. The arguments are described in the docstring below.
+email_received = django.dispatch.Signal()
+"""This signal is sent by the `process_inbound_email` management command.
+
+Arguments:
+    message (:class:`email.message.EmailMessage`): the inbound email message
+
+Signal receivers must return an :class:`EmailProcessingStatus` value so that the
+management command knows if the email has been processed.
+
+Signal receivers will be called for all received emails and are expected to do their own
+filtering (e.g. using the original destination address).
+
+Receivers ignoring a message must return `EmailProcessingStatus.IGNORED` to let the
+management command know that the message hasn't been processed.
+
+"""
+
+
+class EmailProcessingStatus(Enum):
+    """Return values for the email processing signal listeners"""
+
+    PROCESSED = auto()
+    """The email has been successfully processed"""
+    FAILED = auto()
+    """The email has been processed, but the processing failed"""
+    IGNORED = auto()
+    """The email has been ignored (e.g. unknown recipient)"""
diff --git a/swh/web/inbound_email/utils.py b/swh/web/inbound_email/utils.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/utils.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from dataclasses import dataclass
+from email.headerregistry import Address
+from email.message import EmailMessage
+from typing import List, Optional
+
+
+def extract_recipients(message: EmailMessage) -> List[Address]:
+    """Extract a list of recipients of the `message`.
+
+    This uses the ``To`` and ``Cc`` fields.
+    """
+
+    ret = []
+
+    for header_name in ("to", "cc"):
+        for header in message.get_all(header_name, []):
+            ret.extend(header.addresses)
+
+    return ret
+
+
+@dataclass
+class AddressMatch:
+    """Data related to a recipient match"""
+
+    recipient: Address
+    """The original recipient that matched the expected address"""
+    extension: Optional[str]
+    """The parsed +-extension of the matched recipient address"""
+
+
+def recipient_matches(message: EmailMessage, address: str) -> List[AddressMatch]:
+    """Check whether any of the message recipients match the given address.
+
+    This function supports "+-addressing", where the local part of the email address is
+    appended with a `+`.
+    """
+
+    ret = []
+
+    parsed_address = Address(addr_spec=address.lower())
+
+    for recipient in extract_recipients(message):
+        if recipient.domain.lower() != parsed_address.domain:
+            continue
+
+        # extension is either empty (no + in username) or a 1-item list (+ in username)
+        base_username, *extension = recipient.username.split("+", 1)
+
+        if base_username.lower() != parsed_address.username:
+            continue
+
+        ret.append(
+            AddressMatch(
+                recipient=recipient, extension=None if not extension else extension[0],
+            )
+        )
+
+    return ret
diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py
--- a/swh/web/settings/common.py
+++ b/swh/web/settings/common.py
@@ -43,6 +43,7 @@
     "django.contrib.staticfiles",
     "rest_framework",
     "swh.web.common",
+    "swh.web.inbound_email",
     "swh.web.api",
     "swh.web.auth",
     "swh.web.browse",
diff --git a/swh/web/tests/inbound_email/test_management_command.py b/swh/web/tests/inbound_email/test_management_command.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/inbound_email/test_management_command.py
@@ -0,0 +1,167 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from contextlib import contextmanager
+from dataclasses import dataclass
+from email.message import EmailMessage
+from io import BytesIO, StringIO
+import re
+import sys
+from typing import Callable, Iterator
+from unittest.mock import MagicMock
+
+import pytest
+
+from django.core.management import call_command
+from django.dispatch import Signal
+
+from swh.web.inbound_email.signals import EmailProcessingStatus, email_received
+
+
+class MockedStdin:
+    """Minimal stand-in for ``sys.stdin`` exposing only a binary ``buffer``."""
+
+    def __init__(self):
+        self.buffer = BytesIO()
+
+
+@dataclass
+class CommandReturn:
+    """Captured standard output and standard error of a command run."""
+
+    out: str
+    err: str
+
+
+@contextmanager
+def signal_receiver(signal: Signal, name: str = "receiver_name") -> Iterator[Callable]:
+    """Connect a named mock receiver to `signal` while the context is active."""
+    receiver = MagicMock()
+    receiver.configure_mock(__name__=name, __qualname__=name)
+
+    try:
+        signal.connect(receiver)
+        yield receiver
+    finally:
+        signal.disconnect(receiver)
+
+
+def call_process_inbound_email(stdin_data: bytes) -> CommandReturn:
+    """Run the management command with `stdin_data` as its standard input."""
+    orig_stdin = sys.stdin
+    try:
+        sys.stdin = MockedStdin()  # type: ignore
+        sys.stdin.buffer.write(stdin_data)
+        sys.stdin.buffer.seek(0)
+
+        out = StringIO()
+        err = StringIO()
+
+        call_command("process_inbound_email", stdout=out, stderr=err)
+
+        out.seek(0)
+        err.seek(0)
+        return CommandReturn(out=out.read(), err=err.read())
+    finally:
+        sys.stdin = orig_stdin
+
+
+def test_empty_stdin(caplog):
+    ret = call_process_inbound_email(b"")
+    assert ret.out == ""
+    assert ret.err == ""
+
+    assert len(caplog.records) == 1
+    log = caplog.records[0]
+    assert log.levelname == "ERROR"
+    assert "Unhandled message" in log.getMessage()
+
+
+@pytest.mark.parametrize(
+    "return_value,err_contents",
+    [
+        # When the email gets processed by one of the receivers, the management command
+        # should not emit any output.
+        (EmailProcessingStatus.PROCESSED, ""),
+        # When a receiver fails, the management command outputs a message to this effect
+        (EmailProcessingStatus.FAILED, "Failed receiver.*receiver_name"),
+        # When all receivers ignore a message, this fact is printed too
+        (EmailProcessingStatus.IGNORED, "Unhandled message"),
+    ],
+)
+def test_signal_receiver(return_value, err_contents, caplog):
+    """Check that signal receivers are properly called when running the management command.
+
+    Check for output depending on its return value"""
+    message = EmailMessage()
+    message["to"] = "test@example.com"
+    message["subject"] = "Test Subject"
+    message.set_content("This is a test message.\n")
+
+    with signal_receiver(email_received) as receiver:
+        receiver.return_value = return_value
+
+        ret = call_process_inbound_email(bytes(message))
+        assert ret.out == ""
+        assert ret.err == ""
+        output = "\n".join(record.getMessage() for record in caplog.records)
+        if err_contents:
+            assert re.match(err_contents, output)
+        else:
+            assert output == ""
+
+        calls = receiver.call_args_list
+
+        assert len(calls) == 1
+        assert bytes(calls[0][1]["message"]) == bytes(message)
+
+
+def test_multiple_receivers(caplog):
+    message = EmailMessage()
+    message["to"] = "test@example.com"
+    message["subject"] = "Test Subject"
+    message.set_content("This is a test message.\n")
+
+    with signal_receiver(email_received, name="ignored") as ignored, signal_receiver(
+        email_received, name="processed"
+    ) as processed:
+        ignored.return_value = EmailProcessingStatus.IGNORED
+        processed.return_value = EmailProcessingStatus.PROCESSED
+
+        ret = call_process_inbound_email(bytes(message))
+        assert ret.out == ""
+        assert ret.err == ""
+
+        assert not caplog.records
+
+        for receiver in [ignored, processed]:
+            calls = receiver.call_args_list
+
+            assert len(calls) == 1
+            assert bytes(calls[0][1]["message"]) == bytes(message)
+
+
+def test_signal_receiver_exception(caplog):
+    message = EmailMessage()
+    message["to"] = "test@example.com"
+    message["subject"] = "Test Subject"
+    message.set_content("This is a test message.\n")
+
+    with signal_receiver(email_received, name="exception_raised") as receiver:
+        receiver.side_effect = ValueError("I'm broken!")
+
+        ret = call_process_inbound_email(bytes(message))
+        assert ret.out == ""
+        assert ret.err == ""
+
+        output = "\n".join(
+            record.getMessage() + ("\n" + record.exc_text if record.exc_text else "")
+            for record in caplog.records
+        )
+
+        assert re.match("Failed receiver.*exception_raised", output)
+        assert "following exception" in output
+        assert "ValueError" in output
+        assert "I'm broken" in output
diff --git a/swh/web/tests/inbound_email/test_utils.py b/swh/web/tests/inbound_email/test_utils.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/inbound_email/test_utils.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from email.headerregistry import Address
+from email.message import EmailMessage
+
+from swh.web.inbound_email import utils
+
+
+def test_extract_recipients():
+    message = EmailMessage()
+    assert utils.extract_recipients(message) == []
+
+    message["To"] = "Test Recipient <test-recipient@example.com>"
+
+    assert utils.extract_recipients(message) == [
+        Address(display_name="Test Recipient", addr_spec="test-recipient@example.com")
+    ]
+
+    message["Cc"] = (
+        "test-recipient-2@example.com, "
+        "Another Test Recipient <test-recipient-3@example.com>"
+    )
+    assert utils.extract_recipients(message) == [
+        Address(display_name="Test Recipient", addr_spec="test-recipient@example.com"),
+        Address(addr_spec="test-recipient-2@example.com"),
+        Address(
+            display_name="Another Test Recipient",
+            addr_spec="test-recipient-3@example.com",
+        ),
+    ]
+
+    del message["To"]
+    assert utils.extract_recipients(message) == [
+        Address(addr_spec="test-recipient-2@example.com"),
+        Address(
+            display_name="Another Test Recipient",
+            addr_spec="test-recipient-3@example.com",
+        ),
+    ]
+
+
+def test_recipient_matches():
+    message = EmailMessage()
+    assert utils.recipient_matches(message, "match@example.com") == []
+
+    message = EmailMessage()
+    message["to"] = "nomatch@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == []
+
+    message = EmailMessage()
+    message["to"] = "match@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match@example.com"), extension=None
+        )
+    ]
+
+    message = EmailMessage()
+    message["to"] = "match+extension@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match+extension@example.com"),
+            extension="extension",
+        )
+    ]
+
+    message = EmailMessage()
+    message["to"] = "match+weird+plussed+extension@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match+weird+plussed+extension@example.com"),
+            extension="weird+plussed+extension",
+        )
+    ]
+
+    message = EmailMessage()
+    message["to"] = "nomatch@example.com"
+    message["cc"] = ", ".join(
+        (
+            "match@example.com",
+            "match@notamatch.example.com",
+            "Another Match <match+extension@example.com>",
+        )
+    )
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match@example.com"), extension=None,
+        ),
+        utils.AddressMatch(
+            recipient=Address(
+                display_name="Another Match", addr_spec="match+extension@example.com"
+            ),
+            extension="extension",
+        ),
+    ]
+
+
+def test_recipient_matches_casemapping():
+    message = EmailMessage()
+    message["to"] = "match@example.com"
+
+    assert utils.recipient_matches(message, "Match@Example.Com")
+    assert utils.recipient_matches(message, "match@example.com")
+
+    message = EmailMessage()
+    message["to"] = "Match+weirdCaseMapping@Example.Com"
+
+    matches = utils.recipient_matches(message, "match@example.com")
+    assert matches
+    assert matches[0].extension == "weirdCaseMapping"