diff --git a/swh/web/inbound_email/__init__.py b/swh/web/inbound_email/__init__.py
new file mode 100644
diff --git a/swh/web/inbound_email/apps.py b/swh/web/inbound_email/apps.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/apps.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.apps import AppConfig
+
+
+class InboundEmailConfig(AppConfig):
+    """Django application configuration for the inbound email app."""
+
+    default_auto_field = "django.db.models.BigAutoField"
+    # Must be the full dotted path, matching the entry in INSTALLED_APPS
+    name = "swh.web.inbound_email"
diff --git a/swh/web/inbound_email/management/commands/process_inbound_email.py b/swh/web/inbound_email/management/commands/process_inbound_email.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/management/commands/process_inbound_email.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import email
+import email.message
+import email.policy
+import logging
+import sys
+from typing import Callable
+
+import sentry_sdk
+
+from django.core.management.base import BaseCommand
+
+from swh.web.inbound_email import signals
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = "Process a new inbound email"
+
+    def handle(self, *args, **options):
+        """Read a raw email from stdin and dispatch it to the signal receivers.
+
+        If no receiver returns ``EmailProcessingStatus.PROCESSED``, the message
+        is reported through :meth:`handle_unhandled_message`.
+        """
+        raw_message = sys.stdin.buffer.read()
+        try:
+            message = email.message_from_bytes(raw_message, policy=email.policy.default)
+        except Exception as exc:
+            sentry_sdk.capture_exception(exc)
+            self.handle_failed_message(raw_message)
+            # XXX make sure having logging doesn't make postfix unhappy
+            logger.exception("Could not convert email from bytes")
+            return
+
+        responses = signals.email_received.send_robust(
+            sender=self.__class__, message=message
+        )
+
+        handled = False
+        for receiver, response in responses:
+            if isinstance(response, Exception):
+                sentry_sdk.capture_exception(response)
+                self.handle_failing_receiver(message, receiver)
+                logger.error(
+                    "Receiver produced the following exception", exc_info=response
+                )
+            elif response is signals.EmailProcessingStatus.FAILED:
+                self.handle_failing_receiver(message, receiver)
+            elif response is signals.EmailProcessingStatus.PROCESSED:
+                handled = True
+
+        if not handled:
+            self.handle_unhandled_message(message)
+
+    def handle_failed_message(self, raw_message: bytes):
+        """Write a message that could not be parsed to the command's stderr."""
+        # TODO: forward email as attachment for inspection
+        self.stderr.write("Failed message: %s" % raw_message.decode("ascii", "replace"))
+
+    def handle_failing_receiver(
+        self, message: email.message.EmailMessage, receiver: Callable
+    ):
+        """Write the failing ``receiver`` and its ``message`` to the command's stderr."""
+        # TODO: forward email for inspection
+        self.stderr.write(
+            "Failed receiver %s:%s on message:"
+            % (receiver.__module__, receiver.__qualname__)
+        )
+        self.stderr.write(str(message))
+
+    def handle_unhandled_message(self, message: email.message.EmailMessage):
+        """Write a message that no receiver processed to the command's stderr."""
+        # TODO: pass email through to a fallback alias?
+        self.stderr.write("Unhandled message:")
+        self.stderr.write(str(message))
diff --git a/swh/web/inbound_email/signals.py b/swh/web/inbound_email/signals.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/signals.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from enum import Enum, auto
+
+import django.dispatch
+
+email_received = django.dispatch.Signal()
+"""This signal is sent by the `process_inbound_email` management command.
+
+Arguments:
+    message (:class:`email.message.EmailMessage`): the inbound email message
+
+Signal receivers must return an :class:`EmailProcessingStatus` value so that the
+management command knows if the email has been processed.
+
+Signal receivers will be called for all received emails and are expected to do their own
+filtering (e.g. using the original destination address).
+
+Receivers ignoring a message must return `EmailProcessingStatus.IGNORED` to let the
+management command know that the message hasn't been processed.
+
+"""
+
+
+class EmailProcessingStatus(Enum):
+    """Return values for the email processing signal listeners"""
+
+    PROCESSED = auto()
+    """The email has been successfully processed"""
+    FAILED = auto()
+    """The email has been processed, but the processing failed"""
+    IGNORED = auto()
+    """The email has been ignored (e.g. unknown recipient)"""
diff --git a/swh/web/inbound_email/tests.py b/swh/web/inbound_email/tests.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/tests.py
@@ -0,0 +1,3 @@
+# from django.test import TestCase
+
+# Create your tests here.
diff --git a/swh/web/inbound_email/utils.py b/swh/web/inbound_email/utils.py
new file mode 100644
--- /dev/null
+++ b/swh/web/inbound_email/utils.py
@@ -0,0 +1,66 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from dataclasses import dataclass
+from email.headerregistry import Address
+from email.message import EmailMessage
+from typing import List, Optional
+
+
+def extract_recipients(message: EmailMessage) -> List[Address]:
+    """Extract a list of recipients of the `message`.
+
+    This uses the ``To`` and ``Cc`` fields.
+    """
+
+    ret = []
+    to = message.get("to")
+    if to is not None:
+        ret.extend(to.addresses)
+
+    for cc in message.get_all("cc", []):
+        ret.extend(cc.addresses)
+
+    return ret
+
+
+@dataclass
+class AddressMatch:
+    """Data related to a recipient match"""
+
+    recipient: Address
+    """The original recipient that matched the expected address"""
+    extension: Optional[str]
+    """The parsed +-extension of the matched recipient address"""
+
+
+def recipient_matches(message: EmailMessage, address: str) -> List[AddressMatch]:
+    """Check whether the message recipients match the given address.
+
+    This function supports "+-addressing", where the local part of the email address is
+    appended with a `+`.
+    """
+
+    ret = []
+
+    parsed_address = Address(addr_spec=address.lower())
+
+    for recipient in extract_recipients(message):
+        if recipient.domain.lower() != parsed_address.domain:
+            continue
+
+        # extension is either empty (no + in username) or a 1-tuple (+ in username)
+        base_username, *extension = recipient.username.split("+", 1)
+
+        if base_username.lower() != parsed_address.username:
+            continue
+
+        ret.append(
+            AddressMatch(
+                recipient=recipient, extension=None if not extension else extension[0],
+            )
+        )
+
+    return ret
diff --git a/swh/web/settings/common.py b/swh/web/settings/common.py
--- a/swh/web/settings/common.py
+++ b/swh/web/settings/common.py
@@ -43,6 +43,7 @@
     "django.contrib.staticfiles",
     "rest_framework",
     "swh.web.common",
+    "swh.web.inbound_email",
     "swh.web.api",
     "swh.web.auth",
     "swh.web.browse",
diff --git a/swh/web/tests/inbound_email/test_utils.py b/swh/web/tests/inbound_email/test_utils.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/inbound_email/test_utils.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from email.headerregistry import Address
+from email.message import EmailMessage
+
+from swh.web.inbound_email import utils
+
+
+def test_extract_recipients():
+    message = EmailMessage()
+    assert utils.extract_recipients(message) == []
+
+    message["To"] = "Test Recipient <test-recipient@example.com>"
+
+    assert utils.extract_recipients(message) == [
+        Address(display_name="Test Recipient", addr_spec="test-recipient@example.com")
+    ]
+
+    message["Cc"] = (
+        "test-recipient-2@example.com, "
+        "Another Test Recipient <test-recipient-3@example.com>"
+    )
+    assert utils.extract_recipients(message) == [
+        Address(display_name="Test Recipient", addr_spec="test-recipient@example.com"),
+        Address(addr_spec="test-recipient-2@example.com"),
+        Address(
+            display_name="Another Test Recipient",
+            addr_spec="test-recipient-3@example.com",
+        ),
+    ]
+
+    del message["To"]
+    assert utils.extract_recipients(message) == [
+        Address(addr_spec="test-recipient-2@example.com"),
+        Address(
+            display_name="Another Test Recipient",
+            addr_spec="test-recipient-3@example.com",
+        ),
+    ]
+
+
+def test_recipient_matches():
+    message = EmailMessage()
+    assert utils.recipient_matches(message, "match@example.com") == []
+
+    message = EmailMessage()
+    message["to"] = "nomatch@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == []
+
+    message = EmailMessage()
+    message["to"] = "match@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match@example.com"), extension=None
+        )
+    ]
+
+    message = EmailMessage()
+    message["to"] = "match+extension@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match+extension@example.com"),
+            extension="extension",
+        )
+    ]
+
+    message = EmailMessage()
+    message["to"] = "match+weird+plussed+extension@example.com"
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match+weird+plussed+extension@example.com"),
+            extension="weird+plussed+extension",
+        )
+    ]
+
+    message = EmailMessage()
+    message["to"] = "nomatch@example.com"
+    message["cc"] = ", ".join(
+        (
+            "match@example.com",
+            "match@notamatch.example.com",
+            "Another Match <match+extension@example.com>",
+        )
+    )
+    assert utils.recipient_matches(message, "match@example.com") == [
+        utils.AddressMatch(
+            recipient=Address(addr_spec="match@example.com"), extension=None,
+        ),
+        utils.AddressMatch(
+            recipient=Address(
+                display_name="Another Match", addr_spec="match+extension@example.com"
+            ),
+            extension="extension",
+        ),
+    ]
+
+
+def test_recipient_matches_casemapping():
+    message = EmailMessage()
+    message["to"] = "match@example.com"
+
+    assert utils.recipient_matches(message, "Match@Example.Com")
+    assert utils.recipient_matches(message, "match@example.com")
+
+    message = EmailMessage()
+    message["to"] = "Match+weirdCaseMapping@Example.Com"
+
+    matches = utils.recipient_matches(message, "match@example.com")
+    assert matches
+    assert matches[0].extension == "weirdCaseMapping"