diff --git a/assets/src/bundles/add_forge/request-dashboard.js b/assets/src/bundles/add_forge/request-dashboard.js --- a/assets/src/bundles/add_forge/request-dashboard.js +++ b/assets/src/bundles/add_forge/request-dashboard.js @@ -57,6 +57,7 @@ // Setting data for the email, now adding static data $('#contactForgeAdmin').attr('emailTo', forgeRequest.forge_contact_email); + $('#contactForgeAdmin').attr('emailCc', forgeRequest.inbound_email_address); $('#contactForgeAdmin').attr('emailSubject', `[swh-add_forge_now] Request ${forgeRequest.id}`); populateRequestHistory(data.history); populateDecisionSelectOption(forgeRequest.status); @@ -123,9 +124,10 @@ function contactForgeAdmin(event) { // Open the mailclient with pre-filled text const mailTo = $('#contactForgeAdmin').attr('emailTo'); + const mailCc = $('#contactForgeAdmin').attr('emailCc'); const subject = $('#contactForgeAdmin').attr('emailSubject'); const emailText = emailTempate({'forgeUrl': forgeRequest.forge_url}).trim().replace(/\n/g, '%0D%0A'); const w = window.open('', '_blank', '', true); - w.location.href = `mailto: ${mailTo}?subject=${subject}&body=${emailText}`; + w.location.href = `mailto:${mailTo}?Cc=${mailCc}&Reply-To=${mailCc}&Subject=${subject}&body=${emailText}`; w.focus(); } diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -6,7 +6,7 @@ pytest < 7.0.0 # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use pytest-django pytest-mock -pytest-postgresql +pytest-postgresql < 4.0.0 requests-mock != 1.9.0, != 1.9.1 swh.core[http] >= 0.0.95 swh.loader.git >= 0.8.0 diff --git a/swh/web/add_forge_now/apps.py b/swh/web/add_forge_now/apps.py --- a/swh/web/add_forge_now/apps.py +++ b/swh/web/add_forge_now/apps.py @@ -11,3 +11,9 @@ class AddForgeNowConfig(AppConfig): name = "swh.web.add_forge_now" label = APP_LABEL + + def ready(self): + from ..inbound_email.signals import email_received + from .signal_receivers import handle_inbound_message + + email_received.connect(handle_inbound_message) diff --git a/swh/web/add_forge_now/migrations/0005_prepare_inbound_email.py b/swh/web/add_forge_now/migrations/0005_prepare_inbound_email.py new file mode 100644 --- /dev/null +++ b/swh/web/add_forge_now/migrations/0005_prepare_inbound_email.py @@ -0,0 +1,30 @@ +# Generated by Django 2.2.16 on 2022-04-01 15:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("swh_web_add_forge_now", "0004_rename_tables"), + ] + + operations = [ + migrations.AddField( + model_name="requesthistory", + name="raw_message", + field=models.BinaryField(null=True), + ), + migrations.AlterField( + model_name="requesthistory", + name="actor_role", + field=models.TextField( + choices=[ + ("MODERATOR", "moderator"), + ("SUBMITTER", "submitter"), + ("FORGE_ADMIN", "forge admin"), + ("EMAIL", "email"), + ] + ), + ), + ] diff --git a/swh/web/add_forge_now/models.py b/swh/web/add_forge_now/models.py --- a/swh/web/add_forge_now/models.py +++ b/swh/web/add_forge_now/models.py @@ -10,6 +10,8 @@ from django.db import models +from ..config import get_config +from ..inbound_email.utils import get_address_for_pk from .apps import APP_LABEL @@ -64,6 +66,7 @@ MODERATOR = "moderator" SUBMITTER = "submitter" FORGE_ADMIN = "forge admin" + EMAIL = "email" @classmethod def choices(cls): @@ -82,6 +85,7 @@ actor_role = models.TextField(choices=RequestActorRole.choices()) date = models.DateTimeField(auto_now_add=True) new_status = models.TextField(choices=RequestStatus.choices(), null=True) + raw_message = models.BinaryField(null=True) class Meta: app_label = APP_LABEL @@ -108,3 +112,8 @@ class Meta: app_label = APP_LABEL db_table = "add_forge_request" + + @property + def inbound_email_address(self) -> str: + base_address = get_config()["add_forge_now"]["email_address"] + return get_address_for_pk(salt=APP_LABEL, base_address=base_address, pk=self.pk) diff --git a/swh/web/add_forge_now/signal_receivers.py b/swh/web/add_forge_now/signal_receivers.py new file mode 100644 --- /dev/null +++ b/swh/web/add_forge_now/signal_receivers.py @@ -0,0 +1,70 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from email.message import EmailMessage +from typing import Type + +from ..config import get_config +from ..inbound_email.signals import EmailProcessingStatus +from ..inbound_email.utils import get_message_plaintext, get_pks_from_message +from .apps import APP_LABEL +from .models import Request, RequestActorRole, RequestHistory, RequestStatus + + +def handle_inbound_message(sender: Type, **kwargs) -> EmailProcessingStatus: + """Handle inbound email messages for add forge now. + + This uses the from field in the message to set the actor for the new entry in the + request history. We also unconditionally advance the status of requests in the + ``PENDING`` or ``WAITING_FOR_FEEDBACK`` states. + + The raw message is saved in the request history as an escape hatch if something goes + wrong during processing. + + """ + + message = kwargs["message"] + assert isinstance(message, EmailMessage) + + base_address = get_config()["add_forge_now"]["email_address"] + + pks = get_pks_from_message( + salt=APP_LABEL, base_address=base_address, message=message + ) + + if not pks: + return EmailProcessingStatus.IGNORED + + for pk in pks: + try: + request = Request.objects.get(pk=pk) + except Request.DoesNotExist: + continue + + request_updated = False + + history_entry = RequestHistory( + request=request, + actor=str(message["from"]), + actor_role=RequestActorRole.EMAIL.name, + text=get_message_plaintext(message) + or "Could not decode inbound message, see raw message.", + raw_message=bytes(message), + ) + + new_status = { + RequestStatus.PENDING: RequestStatus.WAITING_FOR_FEEDBACK, + RequestStatus.WAITING_FOR_FEEDBACK: RequestStatus.FEEDBACK_TO_HANDLE, + }.get(RequestStatus(request.status)) + + if new_status: + request.status = history_entry.new_status = new_status.name + request_updated = True + + history_entry.save() + if request_updated: + request.save() + + return EmailProcessingStatus.PROCESSED diff --git a/swh/web/api/views/add_forge_now.py b/swh/web/api/views/add_forge_now.py --- a/swh/web/api/views/add_forge_now.py +++ b/swh/web/api/views/add_forge_now.py @@ -58,6 +58,8 @@ class AddForgeNowRequestSerializer(serializers.ModelSerializer): + inbound_email_address = serializers.CharField() + class Meta: model = AddForgeRequest fields = "__all__" @@ -75,7 +77,7 @@ class AddForgeNowRequestHistorySerializer(serializers.ModelSerializer): class Meta: model = AddForgeNowRequestHistory - exclude = ("request",) + exclude = ("request", "raw_message") class AddForgeNowRequestHistoryPublicSerializer(serializers.ModelSerializer): diff --git a/swh/web/config.py b/swh/web/config.py --- a/swh/web/config.py +++ b/swh/web/config.py @@ -128,6 +128,7 @@ "instance_name": ("str", "archive-test.softwareheritage.org"), "give": ("dict", {"public_key": "", "token": ""}), "features": ("dict", {"add_forge_now": True}), + "add_forge_now": ("dict", {"email_address": "add-forge-now@example.com"}), } swhweb_config: Dict[str, Any] = {} diff --git a/swh/web/inbound_email/utils.py b/swh/web/inbound_email/utils.py --- a/swh/web/inbound_email/utils.py +++ b/swh/web/inbound_email/utils.py @@ -6,7 +6,13 @@ from dataclasses import dataclass from email.headerregistry import Address from email.message import EmailMessage -from typing import List, Optional +import logging +from typing import List, Optional, Set + +from django.core.signing import Signer +from django.utils.crypto import constant_time_compare + +logger = logging.getLogger(__name__) def extract_recipients(message: EmailMessage) -> List[Address]: @@ -34,6 +40,31 @@ """The parsed +-extension of the matched recipient address""" +def single_recipient_matches( + recipient: Address, address: str +) -> Optional[AddressMatch]: + """Check whether a single address matches the provided base address. + + The match is case-insensitive, which is not really RFC-compliant but is consistent + with what most people would expect. + + This function supports "+-addressing", where the local part of the email address is + appended with a `+`. + + """ + parsed_address = Address(addr_spec=address.lower()) + + if recipient.domain.lower() != parsed_address.domain: + return None + + base_username, _, extension = recipient.username.partition("+") + + if base_username.lower() != parsed_address.username: + return None + + return AddressMatch(recipient=recipient, extension=extension or None) + + def recipient_matches(message: EmailMessage, address: str) -> List[AddressMatch]: """Check whether any of the message recipients match the given address. @@ -47,17 +78,110 @@ ret = [] - parsed_address = Address(addr_spec=address.lower()) - for recipient in extract_recipients(message): - if recipient.domain.lower() != parsed_address.domain: - continue + match = single_recipient_matches(recipient, address) + if match: + ret.append(match) + + return ret + + +ADDRESS_SIGNER_SEP = "." +"""Separator for email address signatures""" - base_username, _, extension = recipient.username.partition("+") - if base_username.lower() != parsed_address.username: +def get_address_signer(salt: str): + """Get a signer for the given seed""" + return Signer(salt=salt, sep=ADDRESS_SIGNER_SEP) + + +def get_address_for_pk(salt: str, base_address: str, pk: int) -> str: + """Get the email address that will be able to receive messages to be logged in + this request.""" + if "@" not in base_address: + raise ValueError("Base address needs to contain an @") + + username, domain = base_address.split("@") + + extension = get_address_signer(salt).sign(str(pk)) + + return f"{username}+{extension}@{domain}" + + +def get_pk_from_extension(salt: str, extension: str) -> int: + """Retrieve the primary key for the given inbound address extension. + + We reimplement `Signer.unsign`, because the extension can be casemapped at any + point in the email chain (even though email is, theoretically, case sensitive), + so we have to compare lowercase versions of both the extension and the + signature... + + Raises ValueError if the signature couldn't be verified. + + """ + + value, signature = extension.rsplit(ADDRESS_SIGNER_SEP, 1) + expected_signature = get_address_signer(salt).signature(value) + if not constant_time_compare(signature.lower(), expected_signature.lower()): + raise ValueError(f"Invalid signature for extension {extension}") + + return int(value) + + +def get_pks_from_message( + salt: str, base_address: str, message: EmailMessage +) -> Set[int]: + """Retrieve the set of primary keys that were successfully decoded from the + recipients of the ``message`` matching ``base_address``. + + This uses :func:`recipient_matches` to retrieve all the recipient addresses matching + ``base_address``, then :func:`get_pk_from_extension` to decode the primary key and + verify the signature for every extension. To generate relevant email addresses, use + :func:`get_address_for_pk` with the same ``base_address`` and ``salt``. + + Returns: + the set of primary keys that were successfully decoded from the recipients of the + ``message`` + + """ + ret: Set[int] = set() + + for match in recipient_matches(message, base_address): + extension = match.extension + if extension is None: + logger.debug( + "Recipient address %s cannot be matched to a request, ignoring", + match.recipient.addr_spec, + ) continue - ret.append(AddressMatch(recipient=recipient, extension=extension or None)) + try: + ret.add(get_pk_from_extension(salt, extension)) + except ValueError: + logger.debug( + "Recipient address %s failed validation", match.recipient.addr_spec + ) + continue return ret + + +def get_message_plaintext(message: EmailMessage) -> Optional[str]: + """Get the plaintext body for a given message, if any such part exists. If only a html + part exists, return that instead.""" + + decoded_html = None + + if message.is_multipart(): + for part in message.walk(): + content_type = part.get_content_type() + content_disposition = str(part.get("Content-Disposition")) + if "attachment" in content_disposition: + continue + if content_type == "text/plain": + return part.get_payload(decode=True) + elif content_type == "text/html": + decoded_html = part.get_payload(decode=True) + return decoded_html + else: + return message.get_payload(decode=True) diff --git a/swh/web/templates/add_forge_now/request-dashboard.html b/swh/web/templates/add_forge_now/request-dashboard.html --- a/swh/web/templates/add_forge_now/request-dashboard.html +++ b/swh/web/templates/add_forge_now/request-dashboard.html @@ -107,7 +107,7 @@
diff --git a/swh/web/tests/api/views/test_add_forge_now.py b/swh/web/tests/api/views/test_add_forge_now.py --- a/swh/web/tests/api/views/test_add_forge_now.py +++ b/swh/web/tests/api/views/test_add_forge_now.py @@ -14,6 +14,8 @@ from swh.web.add_forge_now.models import Request from swh.web.common.utils import reverse +from swh.web.config import get_config +from swh.web.inbound_email.utils import get_address_for_pk from swh.web.tests.utils import ( check_api_get_responses, check_api_post_response, @@ -75,6 +77,15 @@ } +def inbound_email_for_pk(pk: int) -> str: + """Check that the inbound email matches the one expected for the given pk""" + + base_address = get_config()["add_forge_now"]["email_address"] + return get_address_for_pk( + salt="swh_web_add_forge_now", base_address=base_address, pk=pk + ) + + @pytest.mark.django_db(transaction=True, reset_sequences=True) @pytest.mark.parametrize( "add_forge_data", @@ -111,6 +122,7 @@ "submitter_name": regular_user.username, "submitter_email": regular_user.email, "submitter_forward_username": expected_consent_bool, + "inbound_email_address": inbound_email_for_pk(resp.data["id"]), } assert date_before < iso8601.parse_date(resp.data["submission_date"]) < date_after @@ -145,6 +157,7 @@ "submission_date": resp.data["submission_date"], "submitter_name": regular_user.username, "submitter_email": regular_user.email, + "inbound_email_address": inbound_email_for_pk(1), } assert date_before < iso8601.parse_date(resp.data["submission_date"]) < date_after @@ -355,6 +368,7 @@ "submitter_name": regular_user.username, "submitter_email": regular_user.email, "id": 1, + "inbound_email_address": inbound_email_for_pk(1), } other_forge_request = { @@ -364,6 +378,7 @@ "submitter_name": regular_user.username, "submitter_email": regular_user.email, "id": 2, + "inbound_email_address": inbound_email_for_pk(2), } assert resp.data == [other_forge_request, add_forge_request] @@ -495,6 +510,7 @@ "submission_date": submission_date, "submitter_name": regular_user.username, "submitter_email": regular_user.email, + "inbound_email_address": inbound_email_for_pk(1), }, "history": [ { diff --git a/swh/web/tests/inbound_email/test_utils.py b/swh/web/tests/inbound_email/test_utils.py --- a/swh/web/tests/inbound_email/test_utils.py +++ b/swh/web/tests/inbound_email/test_utils.py @@ -42,6 +42,25 @@ ] +def test_single_recipient_matches(): + assert ( + utils.single_recipient_matches( + Address(addr_spec="test@example.com"), "match@example.com" + ) + is None + ) + assert utils.single_recipient_matches( + Address(addr_spec="match@example.com"), "match@example.com" + ) == utils.AddressMatch( + recipient=Address(addr_spec="match@example.com"), extension=None + ) + assert utils.single_recipient_matches( + Address(addr_spec="MaTch+12345AbC@exaMple.Com"), "match@example.com" + ) == utils.AddressMatch( + recipient=Address(addr_spec="MaTch+12345AbC@exaMple.Com"), extension="12345AbC" + ) + + def test_recipient_matches(): message = EmailMessage() assert utils.recipient_matches(message, "match@example.com") == [] @@ -111,3 +130,114 @@ matches = utils.recipient_matches(message, "match@example.com") assert matches assert matches[0].extension == "weirdCaseMapping" + + +def test_get_address_for_pk(): + salt = "test_salt" + pks = [1, 10, 1000] + base_address = "base@example.com" + + addresses = { + pk: utils.get_address_for_pk(salt=salt, base_address=base_address, pk=pk) + for pk in pks + } + + assert len(set(addresses.values())) == len(addresses) + + for pk, address in addresses.items(): + localpart, _, domain = address.partition("@") + base_localpart, _, extension = localpart.partition("+") + assert domain == "example.com" + assert base_localpart == "base" + assert extension.startswith(f"{pk}.") + + +def test_get_address_for_pk_salt(): + pk = 1000 + base_address = "base@example.com" + addresses = [ + utils.get_address_for_pk(salt=salt, base_address=base_address, pk=pk) + for salt in ["salt1", "salt2"] + ] + + assert len(addresses) == len(set(addresses)) + + +def test_get_pks_from_message(): + salt = "test_salt" + pks = [1, 10, 1000] + base_address = "base@example.com" + + addresses = { + pk: utils.get_address_for_pk(salt=salt, base_address=base_address, pk=pk) + for pk in pks + } + + message = EmailMessage() + message["To"] = "test@example.com" + + assert utils.get_pks_from_message(salt, base_address, message) == set() + + message = EmailMessage() + message["To"] = f"Test Address <{addresses[1]}>" + + assert utils.get_pks_from_message(salt, base_address, message) == {1} + + message = EmailMessage() + message["To"] = f"Test Address <{addresses[1]}>" + message["Cc"] = ", ".join( + [ + f"Test Address <{addresses[1]}>", + f"Another Test Address <{addresses[10].lower()}>", + "A Third Address ", + ] + ) + + assert utils.get_pks_from_message(salt, base_address, message) == {1, 10} + + +def test_get_pks_from_message_logging(caplog): + salt = "test_salt" + pks = [1, 10, 1000] + base_address = "base@example.com" + + addresses = { + pk: utils.get_address_for_pk(salt=salt, base_address=base_address, pk=pk) + for pk in pks + } + + message = EmailMessage() + message["To"] = f"Test Address <{base_address}>" + + assert utils.get_pks_from_message(salt, base_address, message) == set() + + relevant_records = [ + record + for record in caplog.records + if record.name == "swh.web.inbound_email.utils" + ] + assert len(relevant_records) == 1 + assert relevant_records[0].levelname == "DEBUG" + assert ( + f"{base_address} cannot be matched to a request" + in relevant_records[0].getMessage() + ) + + # Replace the signature with "mangle{signature}" + mangled_address = addresses[1].replace(".", ".mangle", 1) + + message = EmailMessage() + message["To"] = f"Test Address <{mangled_address}>" + + assert utils.get_pks_from_message(salt, base_address, message) == set() + + relevant_records = [ + record + for record in caplog.records + if record.name == "swh.web.inbound_email.utils" + ] + assert len(relevant_records) == 2 + assert relevant_records[0].levelname == "DEBUG" + assert relevant_records[1].levelname == "DEBUG" + print(relevant_records) + assert f"{mangled_address} failed" in relevant_records[1].getMessage()