diff --git a/swh/web/inbound_email/utils.py b/swh/web/inbound_email/utils.py --- a/swh/web/inbound_email/utils.py +++ b/swh/web/inbound_email/utils.py @@ -180,6 +180,7 @@ text_parts: List[bytes] = [] fallback_parts: List[bytes] = [] + all_parts: List[bytes] = [] for part in message.walk(): content_type = part.get_content_type() @@ -190,15 +191,23 @@ current_part = part.get_payload(decode=True).rstrip(b"\n") if current_part: text_parts.append(current_part) - elif not text_parts and content_type == "text/html": + all_parts.append(current_part) + elif content_type == "text/html": current_part = part.get_payload(decode=True).rstrip(b"\n") if current_part: fallback_parts.append(current_part) - - if text_parts: - return max(text_parts, key=len) - - if fallback_parts: - return max(fallback_parts, key=len) + all_parts.append(current_part) + + assert message.get_content_maintype() == "multipart" + if message.get_content_subtype() == "alternative": + if text_parts: + return max(text_parts, key=len) + + if fallback_parts: + return max(fallback_parts, key=len) + else: + # Handles multipart/mixed; but this should be an appropriate handling for + # other multipart formats + return b"".join(all_parts) return None diff --git a/swh/web/tests/inbound_email/resources/multipart.eml b/swh/web/tests/inbound_email/resources/multipart_alternative.eml rename from swh/web/tests/inbound_email/resources/multipart.eml rename to swh/web/tests/inbound_email/resources/multipart_alternative.eml diff --git a/swh/web/tests/inbound_email/resources/multipart_html_only.eml b/swh/web/tests/inbound_email/resources/multipart_alternative_html_only.eml rename from swh/web/tests/inbound_email/resources/multipart_html_only.eml rename to swh/web/tests/inbound_email/resources/multipart_alternative_html_only.eml diff --git a/swh/web/tests/inbound_email/resources/multipart_text_only.eml b/swh/web/tests/inbound_email/resources/multipart_alternative_text_only.eml rename from swh/web/tests/inbound_email/resources/multipart_text_only.eml rename to swh/web/tests/inbound_email/resources/multipart_alternative_text_only.eml diff --git a/swh/web/tests/inbound_email/resources/multipart_mixed.eml b/swh/web/tests/inbound_email/resources/multipart_mixed.eml new file mode 100644 --- /dev/null +++ b/swh/web/tests/inbound_email/resources/multipart_mixed.eml @@ -0,0 +1,23 @@ +Return-Path: +Content-Type: multipart/mixed; boundary="------------BR6CvS06rj0369ladb31gYCL" +Message-ID: <8318382c-b4d3-9239-0805-e8cff4b9187a@inria.fr> +Date: Thu, 7 Apr 2022 10:30:36 +0200 +MIME-Version: 1.0 +User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 + Thunderbird/91.7.0 +Content-Language: en-US +To: Valentin Lorentz +From: Valentin Lorentz +Subject: test email + +This is a multi-part message in MIME format. +--------------BR6CvS06rj0369ladb31gYCL +Content-Type: text/plain; charset=UTF-8; format=flowed +Content-Transfer-Encoding: 7bit + +This is plain text + +--------------BR6CvS06rj0369ladb31gYCL +Content-Type: text/html + +and this is HTML diff --git a/swh/web/tests/inbound_email/resources/multipart_mixed2.eml b/swh/web/tests/inbound_email/resources/multipart_mixed2.eml new file mode 100644 --- /dev/null +++ b/swh/web/tests/inbound_email/resources/multipart_mixed2.eml @@ -0,0 +1,25 @@ +Return-Path: +Content-Type: multipart/mixed; boundary="------------BR6CvS06rj0369ladb31gYCL" +Message-ID: <8318382c-b4d3-9239-0805-e8cff4b9187a@inria.fr> +Date: Thu, 7 Apr 2022 10:30:36 +0200 +MIME-Version: 1.0 +User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 + Thunderbird/91.7.0 +Content-Language: en-US +To: Valentin Lorentz +From: Valentin Lorentz +Subject: test email + +This is a multi-part message in MIME format. +--------------BR6CvS06rj0369ladb31gYCL +Content-Type: text/plain; charset=UTF-8; format=flowed +Content-Transfer-Encoding: 7bit + +This is plain text + +--------------BR6CvS06rj0369ladb31gYCL +Content-Type: text/plain; charset=UTF-8; format=flowed +Content-Transfer-Encoding: 7bit + +and this is more text + diff --git a/swh/web/tests/inbound_email/resources/multipart_mixed_text_only.eml b/swh/web/tests/inbound_email/resources/multipart_mixed_text_only.eml new file mode 100644 --- /dev/null +++ b/swh/web/tests/inbound_email/resources/multipart_mixed_text_only.eml @@ -0,0 +1,36 @@ +Return-Path: +Content-Type: multipart/mixed; boundary="------------BR6CvS06rj0369ladb31gYCL" +Message-ID: <8318382c-b4d3-9239-0805-e8cff4b9187a@inria.fr> +Date: Thu, 7 Apr 2022 10:30:36 +0200 +MIME-Version: 1.0 +User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 + Thunderbird/91.7.0 +Content-Language: en-US +To: Valentin Lorentz +From: Valentin Lorentz +Subject: test email + +This is a multi-part message in MIME format. +--------------BR6CvS06rj0369ladb31gYCL +Content-Type: text/plain; charset=UTF-8; format=flowed +Content-Transfer-Encoding: 7bit + +My test email +--------------BR6CvS06rj0369ladb31gYCL +Content-Type: text/html; charset=UTF-8; name="attachment.html" +Content-Disposition: attachment; filename="attachment.html" +Content-Transfer-Encoding: base64 + +PGh0bWw+CiAgICA8aGVhZD4KICAgICAgICA8dGl0bGU+VGhpcyBpcyBhbiBleGFtcGxlIEhU +TUwgYXR0YWNobWVudDwvdGl0bGU+CiAgICA8L2hlYWQ+CiAgICA8Ym9keT4KICAgICAgICA8 +cD5UaGlzIGlzIGFuIGV4YW1wbGUgSFRNTCBhdHRhY2htZW50PC9wPgogICAgPC9ib2R5Pgo8 +L2h0bWw+Cg== +--------------BR6CvS06rj0369ladb31gYCL +Content-Type: text/plain; charset=UTF-8; name="attachment.txt" +Content-Disposition: attachment; filename="attachment.txt" +Content-Transfer-Encoding: base64 + +VGhpcyBpcyBhIHRleHQgYXR0YWNobWVudC4K + +--------------BR6CvS06rj0369ladb31gYCL-- + diff --git a/swh/web/tests/inbound_email/test_utils.py b/swh/web/tests/inbound_email/test_utils.py --- a/swh/web/tests/inbound_email/test_utils.py +++ b/swh/web/tests/inbound_email/test_utils.py @@ -259,22 +259,44 @@ id="plaintext", ), pytest.param( - "multipart.eml", + "multipart_alternative.eml", [b"*Multipart email.*\n\n-- \nTest User"], [], - id="multipart", + id="multipart_alternative", ), pytest.param( - "multipart_html_only.eml", + "multipart_alternative_html_only.eml", [b"", b"Multipart email (a much longer html part)."], [b"Multipart email (short html part)"], - id="multipart_html_only", + id="multipart_alternative_html_only", ), pytest.param( - "multipart_text_only.eml", + "multipart_alternative_text_only.eml", [b"*Multipart email, but a longer text part.*\n\n--\nTest User"], [], - id="multipart_text_only", + id="multipart_alternative_text_only", + ), + pytest.param( + "multipart_mixed.eml", + [b"This is plain text", b"and this is HTML"], + [b"This is a multi-part message in MIME format."], + id="multipart_mixed", + ), + pytest.param( + "multipart_mixed2.eml", + [b"This is plain text", b"and this is more text"], + [b"This is a multi-part message in MIME format."], + id="multipart_mixed2", + ), + pytest.param( + "multipart_mixed_text_only.eml", + [b"My test email"], + [ + b"HTML attachment", + b"text attachment", + b"This is a multi-part message in MIME format.", + ], + id="multipart_mixed_text_only", ), ), )