Page MenuHomeSoftware Heritage

D7503.diff
No OneTemporary

D7503.diff

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,6 +5,7 @@
rev: v4.1.0
hooks:
- id: trailing-whitespace
+ exclude: '\.eml$'
- id: check-json
- id: check-yaml
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -8,6 +8,7 @@
recursive-include assets *
recursive-include swh/web/templates *
recursive-include swh/web/tests/resources *
+recursive-include swh/web/tests/inbound_email/resources *.eml
include package.json
include yarn.lock
diff --git a/swh/web/inbound_email/utils.py b/swh/web/inbound_email/utils.py
--- a/swh/web/inbound_email/utils.py
+++ b/swh/web/inbound_email/utils.py
@@ -164,3 +164,41 @@
continue
return ret
+
+
+def get_message_plaintext(message: EmailMessage) -> Optional[bytes]:
+    """Get the plaintext body for a given message, if any such part exists. If only
+    an HTML part exists, return that instead.
+
+    Parts whose Content-Disposition is ``attachment`` are skipped. If there are
+    multiple, ambiguous plain text or HTML parts in the message, this function
+    returns the largest of them; ``None`` means no non-empty body was found.
+    """
+    if not message.is_multipart():
+        single_part = message.get_payload(decode=True).rstrip(b"\n")
+        return single_part or None
+
+    text_parts: List[bytes] = []
+    fallback_parts: List[bytes] = []
+
+    for part in message.walk():
+        # Parsed disposition only: a substring test also matches inline filenames.
+        if part.get_content_disposition() == "attachment":
+            continue
+        content_type = part.get_content_type()
+        if content_type == "text/plain":
+            current_part = part.get_payload(decode=True).rstrip(b"\n")
+            if current_part:
+                text_parts.append(current_part)
+        elif not text_parts and content_type == "text/html":
+            current_part = part.get_payload(decode=True).rstrip(b"\n")
+            if current_part:
+                fallback_parts.append(current_part)
+
+    if text_parts:
+        return max(text_parts, key=len)
+
+    if fallback_parts:
+        return max(fallback_parts, key=len)
+
+    return None
diff --git a/swh/web/tests/inbound_email/__init__.py b/swh/web/tests/inbound_email/__init__.py
new file mode 100644
diff --git a/swh/web/tests/inbound_email/resources/__init__.py b/swh/web/tests/inbound_email/resources/__init__.py
new file mode 100644
diff --git a/swh/web/tests/inbound_email/resources/multipart.eml b/swh/web/tests/inbound_email/resources/multipart.eml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/inbound_email/resources/multipart.eml
@@ -0,0 +1,24 @@
+Return-Path: <test@example.com>
+X-Mailer: MessagingEngine.com Webmail Interface
+User-Agent: Cyrus-JMAP/3.7.0-alpha0-382-g88b93171a9-fm-20220330.001-g88b93171
+Mime-Version: 1.0
+Message-Id: <c78f8ea4-6dc7-4604-b0b1-be1f4aecdc7f@www.fastmail.com>
+Date: Mon, 04 Apr 2022 17:10:00 +0200
+From: "Test User" <test@example.com>
+To: test@example.com
+Subject: Multipart email
+Content-Type: multipart/alternative;
+ boundary=67575b1b68b24603a2d00f02e032c975
+
+--67575b1b68b24603a2d00f02e032c975
+Content-Type: text/plain
+
+*Multipart email.*
+
+--
+Test User
+--67575b1b68b24603a2d00f02e032c975
+Content-Type: text/html
+
+<!DOCTYPE html><html><head><title></title><style type="text/css">p.MsoNormal,p.MsoNoSpacing{margin:0}</style></head><body><div><b>Multipart email.</b><br></div><div><br></div><div>--&nbsp;<br></div><div>Test User<br></div></body></html>
+--67575b1b68b24603a2d00f02e032c975--
diff --git a/swh/web/tests/inbound_email/resources/multipart_html_only.eml b/swh/web/tests/inbound_email/resources/multipart_html_only.eml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/inbound_email/resources/multipart_html_only.eml
@@ -0,0 +1,21 @@
+Return-Path: <test@example.com>
+X-Mailer: MessagingEngine.com Webmail Interface
+User-Agent: Cyrus-JMAP/3.7.0-alpha0-382-g88b93171a9-fm-20220330.001-g88b93171
+Mime-Version: 1.0
+Message-Id: <c78f8ea4-6dc7-4604-b0b1-be1f4aecdc7f@www.fastmail.com>
+Date: Mon, 04 Apr 2022 17:10:00 +0200
+From: "Test User" <test@example.com>
+To: test@example.com
+Subject: Multipart email
+Content-Type: multipart/alternative;
+ boundary=67575b1b68b24603a2d00f02e032c975
+
+--67575b1b68b24603a2d00f02e032c975
+Content-Type: text/html
+
+<!DOCTYPE html><html><head><title></title></head><body><div><b>Multipart email (short html part).</b><br></div><div><br></div><div>--&nbsp;<br></div><div>Test User<br></div></body></html>
+--67575b1b68b24603a2d00f02e032c975
+Content-Type: text/html
+
+<!DOCTYPE html><html><head><title></title></head><body><div><b>Multipart email (a much longer html part).</b><br></div><div><br></div><div>--&nbsp;<br></div><div>Test User<br></div></body></html>
+--67575b1b68b24603a2d00f02e032c975--
diff --git a/swh/web/tests/inbound_email/resources/multipart_text_only.eml b/swh/web/tests/inbound_email/resources/multipart_text_only.eml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/inbound_email/resources/multipart_text_only.eml
@@ -0,0 +1,27 @@
+Return-Path: <test@example.com>
+X-Mailer: MessagingEngine.com Webmail Interface
+User-Agent: Cyrus-JMAP/3.7.0-alpha0-382-g88b93171a9-fm-20220330.001-g88b93171
+Mime-Version: 1.0
+Message-Id: <c78f8ea4-6dc7-4604-b0b1-be1f4aecdc7f@www.fastmail.com>
+Date: Mon, 04 Apr 2022 17:10:00 +0200
+From: "Test User" <test@example.com>
+To: test@example.com
+Subject: Multipart email
+Content-Type: multipart/alternative;
+ boundary=67575b1b68b24603a2d00f02e032c975
+
+--67575b1b68b24603a2d00f02e032c975
+Content-Type: text/plain
+
+*Multipart email.*
+
+--
+Test User
+--67575b1b68b24603a2d00f02e032c975
+Content-Type: text/plain
+
+*Multipart email, but a longer text part.*
+
+--
+Test User
+--67575b1b68b24603a2d00f02e032c975--
diff --git a/swh/web/tests/inbound_email/resources/plaintext.eml b/swh/web/tests/inbound_email/resources/plaintext.eml
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/inbound_email/resources/plaintext.eml
@@ -0,0 +1,15 @@
+Return-Path: <test@example.com>
+X-Mailer: MessagingEngine.com Webmail Interface
+User-Agent: Cyrus-JMAP/3.7.0-alpha0-382-g88b93171a9-fm-20220330.001-g88b93171
+Mime-Version: 1.0
+Message-Id: <21e17bca-d6a7-40fb-bab8-5dadd939835b@www.fastmail.com>
+Date: Mon, 04 Apr 2022 17:08:04 +0200
+From: "Test User" <test@example.com>
+To: test@example.com
+Subject: Plain text email
+Content-Type: text/plain
+
+Plain text email.
+
+--
+Test User
diff --git a/swh/web/tests/inbound_email/test_utils.py b/swh/web/tests/inbound_email/test_utils.py
--- a/swh/web/tests/inbound_email/test_utils.py
+++ b/swh/web/tests/inbound_email/test_utils.py
@@ -3,8 +3,14 @@
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import email
from email.headerregistry import Address
from email.message import EmailMessage
+import email.policy
+from importlib.resources import open_binary
+from typing import List
+
+import pytest
from swh.web.inbound_email import utils
@@ -241,3 +247,52 @@
assert relevant_records[1].levelname == "DEBUG"
assert f"{mangled_address} failed" in relevant_records[1].getMessage()
+
+
+@pytest.mark.parametrize(
+    "filename,expected_parts,expected_absent",
+    (
+        pytest.param(
+            "plaintext.eml",
+            [b"Plain text email.\n\n-- \nTest User"],
+            [],
+            id="plaintext",
+        ),
+        pytest.param(
+            "multipart.eml",
+            [b"*Multipart email.*\n\n-- \nTest User"],
+            [],
+            id="multipart",
+        ),
+        pytest.param(
+            "multipart_html_only.eml",
+            [b"<html>", b"<b>Multipart email (a much longer html part).</b>"],
+            [b"<b>Multipart email (short html part).</b>"],  # as in the fixture
+            id="multipart_html_only",
+        ),
+        pytest.param(
+            "multipart_text_only.eml",
+            [b"*Multipart email, but a longer text part.*\n\n--\nTest User"],
+            [],
+            id="multipart_text_only",
+        ),
+    ),
+)
+def test_get_message_plaintext(
+    filename: str, expected_parts: List[bytes], expected_absent: List[bytes]
+):
+    """Check the body extracted from each ``.eml`` fixture under ``resources``."""
+    with open_binary("swh.web.tests.inbound_email.resources", filename) as f:
+        message = email.message_from_binary_file(f, policy=email.policy.default)
+
+    assert isinstance(message, EmailMessage)
+    plaintext = utils.get_message_plaintext(message)
+    assert plaintext is not None
+
+    if len(expected_parts) == 1:
+        assert plaintext == expected_parts[0]
+    else:
+        for part in expected_parts:
+            assert part in plaintext
+    for part in expected_absent:
+        assert part not in plaintext

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 11:05 AM (3 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216958

Event Timeline