diff --git a/imbox/parser.py b/imbox/parser.py index 538ba5c..d13c139 100644 --- a/imbox/parser.py +++ b/imbox/parser.py @@ -121,7 +121,7 @@ def decode_content(message): def parse_email(raw_email, policy=None): if isinstance(raw_email, bytes): - raw_email = str_encode(raw_email, 'utf-8') + raw_email = str_encode(raw_email, 'utf-8', errors='ignore') if policy is not None: email_parse_kwargs = dict(policy=policy) else: diff --git a/tests/8422.msg b/tests/8422.msg new file mode 100644 index 0000000..9586608 --- /dev/null +++ b/tests/8422.msg @@ -0,0 +1,22 @@ +Delivered-To: receiver@example.com +Return-Path: +Date: Thu, 20 Jul 2017 07:34:22 -0500 +Message-ID: <59705CFE.A95F.0016.0@journeys.com> +Subject: Following up Re: Looking to connect, let's schedule a call! +From: sender@example.com +To: "Receiver" +Mime-Version: 1.0 +Content-Type: multipart/mixed; boundary="=__PartBD85995F.0__=" + +This is a MIME message. If you are reading this text, you may want to +consider changing to a mail reader or gateway that understands how to +properly handle MIME multipart messages. + +--=__PartBD85995F.0__= +Content-Type: multipart/alternative; boundary="=__PartBD85995F.1__=" + +--=__PartBD85995F.1__= +Content-Type: text/plain; charset=Windows-1252 +Content-Transfer-Encoding: 8bit + +Following up on my previous message. 
I’d love to connect you with diff --git a/tests/parser_tests.py b/tests/parser_tests.py index 9365123..0ac1b70 100644 --- a/tests/parser_tests.py +++ b/tests/parser_tests.py @@ -1,6 +1,7 @@ import unittest from imbox.parser import * +import os import sys if sys.version_info.minor < 3: SMTP = False @@ -8,6 +9,9 @@ from email.policy import SMTP +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + raw_email = """Delivered-To: johndoe@gmail.com X-Originating-Email: [martin@amon.cx] Message-ID: @@ -192,6 +196,10 @@ def test_parse_email_encoded(self): self.assertEqual('Выписка по карте', parsed_email.subject) self.assertEqual('Выписка по карте 1234', parsed_email.body['html'][0]) + def test_parse_email_invalid_unicode(self): + parsed_email = parse_email(open(os.path.join(TEST_DIR, '8422.msg'), 'rb').read()) + self.assertEqual("Following up Re: Looking to connect, let's schedule a call!", parsed_email.subject) + def test_parse_email_inline_body(self): parsed_email = parse_email(raw_email_encoded_another_bad_multipart) self.assertEqual("Re: Reaching Out About Peoples Home Equity", parsed_email.subject)