Skip to content

Commit

Permalink
Merge pull request #96 from GetHappie/unicode-decode-fix
Browse files Browse the repository at this point in the history
Fix UnicodeDecodeError parsing email
  • Loading branch information
martinrusev committed Sep 28, 2017
2 parents 2ed7284 + 7dad0ed commit 7c5a639
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 1 deletion.
2 changes: 1 addition & 1 deletion imbox/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def decode_content(message):

def parse_email(raw_email, policy=None):
if isinstance(raw_email, bytes):
raw_email = str_encode(raw_email, 'utf-8')
raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
if policy is not None:
email_parse_kwargs = dict(policy=policy)
else:
Expand Down
22 changes: 22 additions & 0 deletions tests/8422.msg
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Delivered-To: receiver@example.com
Return-Path: <sender@example.com>
Date: Thu, 20 Jul 2017 07:34:22 -0500
Message-ID: <59705CFE.A95F.0016.0@journeys.com>
Subject: Following up Re: Looking to connect, let's schedule a call!
From: sender@example.com
To: "Receiver" <receiver@example.com>
Mime-Version: 1.0
Content-Type: multipart/mixed; boundary="=__PartBD85995F.0__="

This is a MIME message. If you are reading this text, you may want to
consider changing to a mail reader or gateway that understands how to
properly handle MIME multipart messages.

--=__PartBD85995F.0__=
Content-Type: multipart/alternative; boundary="=__PartBD85995F.1__="

--=__PartBD85995F.1__=
Content-Type: text/plain; charset=Windows-1252
Content-Transfer-Encoding: 8bit

Following up on my previous message. I�d love to connect you with
8 changes: 8 additions & 0 deletions tests/parser_tests.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
import unittest
from imbox.parser import *

import os
import sys
# email.policy is not available on every interpreter this test module may run
# on. Probe for it directly rather than comparing only the *minor* version
# number, which misclassifies e.g. 2.7 (minor == 7) as "new enough".
# SMTP falls back to False when the policy module cannot be imported.
try:
    from email.policy import SMTP
except ImportError:
    SMTP = False


TEST_DIR = os.path.dirname(os.path.abspath(__file__))


raw_email = """Delivered-To: johndoe@gmail.com
X-Originating-Email: [martin@amon.cx]
Message-ID: <test0@example.com>
Expand Down Expand Up @@ -192,6 +196,10 @@ def test_parse_email_encoded(self):
self.assertEqual('Выписка по карте', parsed_email.subject)
self.assertEqual('Выписка по карте 1234', parsed_email.body['html'][0])

def test_parse_email_invalid_unicode(self):
    """Parse the raw bytes of the 8422.msg fixture and check the subject.

    The fixture is read in binary mode so that parse_email receives bytes
    and has to perform the decoding itself.
    """
    fixture_path = os.path.join(TEST_DIR, '8422.msg')
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(fixture_path, 'rb') as fixture:
        raw_bytes = fixture.read()
    parsed_email = parse_email(raw_bytes)
    self.assertEqual("Following up Re: Looking to connect, let's schedule a call!", parsed_email.subject)

def test_parse_email_inline_body(self):
parsed_email = parse_email(raw_email_encoded_another_bad_multipart)
self.assertEqual("Re: Reaching Out About Peoples Home Equity", parsed_email.subject)
Expand Down

0 comments on commit 7c5a639

Please sign in to comment.