Skip to content

Commit

Permalink
Merge pull request #75 from bhtucker/master
Browse files Browse the repository at this point in the history
Make Parsing Policy Configurable
  • Loading branch information
martinrusev committed Dec 6, 2016
2 parents 6a8f9cd + 0965543 commit 7f23694
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 7 deletions.
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: python
python:
- "2.7"
- "3.2"
- "3.3"
- "3.4"
Expand Down
5 changes: 3 additions & 2 deletions imbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
class Imbox(object):

def __init__(self, hostname, username=None, password=None, ssl=True,
port=None, ssl_context=None):
port=None, ssl_context=None, policy=None):

self.server = ImapTransport(hostname, ssl=ssl, port=port,
ssl_context=ssl_context)
self.hostname = hostname
self.username = username
self.password = password
self.parser_policy = policy
self.connection = self.server.connect(username, password)
logger.info("Connected to IMAP Server with user {username} on {hostname}{ssl}".format(
hostname=hostname, username=username, ssl=(" over SSL" if ssl else "")))
Expand All @@ -38,7 +39,7 @@ def fetch_by_uid(self, uid):
logger.debug("Fetched message for UID {}".format(int(uid)))
raw_email = data[0][1]

email_object = parse_email(raw_email)
email_object = parse_email(raw_email, policy=self.parser_policy)

return email_object

Expand Down
11 changes: 8 additions & 3 deletions imbox/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,18 @@ def decode_content(message):
return content


def parse_email(raw_email):
def parse_email(raw_email, policy=None):
if isinstance(raw_email, binary_type):
raw_email = str_encode(raw_email, 'utf-8')
if policy is not None:
email_parse_kwargs = dict(policy=policy)
else:
email_parse_kwargs = {}

try:
email_message = email.message_from_string(raw_email)
email_message = email.message_from_string(raw_email, **email_parse_kwargs)
except UnicodeEncodeError:
email_message = email.message_from_string(raw_email.encode('utf-8'))
email_message = email.message_from_string(raw_email.encode('utf-8'), **email_parse_kwargs)
maintype = email_message.get_content_maintype()
parsed_email = {}

Expand Down
42 changes: 41 additions & 1 deletion tests/parser_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
import unittest
from imbox.parser import *

import sys

# email.policy (which provides the SMTP policy) is only imported on
# Python 3.3+. Compare the version tuple as a whole: checking major and minor
# separately would wrongly treat e.g. a 4.0 interpreter as "older than 3.3".
if sys.version_info < (3, 3):
    # Falsy sentinel so tests can detect that policies are unavailable.
    SMTP = False
else:
    from email.policy import SMTP


raw_email = """Delivered-To: johndoe@gmail.com
X-Originating-Email: [martin@amon.cx]
Message-ID: <test0@example.com>
Expand Down Expand Up @@ -55,6 +62,25 @@
------=_Part_1295_1644105626.1458989730614--
"""

# Bytes fixture for the policy test: the "To" header carries a second
# recipient whose display name contains a CRLF + space fold
# ("Second\r\n Receiver"), so the header has to be unfolded before the
# name can be read back as "Second Receiver".
raw_email_encoded_needs_refolding = b"""Delivered-To: receiver@example.com
Return-Path: <sender@example.com>
Date: Sat, 26 Mar 2016 13:55:30 +0300 (FET)
From: sender@example.com
To: "Receiver" <receiver@example.com>, "Second\r\n Receiver" <recipient@example.com>
Message-ID: <811170233.1296.1345983710614.JavaMail.bris@BRIS-AS-NEW.site>
Subject: =?ISO-8859-5?B?suvf2OHa0CDf3iDa0ODi1Q==?=
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="----=_Part_1295_1644105626.1458989730614"
------=_Part_1295_1644105626.1458989730614
Content-Type: text/html; charset=ISO-8859-5
Content-Transfer-Encoding: quoted-printable
=B2=EB=DF=D8=E1=DA=D0 =DF=DE =DA=D0=E0=E2=D5 1234
------=_Part_1295_1644105626.1458989730614--
"""


class TestParser(unittest.TestCase):

Expand Down Expand Up @@ -84,11 +110,25 @@ def test_parse_attachment(self):

def test_decode_mail_header(self):
    # Placeholder: no assertions yet, so this test always passes.
    # TODO: add real coverage for mail header decoding.
    pass

def test_get_mail_addresses(self):
    # Each case: raw header line, header name to look up, expected result.
    cases = (
        (
            "To: John Doe <johndoe@gmail.com>",
            'to',
            [{'email': 'johndoe@gmail.com', 'name': 'John Doe'}],
        ),
        (
            "From: John Smith <johnsmith@gmail.com>",
            'from',
            [{'email': 'johnsmith@gmail.com', 'name': 'John Smith'}],
        ),
    )

    for raw_header, header_name, expected in cases:
        parsed_message = email.message_from_string(raw_header)
        self.assertEqual(expected, get_mail_addresses(parsed_message, header_name))

def test_parse_email_with_policy(self):
    # Checks that a "To" display name folded across lines is read back as a
    # single name when the message is parsed with a refolding SMTP policy.
    if not SMTP:
        # SMTP is the falsy sentinel when email.policy could not be imported.
        # Report an explicit skip instead of returning, which would count as
        # a pass for a test that never ran.
        self.skipTest("email.policy is not available on this Python version")

    message_object = email.message_from_bytes(
        raw_email_encoded_needs_refolding,
        policy=SMTP.clone(refold_source='all')
    )

    self.assertEqual([
        {'email': 'receiver@example.com', 'name': 'Receiver'},
        {'email': 'recipient@example.com', 'name': 'Second Receiver'}
    ], get_mail_addresses(message_object, 'to'))

0 comments on commit 7f23694

Please sign in to comment.