Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make Parsing Policy Configurable #75

Merged
merged 2 commits into from
Dec 6, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: python
python:
- "2.7"
- "3.2"
- "3.3"
- "3.4"
Expand Down
5 changes: 3 additions & 2 deletions imbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
class Imbox(object):

def __init__(self, hostname, username=None, password=None, ssl=True,
port=None, ssl_context=None):
port=None, ssl_context=None, policy=None):

self.server = ImapTransport(hostname, ssl=ssl, port=port,
ssl_context=ssl_context)
self.hostname = hostname
self.username = username
self.password = password
self.parser_policy = policy
self.connection = self.server.connect(username, password)
logger.info("Connected to IMAP Server with user {username} on {hostname}{ssl}".format(
hostname=hostname, username=username, ssl=(" over SSL" if ssl else "")))
Expand All @@ -38,7 +39,7 @@ def fetch_by_uid(self, uid):
logger.debug("Fetched message for UID {}".format(int(uid)))
raw_email = data[0][1]

email_object = parse_email(raw_email)
email_object = parse_email(raw_email, policy=self.parser_policy)

return email_object

Expand Down
11 changes: 8 additions & 3 deletions imbox/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,18 @@ def decode_content(message):
return content


def parse_email(raw_email):
def parse_email(raw_email, policy=None):
if isinstance(raw_email, binary_type):
raw_email = str_encode(raw_email, 'utf-8')
if policy is not None:
email_parse_kwargs = dict(policy=policy)
else:
email_parse_kwargs = {}

try:
email_message = email.message_from_string(raw_email)
email_message = email.message_from_string(raw_email, **email_parse_kwargs)
except UnicodeEncodeError:
email_message = email.message_from_string(raw_email.encode('utf-8'))
email_message = email.message_from_string(raw_email.encode('utf-8'), **email_parse_kwargs)
maintype = email_message.get_content_maintype()
parsed_email = {}

Expand Down
42 changes: 41 additions & 1 deletion tests/parser_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
import unittest
from imbox.parser import *

import sys
# email.policy was added in Python 3.3. On older interpreters SMTP is set to
# False so that policy-dependent tests can detect its absence.
# The whole version tuple is compared at once: checking major and minor
# separately would wrongly classify a release such as 4.0 (minor < 3) as
# too old.
if sys.version_info < (3, 3):
    SMTP = False
else:
    from email.policy import SMTP


raw_email = """Delivered-To: johndoe@gmail.com
X-Originating-Email: [martin@amon.cx]
Message-ID: <test0@example.com>
Expand Down Expand Up @@ -55,6 +62,25 @@
------=_Part_1295_1644105626.1458989730614--
"""

# Bytes fixture: a multipart message whose "To" header contains a display
# name folded across two lines ("Second\r\n Receiver"). It is parsed with a
# refolding policy in test_parse_email_with_policy, which expects the name
# to come back as "Second Receiver".
raw_email_encoded_needs_refolding = b"""Delivered-To: receiver@example.com
Return-Path: <sender@example.com>
Date: Sat, 26 Mar 2016 13:55:30 +0300 (FET)
From: sender@example.com
To: "Receiver" <receiver@example.com>, "Second\r\n Receiver" <recipient@example.com>
Message-ID: <811170233.1296.1345983710614.JavaMail.bris@BRIS-AS-NEW.site>
Subject: =?ISO-8859-5?B?suvf2OHa0CDf3iDa0ODi1Q==?=
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="----=_Part_1295_1644105626.1458989730614"

------=_Part_1295_1644105626.1458989730614
Content-Type: text/html; charset=ISO-8859-5
Content-Transfer-Encoding: quoted-printable

=B2=EB=DF=D8=E1=DA=D0 =DF=DE =DA=D0=E0=E2=D5 1234
------=_Part_1295_1644105626.1458989730614--
"""


class TestParser(unittest.TestCase):

Expand Down Expand Up @@ -84,11 +110,25 @@ def test_parse_attachment(self):

def test_decode_mail_header(self):
pass

def test_get_mail_addresses(self):
    """Check get_mail_addresses on a single-address To and From header."""
    # (header to look up, raw header line, expected parsed addresses)
    cases = (
        ('to',
         "To: John Doe <johndoe@gmail.com>",
         [{'email': 'johndoe@gmail.com', 'name': 'John Doe'}]),
        ('from',
         "From: John Smith <johnsmith@gmail.com>",
         [{'email': 'johnsmith@gmail.com', 'name': 'John Smith'}]),
    )

    for header_name, raw_header, expected in cases:
        parsed = email.message_from_string(raw_header)
        self.assertEqual(expected, get_mail_addresses(parsed, header_name))

def test_parse_email_with_policy(self):
    """Check that a refolding policy joins a display name folded across lines.

    The fixture's To header contains "Second\\r\\n Receiver"; parsed with
    SMTP.clone(refold_source='all') the second recipient's name is expected
    to be 'Second Receiver'.
    """
    if not SMTP:
        # SMTP is set to False at module level when email.policy is not
        # available. Report an explicit skip: a bare return would be
        # counted as a passing test even though nothing was checked.
        self.skipTest("email.policy is not available on this Python version")

    # NOTE(review): this calls email.message_from_bytes directly rather than
    # parse_email(..., policy=...), so the new policy argument of
    # parse_email is not exercised here -- consider adding coverage.
    message_object = email.message_from_bytes(
        raw_email_encoded_needs_refolding,
        policy=SMTP.clone(refold_source='all')
    )

    self.assertEqual([
        {'email': 'receiver@example.com', 'name': 'Receiver'},
        {'email': 'recipient@example.com', 'name': 'Second Receiver'}
    ], get_mail_addresses(message_object, 'to'))