Skip to content

Commit

Permalink
Merge pull request #231 from sangkaka/email_parse
Browse files (browse the repository at this point in the history)
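parse_email should first determine the message's content charset, and only then call str_encode on the raw bytes using that charset (instead of always assuming UTF-8).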
  • Loading branch information
martinrusev committed Sep 24, 2022
2 parents faf5502 + 5018ccf commit d78ba65
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions imbox/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,19 +198,25 @@ def parse_flags(headers):


def parse_email(raw_email, policy=None):
if isinstance(raw_email, bytes):
raw_email = str_encode(raw_email, 'utf-8', errors='ignore')
if policy is not None:
email_parse_kwargs = dict(policy=policy)
else:
email_parse_kwargs = {}

try:
email_message = email.message_from_string(
# Should first get content charset then str_encode with charset.
if isinstance(raw_email, bytes):
email_message = email.message_from_bytes(
raw_email, **email_parse_kwargs)
except UnicodeEncodeError:
email_message = email.message_from_string(
raw_email.encode('utf-8'), **email_parse_kwargs)
charset = email_message.get_content_charset('utf-8')
raw_email = str_encode(raw_email, charset, errors='ignore')
else:
try:
email_message = email.message_from_string(
raw_email, **email_parse_kwargs)
except UnicodeEncodeError:
email_message = email.message_from_string(
raw_email.encode('utf-8'), **email_parse_kwargs)

maintype = email_message.get_content_maintype()
parsed_email = {'raw_email': raw_email}

Expand Down

0 comments on commit d78ba65

Please sign in to comment.