Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Base64 decode param and recognize single file mails as attachment #224

Merged
merged 3 commits into from
Jul 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions imbox/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ def decode_param(param):
if missing_padding:
value += b"=" * (4 - missing_padding)

value = base64.b64decode(value)

value = str_encode(value, encoding)

value_results.append(value)
Expand Down Expand Up @@ -240,6 +242,12 @@ def parse_email(raw_email, policy=None):
payload = decode_content(email_message)
body['plain'].append(payload)

elif maintype == 'application':
if email_message.get_content_subtype() == 'pdf':
attachment = parse_attachment(email_message)
if attachment:
attachments.append(attachment)

parsed_email['attachments'] = attachments

parsed_email['body'] = body
Expand Down
42 changes: 42 additions & 0 deletions tests/parser_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,39 @@
------=_NextPart_000_0038_01D3F85C.02934C4A--
"""

# Fixture: a message with no multipart wrapper whose top-level Content-Type
# is application/pdf, i.e. the whole body is a single base64-encoded PDF.
# The file name is supplied as two RFC 2047 base64 encoded-words (UTF-8) in
# both the Content-Type "name" and the Content-Disposition "filename"
# parameters.
raw_email_attachment_only = """Delivered-To: johndoe@gmail.com
X-Originating-Email: [martin@amon.cx]
Message-ID: <test1@example.com>
Return-Path: martin@amon.cx
Date: Tue, 30 Jul 2013 15:56:29 +0300
From: Martin Rusev <martin@amon.cx>
MIME-Version: 1.0
To: John Doe <johndoe@gmail.com>
Subject: Test email - only pdf in body
Content-Type: application/pdf;
name="=?utf-8?B?YV9sb25nX2ZpbGVuYW1lX3dpdGhfc3BlY2lhbF9jaGFyX8O2w6Rf?=
=?utf-8?B?LTAxX28ucGRm?="
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename="=?utf-8?B?YV9sb25nX2ZpbGVuYW1lX3dpdGhfc3BlY2lhbF9jaGFyX8O2w6Rf?=
=?utf-8?B?LTAxX28ucGRm?="

JVBERi0xLjQKJcOiw6PDj8OTCjUgMCBvYmoKPDwKL0xlbmd0aCAxCj4+CnN0cmVhbQogCmVuZHN0
cmVhbQplbmRvYmoKNCAwIG9iago8PAovVHlwZSAvUGFnZQovTWVkaWFCb3ggWzAgMCA2MTIgNzky
XQovUmVzb3VyY2VzIDw8Cj4+Ci9Db250ZW50cyA1IDAgUgovUGFyZW50IDIgMCBSCj4+CmVuZG9i
agoyIDAgb2JqCjw8Ci9UeXBlIC9QYWdlcwovS2lkcyBbNCAwIFJdCi9Db3VudCAxCj4+CmVuZG9i
agoxIDAgb2JqCjw8Ci9UeXBlIC9DYXRhbG9nCi9QYWdlcyAyIDAgUgo+PgplbmRvYmoKMyAwIG9i
ago8PAovQ3JlYXRvciAoUERGIENyZWF0b3IgaHR0cDovL3d3dy5wZGYtdG9vbHMuY29tKQovQ3Jl
YXRpb25EYXRlIChEOjIwMTUwNzAxMTEyNDQ3KzAyJzAwJykKL01vZERhdGUgKEQ6MjAyMjA2MDcx
ODM2MDIrMDInMDAnKQovUHJvZHVjZXIgKDMtSGVpZ2h0c1wyMjIgUERGIE9wdGltaXphdGlvbiBT
aGVsbCA2LjAuMC4wIFwoaHR0cDovL3d3dy5wZGYtdG9vbHMuY29tXCkpCj4+CmVuZG9iagp4cmVm
CjAgNgowMDAwMDAwMDAwIDY1NTM1IGYKMDAwMDAwMDIyNiAwMDAwMCBuCjAwMDAwMDAxNjkgMDAw
MDAgbgowMDAwMDAwMjc1IDAwMDAwIG4KMDAwMDAwMDA2NSAwMDAwMCBuCjAwMDAwMDAwMTUgMDAw
MDAgbgp0cmFpbGVyCjw8Ci9TaXplIDYKL1Jvb3QgMSAwIFIKL0luZm8gMyAwIFIKL0lEIFs8MUMz
NTAwQ0E5RjcyMzJCOTdFMEVGM0Y3ODlFOEI3RjI+IDwyNTRDOEQxNTNGNjU1RDQ5OTQ1RUFENjhE
ODAxRTAxMT5dCj4+CnN0YXJ0eHJlZgo1MDUKJSVFT0Y=
"""

class TestParser(unittest.TestCase):

def test_parse_email(self):
Expand Down Expand Up @@ -423,6 +456,15 @@ def test_parse_attachment_with_long_filename(self):
self.assertEqual(71, attachment['size'])
self.assertEqual('abcefghijklmnopqrstuvwxyz01234567890abcefghijklmnopqrstuvwxyz01234567890abcefghijklmnopqrstuvwxyz01234567890.xyz', attachment['filename'])
self.assertTrue(attachment['content'])

def test_parse_email_single_attachment(self):
    """Check that a message consisting only of a PDF yields one attachment.

    The fixture has no multipart wrapper, so the attachment must be picked
    up from the top-level part, with its encoded file name decoded.
    """
    message = parse_email(raw_email_attachment_only)
    found = message.attachments
    self.assertEqual(1, len(found))

    pdf = found[0]
    self.assertEqual('application/pdf', pdf['content-type'])
    # 773 is the byte length of the base64-decoded payload in the fixture.
    self.assertEqual(773, pdf['size'])
    self.assertEqual('a_long_filename_with_special_char_öä_-01_o.pdf', pdf['filename'])
    self.assertTrue(pdf['content'])

def test_parse_email_accept_if_declared_charset_contains_a_minus_character(self):
parsed_email = parse_email(raw_email_encoded_encoding_charset_contains_a_minus)
Expand Down