-
-
Notifications
You must be signed in to change notification settings - Fork 30.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) #102990
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -106,12 +106,42 @@ def formataddr(pair, charset='utf-8'): | |
return address | ||
|
||
|
||
def _pre_parse_validation(fieldvalues): | ||
"""Validate the field values are syntactically correct""" | ||
for v in fieldvalues: | ||
s = str(v).replace('\\(', '').replace('\\)', '') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See later comments on |
||
if s.count('(') != s.count(')'): | ||
fieldvalues.remove(v) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This loop is modifying the list it is iterating over within the loop. That makes reasoning about its exact behavior hard: removing an item could mean you wind up skipping an item, and appending an item could make the loop iterate over it. Furthermore, remove is O(n)... you're re-finding the item in order to remove it. Also, this code modifies the passed-in list in place before returning it. A better code pattern for this is to build up a new list and return that, never modifying the input. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Something like: accepted_values = []
for v in fieldvalues:
s = v.replace('\\(', '').replace('\\)', '')
if s.count('(') != s.count(')'):
v = ('', '')
accepted_values.append(v)
return accepted_values |
||
fieldvalues.append('') | ||
|
||
return fieldvalues | ||
|
||
|
||
def _post_parse_validation(parsedvalues): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. parsed_email_address_tuples perhaps? |
||
"""Validate the parsed values are syntactically correct""" | ||
for v in parsedvalues: | ||
if '[' in v[1]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a comment explaining why only |
||
parsedvalues.remove(v) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same design comment as above, don't modify the list being iterated over and don't modify the argument in place. |
||
parsedvalues.append(('', '')) | ||
|
||
return parsedvalues | ||
|
||
|
||
def getaddresses(fieldvalues): | ||
"""Return a list of (REALNAME, EMAIL) for each fieldvalue.""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add documentation mentioning that fieldvalues that could not be parsed may cause a ('', '') item to be returned in their place. |
||
fieldvalues = _pre_parse_validation(fieldvalues) | ||
all = COMMASPACE.join(str(v) for v in fieldvalues) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The existing code already calls str(v) on each value in fieldvalues. To avoid propagating this mistake into more lines of code spread out, I suggest changing this function to do: fieldvalues = [str(v) for v in fieldvalues] on the first line and get rid of all subsequent str(v) calls on anything from fieldvalues in this function or in functions it calls. |
||
a = _AddressList(all) | ||
return a.addresslist | ||
result = _post_parse_validation(a.addresslist) | ||
|
||
n = 0 | ||
for v in fieldvalues: | ||
n += str(v).count(',') + 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. get rid of this str(v) |
||
|
||
if len(result) != n: | ||
return [('', '')] | ||
|
||
return result | ||
|
||
|
||
def _format_timetuple_and_zone(timetuple, zone): | ||
|
@@ -212,9 +242,18 @@ def parseaddr(addr): | |
Return a tuple of realname and email address, unless the parse fails, in | ||
which case return a 2-tuple of ('', ''). | ||
""" | ||
addrs = _AddressList(addr).addresslist | ||
if not addrs: | ||
return '', '' | ||
if type(addr) == list: | ||
addr = addr[0] | ||
|
||
if type(addr) != str: | ||
return ('', '') | ||
tdwyer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
addr = _pre_parse_validation([addr])[0] | ||
addrs = _post_parse_validation(_AddressList(addr).addresslist) | ||
|
||
if not addrs or len(addrs) > 1: | ||
return ('', '') | ||
|
||
return addrs[0] | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -3319,15 +3319,90 @@ def test_getaddresses(self): | |||||
[('Al Person', 'aperson@dom.ain'), | ||||||
('Bud Person', 'bperson@dom.ain')]) | ||||||
|
||||||
def test_getaddresses_parsing_errors(self): | ||||||
"""Test for parsing errors from CVE-2023-27043""" | ||||||
eq = self.assertEqual | ||||||
eq(utils.getaddresses(['alice@example.org(<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
tdwyer marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
eq(utils.getaddresses(['alice@example.org)<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org<<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org><bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org@<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org,<bob@example.com>']), | ||||||
[('', 'alice@example.org'), ('', 'bob@example.com')]) | ||||||
eq(utils.getaddresses(['alice@example.org;<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org:<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org.<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org"<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org[<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.getaddresses(['alice@example.org]<bob@example.com>']), | ||||||
[('' ,'')]) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
def test_parseaddr_parsing_errors(self): | ||||||
"""Test for parsing errors from CVE-2023-27043""" | ||||||
eq = self.assertEqual | ||||||
eq(utils.parseaddr(['alice@example.org(<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org)<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org<<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org><bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org@<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org,<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org;<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org:<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org.<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org"<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org[<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
eq(utils.parseaddr(['alice@example.org]<bob@example.com>']), | ||||||
('' ,'')) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
def test_getaddresses_nasty(self): | ||||||
eq = self.assertEqual | ||||||
eq(utils.getaddresses(['foo: ;']), [('', '')]) | ||||||
eq(utils.getaddresses( | ||||||
['[]*-- =~$']), | ||||||
[('', ''), ('', ''), ('', '*--')]) | ||||||
eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) | ||||||
eq(utils.getaddresses( | ||||||
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']), | ||||||
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) | ||||||
eq(utils.getaddresses( | ||||||
['Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']), | ||||||
[('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) | ||||||
eq(utils.getaddresses( | ||||||
['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), | ||||||
[('', '')]) | ||||||
eq(utils.getaddresses( | ||||||
['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), | ||||||
[('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) | ||||||
eq(utils.getaddresses( | ||||||
['John Doe <jdoe@machine(comment). example>']), | ||||||
[('John Doe (comment)', 'jdoe@machine.example')]) | ||||||
eq(utils.getaddresses( | ||||||
['"Mary Smith: Personal Account" <smith@home.example>']), | ||||||
[('Mary Smith: Personal Account', 'smith@home.example')]) | ||||||
eq(utils.getaddresses( | ||||||
['Undisclosed recipients:;']), | ||||||
[('', '')]) | ||||||
eq(utils.getaddresses( | ||||||
['<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']), | ||||||
[('', 'boss@nil.test'), ('Giant; Big Box', 'bob@example.net')]) | ||||||
|
||||||
def test_getaddresses_embedded_comment(self): | ||||||
"""Test proper handling of a nested comment""" | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The name "fieldvalues" is non-specific. I realize it comes from the name in getaddresses(), but we should make it clearer what these are. email_address_fields, perhaps?
Related: I don't think this docstring adds meaningful value. Naming the function and parameter well, along with the code being short, is sufficiently self-explanatory for this internal function. Get rid of the docstring.