This repository has been archived by the owner on Oct 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 43
Support Unicode international characters #151
Merged
Merged
Changes from 12 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
b4566ba
Support Unicode international characters
WesTyler 7c7901f
Disable linting for rewire "let" workaround
WesTyler 0ab6884
Add inline comment for eslint ignore on prefer-const rule
3487199
Expand test coverage of Unicode cases
WesTyler fbd9f1c
Utilize array.fill; do not pin dependency at the patch level
WesTyler 86c3194
Specify C0 and C1 charCode ranges to allow unicode
WesTyler 79e2261
Replace rewire with proxyquire
WesTyler 24f78ef
Remove "let" and eslint override introduced by rewire useage
WesTyler 996a71e
Add normalization to email addresses
WesTyler e836377
Monkey-patch V8 NUL normalize bug for Node 4.x
WesTyler 10bb502
Utilize indexOf instead of regex match for speed
WesTyler 38039b8
Add unicode length tests
skeggse 599a245
Check for surrogate pairs in token iteration
WesTyler 1fdcf9e
Check Buffer.byteLength instead of string.length
WesTyler 1d20f07
Deprecate internals.checkSurrogatePair
WesTyler File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,7 @@ | |
// Load modules | ||
|
||
const Dns = require('dns'); | ||
|
||
const Punycode = require('punycode'); | ||
|
||
// Declare internals | ||
|
||
|
@@ -127,12 +127,44 @@ internals.specials = function () { | |
|
||
const specials = '()<>[]:;@\\,."'; // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3) | ||
const lookup = new Array(0x100); | ||
for (let i = 0xff; i >= 0; --i) { | ||
lookup[i] = false; | ||
} | ||
lookup.fill(false); | ||
|
||
for (let i = 0; i < specials.length; ++i) { | ||
lookup[specials.charCodeAt(i)] = true; | ||
lookup[specials.codePointAt(i)] = true; | ||
} | ||
|
||
return function (code) { | ||
|
||
return lookup[code]; | ||
}; | ||
}(); | ||
|
||
/**
 * Predicate over code points: returns true for 0 through 32 (the range the
 * original comment calls the C0 control characters, plus code 32), false for
 * 33 through 255, and undefined for anything outside the 256-entry table.
 */
internals.c0Controls = (function () {

    // Entries 0-32 are flagged; the rest of the table up to 0xff is false
    const table = Array.from({ length: 0x100 }, (unused, index) => index < 33);

    return (code) => table[code];
})();
|
||
internals.c1Controls = function () { | ||
|
||
const lookup = new Array(0x100); | ||
lookup.fill(false); | ||
|
||
// add C1 control characters | ||
|
||
for (let i = 127; i < 160; ++i) { | ||
lookup[i] = true; | ||
} | ||
|
||
return function (code) { | ||
|
@@ -147,6 +179,19 @@ internals.regex = { | |
ipV6: /^[a-fA-F\d]{0,4}$/ | ||
}; | ||
|
||
// $lab:coverage:off$ | ||
/**
 * NFC-normalize a string that contains NUL characters by normalizing each
 * NUL-delimited segment on its own and re-joining the segments with NUL.
 */
internals.nulNormalize = function (email) {

    return email
        .split('\u0000')
        .map((segment) => segment.normalize('NFC'))
        .join('\u0000');
};
// $lab:coverage:on$ | ||
|
||
|
||
internals.checkIpV6 = function (items) { | ||
|
||
|
@@ -173,7 +218,7 @@ internals.validDomain = function (tldAtom, options) { | |
|
||
|
||
/** | ||
* Check that an email address conforms to RFCs 5321, 5322 and others | ||
* Check that an email address conforms to RFCs 5321, 5322, 6530 and others | ||
* | ||
* We distinguish clearly between a Mailbox as defined by RFC 5321 and an | ||
* addr-spec as defined by RFC 5322. Depending on the context, either can be | ||
|
@@ -197,6 +242,7 @@ internals.validDomain = function (tldAtom, options) { | |
exports.validate = internals.validate = function (email, options, callback) { | ||
|
||
options = options || {}; | ||
email = internals.normalize(email); | ||
|
||
if (typeof options === 'function') { | ||
callback = options; | ||
|
@@ -462,10 +508,10 @@ exports.validate = internals.validate = function (email, options, callback) { | |
} | ||
else { | ||
context.prev = context.now; | ||
charCode = token.charCodeAt(0); | ||
charCode = token.codePointAt(0); | ||
|
||
// Especially if charCode == 10 | ||
if (charCode < 33 || charCode > 126 || internals.specials(charCode)) { | ||
if (internals.specials(charCode) || internals.c0Controls(charCode) || internals.c1Controls(charCode)) { | ||
|
||
// Fatal error | ||
updateResult(internals.diagnoses.errExpectingATEXT); | ||
|
@@ -660,11 +706,11 @@ exports.validate = internals.validate = function (email, options, callback) { | |
} | ||
} | ||
|
||
charCode = token.charCodeAt(0); | ||
charCode = token.codePointAt(0); | ||
// Assume this token isn't a hyphen unless we discover it is | ||
hyphenFlag = false; | ||
|
||
if (charCode < 33 || charCode > 126 || internals.specials(charCode)) { | ||
if (internals.specials(charCode) || internals.c0Controls(charCode) || internals.c1Controls(charCode)) { | ||
// Fatal error | ||
updateResult(internals.diagnoses.errExpectingATEXT); | ||
} | ||
|
@@ -676,8 +722,8 @@ exports.validate = internals.validate = function (email, options, callback) { | |
|
||
hyphenFlag = true; | ||
} | ||
// Check if it's a neither a number nor a latin letter | ||
else if (charCode < 48 || charCode > 122 || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) { | ||
// Check if it's a neither a number nor a latin/unicode letter | ||
else if (charCode < 48 || (charCode > 122 && charCode < 192) || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) { | ||
Review comment: @WesTyler I'm reading through some of this code as I refactor, and now I find myself asking where `192` comes from. — Reply: Yeah, I believe 192 is the beginning of the Unicode "international" character set (À). If I remember correctly, Unicode #s 123-191 are all non-character symbols. — Reply: Ahh, ok. Thanks! |
||
// This is not an RFC 5321 subdomain, but still OK by RFC 5322 | ||
updateResult(internals.diagnoses.rfc5322Domain); | ||
} | ||
|
@@ -864,15 +910,15 @@ exports.validate = internals.validate = function (email, options, callback) { | |
// %d12 / ; include the carriage | ||
// %d14-31 / ; return, line feed, and | ||
// %d127 ; white space characters | ||
charCode = token.charCodeAt(0); | ||
charCode = token.codePointAt(0); | ||
|
||
// '\r', '\n', ' ', and '\t' have already been parsed above | ||
if (charCode > 127 || charCode === 0 || token === '[') { | ||
if ((charCode !== 127 && internals.c1Controls(charCode)) || charCode === 0 || token === '[') { | ||
// Fatal error | ||
updateResult(internals.diagnoses.errExpectingDTEXT); | ||
break; | ||
} | ||
else if (charCode < 33 || charCode === 127) { | ||
else if (internals.c0Controls(charCode) || charCode === 127) { | ||
updateResult(internals.diagnoses.rfc5322DomainLiteralOBSDText); | ||
} | ||
|
||
|
@@ -954,12 +1000,12 @@ exports.validate = internals.validate = function (email, options, callback) { | |
// %d12 / ; include the carriage | ||
// %d14-31 / ; return, line feed, and | ||
// %d127 ; white space characters | ||
charCode = token.charCodeAt(0); | ||
charCode = token.codePointAt(0); | ||
|
||
if (charCode > 127 || charCode === 0 || charCode === 10) { | ||
if ((charCode !== 127 && internals.c1Controls(charCode)) || charCode === 0 || charCode === 10) { | ||
updateResult(internals.diagnoses.errExpectingQTEXT); | ||
} | ||
else if (charCode < 32 || charCode === 127) { | ||
else if (internals.c0Controls(charCode) || charCode === 127) { | ||
updateResult(internals.diagnoses.deprecatedQTEXT); | ||
} | ||
|
||
|
@@ -992,9 +1038,9 @@ exports.validate = internals.validate = function (email, options, callback) { | |
// %d127 ; white space characters | ||
// | ||
// i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127) | ||
charCode = token.charCodeAt(0); | ||
charCode = token.codePointAt(0); | ||
|
||
if (charCode > 127) { | ||
if (charCode !== 127 && internals.c1Controls(charCode)) { | ||
// Fatal error | ||
updateResult(internals.diagnoses.errExpectingQPair); | ||
} | ||
|
@@ -1099,14 +1145,14 @@ exports.validate = internals.validate = function (email, options, callback) { | |
// %d12 / ; include the carriage | ||
// %d14-31 / ; return, line feed, and | ||
// %d127 ; white space characters | ||
charCode = token.charCodeAt(0); | ||
charCode = token.codePointAt(0); | ||
|
||
if (charCode > 127 || charCode === 0 || charCode === 10) { | ||
if (charCode === 0 || charCode === 10 || (charCode !== 127 && internals.c1Controls(charCode))) { | ||
// Fatal error | ||
updateResult(internals.diagnoses.errExpectingCTEXT); | ||
break; | ||
} | ||
else if (charCode < 32 || charCode === 127) { | ||
else if (internals.c0Controls(charCode) || charCode === 127) { | ||
updateResult(internals.diagnoses.deprecatedCTEXT); | ||
} | ||
} | ||
|
@@ -1266,7 +1312,7 @@ exports.validate = internals.validate = function (email, options, callback) { | |
|
||
if (!dnsPositive && maxResult < internals.categories.dnsWarn) { | ||
// Per RFC 5321, domain atoms are limited to letter-digit-hyphen, so we only need to check code <= 57 to check for a digit | ||
const code = atomData.domains[elementCount].charCodeAt(0); | ||
const code = atomData.domains[elementCount].codePointAt(0); | ||
if (code <= 57) { | ||
updateResult(internals.diagnoses.rfc5321TLDNumeric); | ||
} | ||
|
@@ -1311,7 +1357,7 @@ exports.validate = internals.validate = function (email, options, callback) { | |
parseData.domain += '.'; | ||
} | ||
|
||
const dnsDomain = parseData.domain; | ||
const dnsDomain = Punycode.toASCII(parseData.domain); | ||
Dns.resolveMx(dnsDomain, (err, mxRecords) => { | ||
|
||
// If we have a fatal error, then we must assume that there are no records | ||
|
@@ -1376,3 +1422,16 @@ exports.diagnoses = internals.validate.diagnoses = (function () { | |
|
||
return diag; | ||
})(); | ||
|
||
|
||
exports.normalize = internals.normalize = function (email) { | ||
|
||
// $lab:coverage:off$ | ||
if (process.version[1] === '4' && email.indexOf('\u0000') >= 0) { | ||
return internals.nulNormalize(email); | ||
} | ||
// $lab:coverage:on$ | ||
|
||
|
||
return email.normalize('NFC'); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this package only supports Node 4+, you can probably use `lookup.fill` to initialize it.