Skip to content
This repository has been archived by the owner on Oct 2, 2024. It is now read-only.

Support Unicode international characters #151

Merged
merged 15 commits into from
Feb 15, 2017
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 84 additions & 25 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// Load modules

const Dns = require('dns');

const Punycode = require('punycode');

// Declare internals

Expand Down Expand Up @@ -127,12 +127,44 @@ internals.specials = function () {

const specials = '()<>[]:;@\\,."'; // US-ASCII visible characters not valid for atext (http://tools.ietf.org/html/rfc5322#section-3.2.3)
const lookup = new Array(0x100);
for (let i = 0xff; i >= 0; --i) {
lookup[i] = false;
}
lookup.fill(false);

for (let i = 0; i < specials.length; ++i) {
lookup[specials.charCodeAt(i)] = true;
lookup[specials.codePointAt(i)] = true;
}

return function (code) {

return lookup[code];
};
}();

internals.c0Controls = function () {

const lookup = new Array(0x100);
lookup.fill(false);

// add C0 control characters

for (let i = 0; i < 33; ++i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this package only supports node 4+, you can probably use lookup.fill to initialize it.

lookup[i] = true;
}

return function (code) {

return lookup[code];
};
}();

internals.c1Controls = function () {

const lookup = new Array(0x100);
lookup.fill(false);

// add C1 control characters

for (let i = 127; i < 160; ++i) {
lookup[i] = true;
}

return function (code) {
Expand All @@ -147,6 +179,19 @@ internals.regex = {
ipV6: /^[a-fA-F\d]{0,4}$/
};

// $lab:coverage:off$
/**
 * NFC-normalize an address that contains NUL (U+0000) characters.
 *
 * The address is cut at every NUL, each piece is normalized on its own, and
 * the pieces are glued back together with the NUL separators restored.
 *
 * @param {string} email - The address to normalize.
 * @returns {string} The address with every NUL-delimited piece in NFC form.
 */
internals.nulNormalize = function (email) {

    return email
        .split('\u0000')
        .map((segment) => segment.normalize('NFC'))
        .join('\u0000');
};
// $lab:coverage:on$


internals.checkIpV6 = function (items) {

Expand All @@ -173,7 +218,7 @@ internals.validDomain = function (tldAtom, options) {


/**
* Check that an email address conforms to RFCs 5321, 5322 and others
* Check that an email address conforms to RFCs 5321, 5322, 6530 and others
*
* We distinguish clearly between a Mailbox as defined by RFC 5321 and an
* addr-spec as defined by RFC 5322. Depending on the context, either can be
Expand All @@ -197,6 +242,7 @@ internals.validDomain = function (tldAtom, options) {
exports.validate = internals.validate = function (email, options, callback) {

options = options || {};
email = internals.normalize(email);

if (typeof options === 'function') {
callback = options;
Expand Down Expand Up @@ -462,10 +508,10 @@ exports.validate = internals.validate = function (email, options, callback) {
}
else {
context.prev = context.now;
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

// Especially if charCode == 10
if (charCode < 33 || charCode > 126 || internals.specials(charCode)) {
if (internals.specials(charCode) || internals.c0Controls(charCode) || internals.c1Controls(charCode)) {

// Fatal error
updateResult(internals.diagnoses.errExpectingATEXT);
Expand Down Expand Up @@ -660,11 +706,11 @@ exports.validate = internals.validate = function (email, options, callback) {
}
}

charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);
// Assume this token isn't a hyphen unless we discover it is
hyphenFlag = false;

if (charCode < 33 || charCode > 126 || internals.specials(charCode)) {
if (internals.specials(charCode) || internals.c0Controls(charCode) || internals.c1Controls(charCode)) {
// Fatal error
updateResult(internals.diagnoses.errExpectingATEXT);
}
Expand All @@ -676,8 +722,8 @@ exports.validate = internals.validate = function (email, options, callback) {

hyphenFlag = true;
}
// Check if it's a neither a number nor a latin letter
else if (charCode < 48 || charCode > 122 || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) {
// Check if it's neither a number nor a latin/unicode letter
else if (charCode < 48 || (charCode > 122 && charCode < 192) || (charCode > 57 && charCode < 65) || (charCode > 90 && charCode < 97)) {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WesTyler I'm reading through some of this code as I refactor, and now I find myself asking where 192 came from. You wouldn't happen to recall, would you?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I believe 192 is the beginning of the Unicode "international" character set (À).

If I remember correctly, Unicode #s 123-191 are all non-character symbols.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh, ok. Thanks!

// This is not an RFC 5321 subdomain, but still OK by RFC 5322
updateResult(internals.diagnoses.rfc5322Domain);
}
Expand Down Expand Up @@ -864,15 +910,15 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

// '\r', '\n', ' ', and '\t' have already been parsed above
if (charCode > 127 || charCode === 0 || token === '[') {
if ((charCode !== 127 && internals.c1Controls(charCode)) || charCode === 0 || token === '[') {
// Fatal error
updateResult(internals.diagnoses.errExpectingDTEXT);
break;
}
else if (charCode < 33 || charCode === 127) {
else if (internals.c0Controls(charCode) || charCode === 127) {
updateResult(internals.diagnoses.rfc5322DomainLiteralOBSDText);
}

Expand Down Expand Up @@ -954,12 +1000,12 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

if (charCode > 127 || charCode === 0 || charCode === 10) {
if ((charCode !== 127 && internals.c1Controls(charCode)) || charCode === 0 || charCode === 10) {
updateResult(internals.diagnoses.errExpectingQTEXT);
}
else if (charCode < 32 || charCode === 127) {
else if (internals.c0Controls(charCode) || charCode === 127) {
updateResult(internals.diagnoses.deprecatedQTEXT);
}

Expand Down Expand Up @@ -992,9 +1038,9 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d127 ; white space characters
//
// i.e. obs-qp = "\" (%d0-8, %d10-31 / %d127)
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

if (charCode > 127) {
if (charCode !== 127 && internals.c1Controls(charCode)) {
// Fatal error
updateResult(internals.diagnoses.errExpectingQPair);
}
Expand Down Expand Up @@ -1099,14 +1145,14 @@ exports.validate = internals.validate = function (email, options, callback) {
// %d12 / ; include the carriage
// %d14-31 / ; return, line feed, and
// %d127 ; white space characters
charCode = token.charCodeAt(0);
charCode = token.codePointAt(0);

if (charCode > 127 || charCode === 0 || charCode === 10) {
if (charCode === 0 || charCode === 10 || (charCode !== 127 && internals.c1Controls(charCode))) {
// Fatal error
updateResult(internals.diagnoses.errExpectingCTEXT);
break;
}
else if (charCode < 32 || charCode === 127) {
else if (internals.c0Controls(charCode) || charCode === 127) {
updateResult(internals.diagnoses.deprecatedCTEXT);
}
}
Expand Down Expand Up @@ -1266,7 +1312,7 @@ exports.validate = internals.validate = function (email, options, callback) {

if (!dnsPositive && maxResult < internals.categories.dnsWarn) {
// Per RFC 5321, domain atoms are limited to letter-digit-hyphen, so we only need to check code <= 57 to check for a digit
const code = atomData.domains[elementCount].charCodeAt(0);
const code = atomData.domains[elementCount].codePointAt(0);
if (code <= 57) {
updateResult(internals.diagnoses.rfc5321TLDNumeric);
}
Expand Down Expand Up @@ -1311,7 +1357,7 @@ exports.validate = internals.validate = function (email, options, callback) {
parseData.domain += '.';
}

const dnsDomain = parseData.domain;
const dnsDomain = Punycode.toASCII(parseData.domain);
Dns.resolveMx(dnsDomain, (err, mxRecords) => {

// If we have a fatal error, then we must assume that there are no records
Expand Down Expand Up @@ -1376,3 +1422,16 @@ exports.diagnoses = internals.validate.diagnoses = (function () {

return diag;
})();


exports.normalize = internals.normalize = function (email) {

// $lab:coverage:off$
if (process.version[1] === '4' && email.indexOf('\u0000') >= 0) {
return internals.nulNormalize(email);
}
// $lab:coverage:on$


return email.normalize('NFC');
};
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
"node": ">=4.0.0"
},
"dependencies": {
"punycode": "2.1.x"
},
"devDependencies": {
"code": "3.x.x",
"lab": "10.x.x"
"lab": "10.x.x",
"proxyquire": "1.x.x"
},
"scripts": {
"test": "lab -a code -t 100 -L -m 5000",
Expand Down
42 changes: 42 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ const tldExpectations = [
['shouldbe@example.com', diag.valid]
];

// Addresses with non-ASCII local parts and domains, each paired with the
// diagnosis the validator is expected to report for it. The tests that iterate
// this list call validate() with checkDNS set to false.
const noDNSExpectations = [
['伊昭傑@郵件.商務', diag.valid],
['ñoñó1234@ñomething.com', diag.valid]
];

describe('validate()', () => {

Expand Down Expand Up @@ -207,6 +211,23 @@ describe('validate()', () => {
});
});

// Register one test per [address, expected diagnosis] pair; DNS lookups are
// switched off so the outcome depends on parsing alone.
noDNSExpectations.forEach((expectation, index) => {

    const address = expectation[0];
    const expected = expectation[1];
    const settings = {
        errorLevel: 0,
        checkDNS: false
    };

    it('should handle noDNS test ' + (index + 1), (done) => {

        Isemail.validate(address, settings, (actual) => {

            expect(actual).to.equal(expected);
            done();
        });
    });
});

it('should handle domain atom test 1', (done) => {

expect(Isemail.validate('shouldbe@invalid', {
Expand All @@ -227,3 +248,24 @@ describe('validate()', () => {
done();
});
});

// normalize() must fold decomposed character sequences into their composed form
describe('normalize', () => {

    // Each entry is [raw input, expected normalized output]
    const normalizeExpectations = [
        ['man\u0303ana.com', 'mañana.com']
    ];

    normalizeExpectations.forEach((pair) => {

        const raw = pair[0];
        const expected = pair[1];

        it('should properly normalize international characters', (done) => {

            const actual = Isemail.normalize(raw);

            // Guard: the fixture must genuinely differ from its normalized form
            expect(raw).to.not.equal(expected);
            expect(actual).to.equal(expected);
            done();
        });
    });
});
18 changes: 16 additions & 2 deletions test/tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
["test@nominet.org.uk", "valid"],
["test@about.museum", "valid"],
["a@iana.org", "valid"],
["êjness@iana.org", "valid"],
["ñoñó1234@iana.org", "valid"],
["ñoñó1234@something.com", "valid"],
["\ud801\udc37\ud852\udf62@iana.org", "valid"],
["test@e.com", "dnsWarnNoRecord"],
["test@iana.a", "dnsWarnNoRecord"],
["test.test@iana.org", "valid"],
Expand All @@ -26,7 +30,9 @@
["test@255.255.255.255", "rfc5321TLDNumeric"],
["abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@iana.org", "valid"],
["abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklmn@iana.org", "rfc5322LocalTooLong"],
["\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06@iana.org", "rfc5322LocalTooLong"],
["test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm", "rfc5322LabelTooLong"],
["test@\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06\ud83d\ude06", "rfc5322LabelTooLong"],
["test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.com", "dnsWarnNoRecord"],
["test@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm.com", "rfc5322LabelTooLong"],
["test@mason-dixon.com", "valid"],
Expand All @@ -39,8 +45,11 @@
["a@a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v", "dnsWarnNoRecord"],
["abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghi", "dnsWarnNoRecord"],
["abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghij", "rfc5322TooLong"],
["abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghiklm@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdef\ud83d\ude06", "rfc5322TooLong"],
["a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hij", "rfc5322TooLong"],
["a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcde\ud83d\ude06", "rfc5322TooLong"],
["a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.hijk", "rfc5322DomainTooLong"],
["a@abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghikl.abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefg.\ud83d\ude06", "rfc5322DomainTooLong"],
["\"\r", "errCRNoLF"],
["\"test\"@iana.org", "rfc5321QuotedString"],
["\"\"@iana.org", "rfc5321QuotedString"],
Expand Down Expand Up @@ -115,6 +124,7 @@
["test@iana.org(comment\\)", "errUnclosedComment"],
["test@iana.org(comment\\", "errBackslashEnd"],
["test@[RFC-5322-domain-literal]", "rfc5322DomainLiteral"],
["test@[RFC-5322-郵件ñó-domain-literal]", "rfc5322DomainLiteral"],
["test@[RFC-5322]-domain-literal]", "errATEXTAfterDomainLiteral"],
["test@[RFC-5322-[domain-literal]", "errExpectingDTEXT"],
["test@[€", "errExpectingDTEXT"],
Expand All @@ -139,6 +149,7 @@
["\"\rtest\"@iana.org", "errCRNoLF"],
["(\r)test@iana.org", "errCRNoLF"],
["test@iana.org(\r)", "errCRNoLF"],
["test@<iana>.org", "errExpectingATEXT"],
["\ntest@iana.org", "errExpectingATEXT"],
["\"\n\"@iana.org", "errExpectingQTEXT"],
["\"\\\n\"@iana.org", "deprecatedQP"],
Expand Down Expand Up @@ -171,7 +182,7 @@
[" test@iana.org", "cfwsFWS"],
["test@iana.org ", "cfwsFWS"],
["test@[IPv6:1::2:]", "rfc5322IPv6ColonEnd"],
["\"test\\©\"@iana.org", "errExpectingQPair"],
["\"test\\\u0094\"@iana.org", "errExpectingQPair"],
["test@iana/icann.org", "rfc5322Domain"],
["test@iana!icann.org", "rfc5322Domain"],
["test@iana?icann.org", "rfc5322Domain"],
Expand All @@ -183,5 +194,8 @@
["(comment\r\n comment)test@iana.org", "cfwsFWS"],
["test@org", "rfc5321TLD"],
["test@example.com", "dnsWarnNoMXRecord"],
["test@nic.no", "dnsWarnNoRecord"]
["test@nic.no", "dnsWarnNoRecord"],
["test@ñoñó郵件ñoñó郵件ñoñó郵件ñoñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件.ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件.ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñ.oñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñ.oñó郵件ñoñó郵件ñoñó郵件.商務", "rfc5322DomainTooLong"],
["test@ñoñó郵件ñoñó郵件ñoñó郵件ñoñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件.商務", "rfc5322LabelTooLong"],
["ñoñó郵件ñoñó郵件ñoñó郵件ñoñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件@test.ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件.ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñ.oñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñoñó郵件ñ.oñó郵件ñoñó郵件ñoñó郵件.商務", "rfc5322TooLong"]
]
Loading