From 924446a91f4fa21e56b8a7644be382c187242ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 21 Nov 2024 15:12:01 -0500 Subject: [PATCH] Fix case-insensitive set operations (#104) * fix: expand case foldings before intersection/subtraction * fix: maintain config.modifiersData when we don't transform modifiers * fix: pass through caseFoldFlags to computeClassStrings * add more test cases * fix: update the anchor/dot when modifiers are transformed * add more test cases * refactor: rename caseFold to caseEquivalents In the spec, caseFold refers to mapping an uppercase letter to its lowercase form, whereas here we are actually adding the case equivalents of every character in a given set, i.e. the other characters that map to the same character via scf(). To avoid confusion, rename caseFold to caseEquivalents. * build: emit one-way mappings to iu-foldings * polish: apply scf() to the class set operand * test: add more test cases * perf: apply scf only in intersection/subtraction * fix: apply SCF on unicode escape and wW * fix: generate \D and \S from UNICODE_IV_SET * fix: call scf on character class range and pass through shouldApplySCF to nested class * test: remove matches tests for node 6 compat The matches are already tested in unicode-set.js * Update data/character-class-escape-sets.js * Update scripts/case-mappings.js * Update scripts/character-class-escape-sets.js --------- Co-authored-by: Mathias Bynens --- data/character-class-escape-sets.js | 22 + data/iu-foldings.js | 1486 ++++++++++++++++++++++++ package.json | 1 + rewrite-pattern.js | 230 ++-- scripts/case-mappings.js | 1 + scripts/character-class-escape-sets.js | 45 +- tests/fixtures/modifiers.js | 57 + tests/fixtures/unicode-set.js | 148 ++- 8 files changed, 1884 insertions(+), 106 deletions(-) create mode 100644 data/iu-foldings.js diff --git a/data/character-class-escape-sets.js b/data/character-class-escape-sets.js index d5bca33..6c1309b 100644 --- a/data/character-class-escape-sets.js +++ 
b/data/character-class-escape-sets.js @@ -2,6 +2,7 @@ 'use strict'; const regenerate = require('regenerate'); +const UNICODE_IV_SET = require('./all-characters.js').UNICODE_IV_SET; exports.REGULAR = new Map([ ['d', regenerate() @@ -103,3 +104,24 @@ exports.UNICODE_IGNORE_CASE = new Map([ .addRange(0x180, 0x2129) .addRange(0x212B, 0x10FFFF)] ]); + +exports.UNICODESET_IGNORE_CASE = new Map([ + ['d', regenerate() + .addRange(0x30, 0x39)], + ['D', UNICODE_IV_SET.clone().remove(regenerate() + .addRange(0x30, 0x39))], + ['s', regenerate(0x20, 0xA0, 0x1680, 0x202F, 0x205F, 0x3000, 0xFEFF) + .addRange(0x9, 0xD) + .addRange(0x2000, 0x200A) + .addRange(0x2028, 0x2029)], + ['S', UNICODE_IV_SET.clone().remove(regenerate(0x20, 0xA0, 0x1680, 0x202F, 0x205F, 0x3000, 0xFEFF) + .addRange(0x9, 0xD) + .addRange(0x2000, 0x200A) + .addRange(0x2028, 0x2029))], + ['w', regenerate(0x5F) + .addRange(0x30, 0x39) + .addRange(0x61, 0x7A)], + ['W', UNICODE_IV_SET.clone().remove(regenerate(0x5F) + .addRange(0x30, 0x39) + .addRange(0x61, 0x7A))] +]); diff --git a/data/iu-foldings.js b/data/iu-foldings.js new file mode 100644 index 0000000..b64a9c9 --- /dev/null +++ b/data/iu-foldings.js @@ -0,0 +1,1486 @@ +module.exports = new Map([ + [0x41, 0x61], + [0x42, 0x62], + [0x43, 0x63], + [0x44, 0x64], + [0x45, 0x65], + [0x46, 0x66], + [0x47, 0x67], + [0x48, 0x68], + [0x49, 0x69], + [0x4A, 0x6A], + [0x4B, 0x6B], + [0x4C, 0x6C], + [0x4D, 0x6D], + [0x4E, 0x6E], + [0x4F, 0x6F], + [0x50, 0x70], + [0x51, 0x71], + [0x52, 0x72], + [0x53, 0x73], + [0x54, 0x74], + [0x55, 0x75], + [0x56, 0x76], + [0x57, 0x77], + [0x58, 0x78], + [0x59, 0x79], + [0x5A, 0x7A], + [0xB5, 0x3BC], + [0xC0, 0xE0], + [0xC1, 0xE1], + [0xC2, 0xE2], + [0xC3, 0xE3], + [0xC4, 0xE4], + [0xC5, 0xE5], + [0xC6, 0xE6], + [0xC7, 0xE7], + [0xC8, 0xE8], + [0xC9, 0xE9], + [0xCA, 0xEA], + [0xCB, 0xEB], + [0xCC, 0xEC], + [0xCD, 0xED], + [0xCE, 0xEE], + [0xCF, 0xEF], + [0xD0, 0xF0], + [0xD1, 0xF1], + [0xD2, 0xF2], + [0xD3, 0xF3], + [0xD4, 0xF4], + [0xD5, 
0xF5], + [0xD6, 0xF6], + [0xD8, 0xF8], + [0xD9, 0xF9], + [0xDA, 0xFA], + [0xDB, 0xFB], + [0xDC, 0xFC], + [0xDD, 0xFD], + [0xDE, 0xFE], + [0x100, 0x101], + [0x102, 0x103], + [0x104, 0x105], + [0x106, 0x107], + [0x108, 0x109], + [0x10A, 0x10B], + [0x10C, 0x10D], + [0x10E, 0x10F], + [0x110, 0x111], + [0x112, 0x113], + [0x114, 0x115], + [0x116, 0x117], + [0x118, 0x119], + [0x11A, 0x11B], + [0x11C, 0x11D], + [0x11E, 0x11F], + [0x120, 0x121], + [0x122, 0x123], + [0x124, 0x125], + [0x126, 0x127], + [0x128, 0x129], + [0x12A, 0x12B], + [0x12C, 0x12D], + [0x12E, 0x12F], + [0x132, 0x133], + [0x134, 0x135], + [0x136, 0x137], + [0x139, 0x13A], + [0x13B, 0x13C], + [0x13D, 0x13E], + [0x13F, 0x140], + [0x141, 0x142], + [0x143, 0x144], + [0x145, 0x146], + [0x147, 0x148], + [0x14A, 0x14B], + [0x14C, 0x14D], + [0x14E, 0x14F], + [0x150, 0x151], + [0x152, 0x153], + [0x154, 0x155], + [0x156, 0x157], + [0x158, 0x159], + [0x15A, 0x15B], + [0x15C, 0x15D], + [0x15E, 0x15F], + [0x160, 0x161], + [0x162, 0x163], + [0x164, 0x165], + [0x166, 0x167], + [0x168, 0x169], + [0x16A, 0x16B], + [0x16C, 0x16D], + [0x16E, 0x16F], + [0x170, 0x171], + [0x172, 0x173], + [0x174, 0x175], + [0x176, 0x177], + [0x178, 0xFF], + [0x179, 0x17A], + [0x17B, 0x17C], + [0x17D, 0x17E], + [0x17F, 0x73], + [0x181, 0x253], + [0x182, 0x183], + [0x184, 0x185], + [0x186, 0x254], + [0x187, 0x188], + [0x189, 0x256], + [0x18A, 0x257], + [0x18B, 0x18C], + [0x18E, 0x1DD], + [0x18F, 0x259], + [0x190, 0x25B], + [0x191, 0x192], + [0x193, 0x260], + [0x194, 0x263], + [0x196, 0x269], + [0x197, 0x268], + [0x198, 0x199], + [0x19C, 0x26F], + [0x19D, 0x272], + [0x19F, 0x275], + [0x1A0, 0x1A1], + [0x1A2, 0x1A3], + [0x1A4, 0x1A5], + [0x1A6, 0x280], + [0x1A7, 0x1A8], + [0x1A9, 0x283], + [0x1AC, 0x1AD], + [0x1AE, 0x288], + [0x1AF, 0x1B0], + [0x1B1, 0x28A], + [0x1B2, 0x28B], + [0x1B3, 0x1B4], + [0x1B5, 0x1B6], + [0x1B7, 0x292], + [0x1B8, 0x1B9], + [0x1BC, 0x1BD], + [0x1C4, 0x1C6], + [0x1C5, 0x1C6], + [0x1C7, 0x1C9], + [0x1C8, 0x1C9], + [0x1CA, 
0x1CC], + [0x1CB, 0x1CC], + [0x1CD, 0x1CE], + [0x1CF, 0x1D0], + [0x1D1, 0x1D2], + [0x1D3, 0x1D4], + [0x1D5, 0x1D6], + [0x1D7, 0x1D8], + [0x1D9, 0x1DA], + [0x1DB, 0x1DC], + [0x1DE, 0x1DF], + [0x1E0, 0x1E1], + [0x1E2, 0x1E3], + [0x1E4, 0x1E5], + [0x1E6, 0x1E7], + [0x1E8, 0x1E9], + [0x1EA, 0x1EB], + [0x1EC, 0x1ED], + [0x1EE, 0x1EF], + [0x1F1, 0x1F3], + [0x1F2, 0x1F3], + [0x1F4, 0x1F5], + [0x1F6, 0x195], + [0x1F7, 0x1BF], + [0x1F8, 0x1F9], + [0x1FA, 0x1FB], + [0x1FC, 0x1FD], + [0x1FE, 0x1FF], + [0x200, 0x201], + [0x202, 0x203], + [0x204, 0x205], + [0x206, 0x207], + [0x208, 0x209], + [0x20A, 0x20B], + [0x20C, 0x20D], + [0x20E, 0x20F], + [0x210, 0x211], + [0x212, 0x213], + [0x214, 0x215], + [0x216, 0x217], + [0x218, 0x219], + [0x21A, 0x21B], + [0x21C, 0x21D], + [0x21E, 0x21F], + [0x220, 0x19E], + [0x222, 0x223], + [0x224, 0x225], + [0x226, 0x227], + [0x228, 0x229], + [0x22A, 0x22B], + [0x22C, 0x22D], + [0x22E, 0x22F], + [0x230, 0x231], + [0x232, 0x233], + [0x23A, 0x2C65], + [0x23B, 0x23C], + [0x23D, 0x19A], + [0x23E, 0x2C66], + [0x241, 0x242], + [0x243, 0x180], + [0x244, 0x289], + [0x245, 0x28C], + [0x246, 0x247], + [0x248, 0x249], + [0x24A, 0x24B], + [0x24C, 0x24D], + [0x24E, 0x24F], + [0x345, 0x3B9], + [0x370, 0x371], + [0x372, 0x373], + [0x376, 0x377], + [0x37F, 0x3F3], + [0x386, 0x3AC], + [0x388, 0x3AD], + [0x389, 0x3AE], + [0x38A, 0x3AF], + [0x38C, 0x3CC], + [0x38E, 0x3CD], + [0x38F, 0x3CE], + [0x391, 0x3B1], + [0x392, 0x3B2], + [0x393, 0x3B3], + [0x394, 0x3B4], + [0x395, 0x3B5], + [0x396, 0x3B6], + [0x397, 0x3B7], + [0x398, 0x3B8], + [0x399, 0x3B9], + [0x39A, 0x3BA], + [0x39B, 0x3BB], + [0x39C, 0x3BC], + [0x39D, 0x3BD], + [0x39E, 0x3BE], + [0x39F, 0x3BF], + [0x3A0, 0x3C0], + [0x3A1, 0x3C1], + [0x3A3, 0x3C3], + [0x3A4, 0x3C4], + [0x3A5, 0x3C5], + [0x3A6, 0x3C6], + [0x3A7, 0x3C7], + [0x3A8, 0x3C8], + [0x3A9, 0x3C9], + [0x3AA, 0x3CA], + [0x3AB, 0x3CB], + [0x3C2, 0x3C3], + [0x3CF, 0x3D7], + [0x3D0, 0x3B2], + [0x3D1, 0x3B8], + [0x3D5, 0x3C6], + [0x3D6, 0x3C0], + [0x3D8, 
0x3D9], + [0x3DA, 0x3DB], + [0x3DC, 0x3DD], + [0x3DE, 0x3DF], + [0x3E0, 0x3E1], + [0x3E2, 0x3E3], + [0x3E4, 0x3E5], + [0x3E6, 0x3E7], + [0x3E8, 0x3E9], + [0x3EA, 0x3EB], + [0x3EC, 0x3ED], + [0x3EE, 0x3EF], + [0x3F0, 0x3BA], + [0x3F1, 0x3C1], + [0x3F4, 0x3B8], + [0x3F5, 0x3B5], + [0x3F7, 0x3F8], + [0x3F9, 0x3F2], + [0x3FA, 0x3FB], + [0x3FD, 0x37B], + [0x3FE, 0x37C], + [0x3FF, 0x37D], + [0x400, 0x450], + [0x401, 0x451], + [0x402, 0x452], + [0x403, 0x453], + [0x404, 0x454], + [0x405, 0x455], + [0x406, 0x456], + [0x407, 0x457], + [0x408, 0x458], + [0x409, 0x459], + [0x40A, 0x45A], + [0x40B, 0x45B], + [0x40C, 0x45C], + [0x40D, 0x45D], + [0x40E, 0x45E], + [0x40F, 0x45F], + [0x410, 0x430], + [0x411, 0x431], + [0x412, 0x432], + [0x413, 0x433], + [0x414, 0x434], + [0x415, 0x435], + [0x416, 0x436], + [0x417, 0x437], + [0x418, 0x438], + [0x419, 0x439], + [0x41A, 0x43A], + [0x41B, 0x43B], + [0x41C, 0x43C], + [0x41D, 0x43D], + [0x41E, 0x43E], + [0x41F, 0x43F], + [0x420, 0x440], + [0x421, 0x441], + [0x422, 0x442], + [0x423, 0x443], + [0x424, 0x444], + [0x425, 0x445], + [0x426, 0x446], + [0x427, 0x447], + [0x428, 0x448], + [0x429, 0x449], + [0x42A, 0x44A], + [0x42B, 0x44B], + [0x42C, 0x44C], + [0x42D, 0x44D], + [0x42E, 0x44E], + [0x42F, 0x44F], + [0x460, 0x461], + [0x462, 0x463], + [0x464, 0x465], + [0x466, 0x467], + [0x468, 0x469], + [0x46A, 0x46B], + [0x46C, 0x46D], + [0x46E, 0x46F], + [0x470, 0x471], + [0x472, 0x473], + [0x474, 0x475], + [0x476, 0x477], + [0x478, 0x479], + [0x47A, 0x47B], + [0x47C, 0x47D], + [0x47E, 0x47F], + [0x480, 0x481], + [0x48A, 0x48B], + [0x48C, 0x48D], + [0x48E, 0x48F], + [0x490, 0x491], + [0x492, 0x493], + [0x494, 0x495], + [0x496, 0x497], + [0x498, 0x499], + [0x49A, 0x49B], + [0x49C, 0x49D], + [0x49E, 0x49F], + [0x4A0, 0x4A1], + [0x4A2, 0x4A3], + [0x4A4, 0x4A5], + [0x4A6, 0x4A7], + [0x4A8, 0x4A9], + [0x4AA, 0x4AB], + [0x4AC, 0x4AD], + [0x4AE, 0x4AF], + [0x4B0, 0x4B1], + [0x4B2, 0x4B3], + [0x4B4, 0x4B5], + [0x4B6, 0x4B7], + [0x4B8, 0x4B9], + [0x4BA, 
0x4BB], + [0x4BC, 0x4BD], + [0x4BE, 0x4BF], + [0x4C0, 0x4CF], + [0x4C1, 0x4C2], + [0x4C3, 0x4C4], + [0x4C5, 0x4C6], + [0x4C7, 0x4C8], + [0x4C9, 0x4CA], + [0x4CB, 0x4CC], + [0x4CD, 0x4CE], + [0x4D0, 0x4D1], + [0x4D2, 0x4D3], + [0x4D4, 0x4D5], + [0x4D6, 0x4D7], + [0x4D8, 0x4D9], + [0x4DA, 0x4DB], + [0x4DC, 0x4DD], + [0x4DE, 0x4DF], + [0x4E0, 0x4E1], + [0x4E2, 0x4E3], + [0x4E4, 0x4E5], + [0x4E6, 0x4E7], + [0x4E8, 0x4E9], + [0x4EA, 0x4EB], + [0x4EC, 0x4ED], + [0x4EE, 0x4EF], + [0x4F0, 0x4F1], + [0x4F2, 0x4F3], + [0x4F4, 0x4F5], + [0x4F6, 0x4F7], + [0x4F8, 0x4F9], + [0x4FA, 0x4FB], + [0x4FC, 0x4FD], + [0x4FE, 0x4FF], + [0x500, 0x501], + [0x502, 0x503], + [0x504, 0x505], + [0x506, 0x507], + [0x508, 0x509], + [0x50A, 0x50B], + [0x50C, 0x50D], + [0x50E, 0x50F], + [0x510, 0x511], + [0x512, 0x513], + [0x514, 0x515], + [0x516, 0x517], + [0x518, 0x519], + [0x51A, 0x51B], + [0x51C, 0x51D], + [0x51E, 0x51F], + [0x520, 0x521], + [0x522, 0x523], + [0x524, 0x525], + [0x526, 0x527], + [0x528, 0x529], + [0x52A, 0x52B], + [0x52C, 0x52D], + [0x52E, 0x52F], + [0x531, 0x561], + [0x532, 0x562], + [0x533, 0x563], + [0x534, 0x564], + [0x535, 0x565], + [0x536, 0x566], + [0x537, 0x567], + [0x538, 0x568], + [0x539, 0x569], + [0x53A, 0x56A], + [0x53B, 0x56B], + [0x53C, 0x56C], + [0x53D, 0x56D], + [0x53E, 0x56E], + [0x53F, 0x56F], + [0x540, 0x570], + [0x541, 0x571], + [0x542, 0x572], + [0x543, 0x573], + [0x544, 0x574], + [0x545, 0x575], + [0x546, 0x576], + [0x547, 0x577], + [0x548, 0x578], + [0x549, 0x579], + [0x54A, 0x57A], + [0x54B, 0x57B], + [0x54C, 0x57C], + [0x54D, 0x57D], + [0x54E, 0x57E], + [0x54F, 0x57F], + [0x550, 0x580], + [0x551, 0x581], + [0x552, 0x582], + [0x553, 0x583], + [0x554, 0x584], + [0x555, 0x585], + [0x556, 0x586], + [0x10A0, 0x2D00], + [0x10A1, 0x2D01], + [0x10A2, 0x2D02], + [0x10A3, 0x2D03], + [0x10A4, 0x2D04], + [0x10A5, 0x2D05], + [0x10A6, 0x2D06], + [0x10A7, 0x2D07], + [0x10A8, 0x2D08], + [0x10A9, 0x2D09], + [0x10AA, 0x2D0A], + [0x10AB, 0x2D0B], + [0x10AC, 0x2D0C], + 
[0x10AD, 0x2D0D], + [0x10AE, 0x2D0E], + [0x10AF, 0x2D0F], + [0x10B0, 0x2D10], + [0x10B1, 0x2D11], + [0x10B2, 0x2D12], + [0x10B3, 0x2D13], + [0x10B4, 0x2D14], + [0x10B5, 0x2D15], + [0x10B6, 0x2D16], + [0x10B7, 0x2D17], + [0x10B8, 0x2D18], + [0x10B9, 0x2D19], + [0x10BA, 0x2D1A], + [0x10BB, 0x2D1B], + [0x10BC, 0x2D1C], + [0x10BD, 0x2D1D], + [0x10BE, 0x2D1E], + [0x10BF, 0x2D1F], + [0x10C0, 0x2D20], + [0x10C1, 0x2D21], + [0x10C2, 0x2D22], + [0x10C3, 0x2D23], + [0x10C4, 0x2D24], + [0x10C5, 0x2D25], + [0x10C7, 0x2D27], + [0x10CD, 0x2D2D], + [0x13F8, 0x13F0], + [0x13F9, 0x13F1], + [0x13FA, 0x13F2], + [0x13FB, 0x13F3], + [0x13FC, 0x13F4], + [0x13FD, 0x13F5], + [0x1C80, 0x432], + [0x1C81, 0x434], + [0x1C82, 0x43E], + [0x1C83, 0x441], + [0x1C84, 0x442], + [0x1C85, 0x442], + [0x1C86, 0x44A], + [0x1C87, 0x463], + [0x1C88, 0xA64B], + [0x1C89, 0x1C8A], + [0x1C90, 0x10D0], + [0x1C91, 0x10D1], + [0x1C92, 0x10D2], + [0x1C93, 0x10D3], + [0x1C94, 0x10D4], + [0x1C95, 0x10D5], + [0x1C96, 0x10D6], + [0x1C97, 0x10D7], + [0x1C98, 0x10D8], + [0x1C99, 0x10D9], + [0x1C9A, 0x10DA], + [0x1C9B, 0x10DB], + [0x1C9C, 0x10DC], + [0x1C9D, 0x10DD], + [0x1C9E, 0x10DE], + [0x1C9F, 0x10DF], + [0x1CA0, 0x10E0], + [0x1CA1, 0x10E1], + [0x1CA2, 0x10E2], + [0x1CA3, 0x10E3], + [0x1CA4, 0x10E4], + [0x1CA5, 0x10E5], + [0x1CA6, 0x10E6], + [0x1CA7, 0x10E7], + [0x1CA8, 0x10E8], + [0x1CA9, 0x10E9], + [0x1CAA, 0x10EA], + [0x1CAB, 0x10EB], + [0x1CAC, 0x10EC], + [0x1CAD, 0x10ED], + [0x1CAE, 0x10EE], + [0x1CAF, 0x10EF], + [0x1CB0, 0x10F0], + [0x1CB1, 0x10F1], + [0x1CB2, 0x10F2], + [0x1CB3, 0x10F3], + [0x1CB4, 0x10F4], + [0x1CB5, 0x10F5], + [0x1CB6, 0x10F6], + [0x1CB7, 0x10F7], + [0x1CB8, 0x10F8], + [0x1CB9, 0x10F9], + [0x1CBA, 0x10FA], + [0x1CBD, 0x10FD], + [0x1CBE, 0x10FE], + [0x1CBF, 0x10FF], + [0x1E00, 0x1E01], + [0x1E02, 0x1E03], + [0x1E04, 0x1E05], + [0x1E06, 0x1E07], + [0x1E08, 0x1E09], + [0x1E0A, 0x1E0B], + [0x1E0C, 0x1E0D], + [0x1E0E, 0x1E0F], + [0x1E10, 0x1E11], + [0x1E12, 0x1E13], + [0x1E14, 0x1E15], + 
[0x1E16, 0x1E17], + [0x1E18, 0x1E19], + [0x1E1A, 0x1E1B], + [0x1E1C, 0x1E1D], + [0x1E1E, 0x1E1F], + [0x1E20, 0x1E21], + [0x1E22, 0x1E23], + [0x1E24, 0x1E25], + [0x1E26, 0x1E27], + [0x1E28, 0x1E29], + [0x1E2A, 0x1E2B], + [0x1E2C, 0x1E2D], + [0x1E2E, 0x1E2F], + [0x1E30, 0x1E31], + [0x1E32, 0x1E33], + [0x1E34, 0x1E35], + [0x1E36, 0x1E37], + [0x1E38, 0x1E39], + [0x1E3A, 0x1E3B], + [0x1E3C, 0x1E3D], + [0x1E3E, 0x1E3F], + [0x1E40, 0x1E41], + [0x1E42, 0x1E43], + [0x1E44, 0x1E45], + [0x1E46, 0x1E47], + [0x1E48, 0x1E49], + [0x1E4A, 0x1E4B], + [0x1E4C, 0x1E4D], + [0x1E4E, 0x1E4F], + [0x1E50, 0x1E51], + [0x1E52, 0x1E53], + [0x1E54, 0x1E55], + [0x1E56, 0x1E57], + [0x1E58, 0x1E59], + [0x1E5A, 0x1E5B], + [0x1E5C, 0x1E5D], + [0x1E5E, 0x1E5F], + [0x1E60, 0x1E61], + [0x1E62, 0x1E63], + [0x1E64, 0x1E65], + [0x1E66, 0x1E67], + [0x1E68, 0x1E69], + [0x1E6A, 0x1E6B], + [0x1E6C, 0x1E6D], + [0x1E6E, 0x1E6F], + [0x1E70, 0x1E71], + [0x1E72, 0x1E73], + [0x1E74, 0x1E75], + [0x1E76, 0x1E77], + [0x1E78, 0x1E79], + [0x1E7A, 0x1E7B], + [0x1E7C, 0x1E7D], + [0x1E7E, 0x1E7F], + [0x1E80, 0x1E81], + [0x1E82, 0x1E83], + [0x1E84, 0x1E85], + [0x1E86, 0x1E87], + [0x1E88, 0x1E89], + [0x1E8A, 0x1E8B], + [0x1E8C, 0x1E8D], + [0x1E8E, 0x1E8F], + [0x1E90, 0x1E91], + [0x1E92, 0x1E93], + [0x1E94, 0x1E95], + [0x1E9B, 0x1E61], + [0x1E9E, 0xDF], + [0x1EA0, 0x1EA1], + [0x1EA2, 0x1EA3], + [0x1EA4, 0x1EA5], + [0x1EA6, 0x1EA7], + [0x1EA8, 0x1EA9], + [0x1EAA, 0x1EAB], + [0x1EAC, 0x1EAD], + [0x1EAE, 0x1EAF], + [0x1EB0, 0x1EB1], + [0x1EB2, 0x1EB3], + [0x1EB4, 0x1EB5], + [0x1EB6, 0x1EB7], + [0x1EB8, 0x1EB9], + [0x1EBA, 0x1EBB], + [0x1EBC, 0x1EBD], + [0x1EBE, 0x1EBF], + [0x1EC0, 0x1EC1], + [0x1EC2, 0x1EC3], + [0x1EC4, 0x1EC5], + [0x1EC6, 0x1EC7], + [0x1EC8, 0x1EC9], + [0x1ECA, 0x1ECB], + [0x1ECC, 0x1ECD], + [0x1ECE, 0x1ECF], + [0x1ED0, 0x1ED1], + [0x1ED2, 0x1ED3], + [0x1ED4, 0x1ED5], + [0x1ED6, 0x1ED7], + [0x1ED8, 0x1ED9], + [0x1EDA, 0x1EDB], + [0x1EDC, 0x1EDD], + [0x1EDE, 0x1EDF], + [0x1EE0, 0x1EE1], + [0x1EE2, 0x1EE3], + 
[0x1EE4, 0x1EE5], + [0x1EE6, 0x1EE7], + [0x1EE8, 0x1EE9], + [0x1EEA, 0x1EEB], + [0x1EEC, 0x1EED], + [0x1EEE, 0x1EEF], + [0x1EF0, 0x1EF1], + [0x1EF2, 0x1EF3], + [0x1EF4, 0x1EF5], + [0x1EF6, 0x1EF7], + [0x1EF8, 0x1EF9], + [0x1EFA, 0x1EFB], + [0x1EFC, 0x1EFD], + [0x1EFE, 0x1EFF], + [0x1F08, 0x1F00], + [0x1F09, 0x1F01], + [0x1F0A, 0x1F02], + [0x1F0B, 0x1F03], + [0x1F0C, 0x1F04], + [0x1F0D, 0x1F05], + [0x1F0E, 0x1F06], + [0x1F0F, 0x1F07], + [0x1F18, 0x1F10], + [0x1F19, 0x1F11], + [0x1F1A, 0x1F12], + [0x1F1B, 0x1F13], + [0x1F1C, 0x1F14], + [0x1F1D, 0x1F15], + [0x1F28, 0x1F20], + [0x1F29, 0x1F21], + [0x1F2A, 0x1F22], + [0x1F2B, 0x1F23], + [0x1F2C, 0x1F24], + [0x1F2D, 0x1F25], + [0x1F2E, 0x1F26], + [0x1F2F, 0x1F27], + [0x1F38, 0x1F30], + [0x1F39, 0x1F31], + [0x1F3A, 0x1F32], + [0x1F3B, 0x1F33], + [0x1F3C, 0x1F34], + [0x1F3D, 0x1F35], + [0x1F3E, 0x1F36], + [0x1F3F, 0x1F37], + [0x1F48, 0x1F40], + [0x1F49, 0x1F41], + [0x1F4A, 0x1F42], + [0x1F4B, 0x1F43], + [0x1F4C, 0x1F44], + [0x1F4D, 0x1F45], + [0x1F59, 0x1F51], + [0x1F5B, 0x1F53], + [0x1F5D, 0x1F55], + [0x1F5F, 0x1F57], + [0x1F68, 0x1F60], + [0x1F69, 0x1F61], + [0x1F6A, 0x1F62], + [0x1F6B, 0x1F63], + [0x1F6C, 0x1F64], + [0x1F6D, 0x1F65], + [0x1F6E, 0x1F66], + [0x1F6F, 0x1F67], + [0x1F88, 0x1F80], + [0x1F89, 0x1F81], + [0x1F8A, 0x1F82], + [0x1F8B, 0x1F83], + [0x1F8C, 0x1F84], + [0x1F8D, 0x1F85], + [0x1F8E, 0x1F86], + [0x1F8F, 0x1F87], + [0x1F98, 0x1F90], + [0x1F99, 0x1F91], + [0x1F9A, 0x1F92], + [0x1F9B, 0x1F93], + [0x1F9C, 0x1F94], + [0x1F9D, 0x1F95], + [0x1F9E, 0x1F96], + [0x1F9F, 0x1F97], + [0x1FA8, 0x1FA0], + [0x1FA9, 0x1FA1], + [0x1FAA, 0x1FA2], + [0x1FAB, 0x1FA3], + [0x1FAC, 0x1FA4], + [0x1FAD, 0x1FA5], + [0x1FAE, 0x1FA6], + [0x1FAF, 0x1FA7], + [0x1FB8, 0x1FB0], + [0x1FB9, 0x1FB1], + [0x1FBA, 0x1F70], + [0x1FBB, 0x1F71], + [0x1FBC, 0x1FB3], + [0x1FBE, 0x3B9], + [0x1FC8, 0x1F72], + [0x1FC9, 0x1F73], + [0x1FCA, 0x1F74], + [0x1FCB, 0x1F75], + [0x1FCC, 0x1FC3], + [0x1FD3, 0x390], + [0x1FD8, 0x1FD0], + [0x1FD9, 0x1FD1], + 
[0x1FDA, 0x1F76], + [0x1FDB, 0x1F77], + [0x1FE3, 0x3B0], + [0x1FE8, 0x1FE0], + [0x1FE9, 0x1FE1], + [0x1FEA, 0x1F7A], + [0x1FEB, 0x1F7B], + [0x1FEC, 0x1FE5], + [0x1FF8, 0x1F78], + [0x1FF9, 0x1F79], + [0x1FFA, 0x1F7C], + [0x1FFB, 0x1F7D], + [0x1FFC, 0x1FF3], + [0x2126, 0x3C9], + [0x212A, 0x6B], + [0x212B, 0xE5], + [0x2132, 0x214E], + [0x2160, 0x2170], + [0x2161, 0x2171], + [0x2162, 0x2172], + [0x2163, 0x2173], + [0x2164, 0x2174], + [0x2165, 0x2175], + [0x2166, 0x2176], + [0x2167, 0x2177], + [0x2168, 0x2178], + [0x2169, 0x2179], + [0x216A, 0x217A], + [0x216B, 0x217B], + [0x216C, 0x217C], + [0x216D, 0x217D], + [0x216E, 0x217E], + [0x216F, 0x217F], + [0x2183, 0x2184], + [0x24B6, 0x24D0], + [0x24B7, 0x24D1], + [0x24B8, 0x24D2], + [0x24B9, 0x24D3], + [0x24BA, 0x24D4], + [0x24BB, 0x24D5], + [0x24BC, 0x24D6], + [0x24BD, 0x24D7], + [0x24BE, 0x24D8], + [0x24BF, 0x24D9], + [0x24C0, 0x24DA], + [0x24C1, 0x24DB], + [0x24C2, 0x24DC], + [0x24C3, 0x24DD], + [0x24C4, 0x24DE], + [0x24C5, 0x24DF], + [0x24C6, 0x24E0], + [0x24C7, 0x24E1], + [0x24C8, 0x24E2], + [0x24C9, 0x24E3], + [0x24CA, 0x24E4], + [0x24CB, 0x24E5], + [0x24CC, 0x24E6], + [0x24CD, 0x24E7], + [0x24CE, 0x24E8], + [0x24CF, 0x24E9], + [0x2C00, 0x2C30], + [0x2C01, 0x2C31], + [0x2C02, 0x2C32], + [0x2C03, 0x2C33], + [0x2C04, 0x2C34], + [0x2C05, 0x2C35], + [0x2C06, 0x2C36], + [0x2C07, 0x2C37], + [0x2C08, 0x2C38], + [0x2C09, 0x2C39], + [0x2C0A, 0x2C3A], + [0x2C0B, 0x2C3B], + [0x2C0C, 0x2C3C], + [0x2C0D, 0x2C3D], + [0x2C0E, 0x2C3E], + [0x2C0F, 0x2C3F], + [0x2C10, 0x2C40], + [0x2C11, 0x2C41], + [0x2C12, 0x2C42], + [0x2C13, 0x2C43], + [0x2C14, 0x2C44], + [0x2C15, 0x2C45], + [0x2C16, 0x2C46], + [0x2C17, 0x2C47], + [0x2C18, 0x2C48], + [0x2C19, 0x2C49], + [0x2C1A, 0x2C4A], + [0x2C1B, 0x2C4B], + [0x2C1C, 0x2C4C], + [0x2C1D, 0x2C4D], + [0x2C1E, 0x2C4E], + [0x2C1F, 0x2C4F], + [0x2C20, 0x2C50], + [0x2C21, 0x2C51], + [0x2C22, 0x2C52], + [0x2C23, 0x2C53], + [0x2C24, 0x2C54], + [0x2C25, 0x2C55], + [0x2C26, 0x2C56], + [0x2C27, 0x2C57], + 
[0x2C28, 0x2C58], + [0x2C29, 0x2C59], + [0x2C2A, 0x2C5A], + [0x2C2B, 0x2C5B], + [0x2C2C, 0x2C5C], + [0x2C2D, 0x2C5D], + [0x2C2E, 0x2C5E], + [0x2C2F, 0x2C5F], + [0x2C60, 0x2C61], + [0x2C62, 0x26B], + [0x2C63, 0x1D7D], + [0x2C64, 0x27D], + [0x2C67, 0x2C68], + [0x2C69, 0x2C6A], + [0x2C6B, 0x2C6C], + [0x2C6D, 0x251], + [0x2C6E, 0x271], + [0x2C6F, 0x250], + [0x2C70, 0x252], + [0x2C72, 0x2C73], + [0x2C75, 0x2C76], + [0x2C7E, 0x23F], + [0x2C7F, 0x240], + [0x2C80, 0x2C81], + [0x2C82, 0x2C83], + [0x2C84, 0x2C85], + [0x2C86, 0x2C87], + [0x2C88, 0x2C89], + [0x2C8A, 0x2C8B], + [0x2C8C, 0x2C8D], + [0x2C8E, 0x2C8F], + [0x2C90, 0x2C91], + [0x2C92, 0x2C93], + [0x2C94, 0x2C95], + [0x2C96, 0x2C97], + [0x2C98, 0x2C99], + [0x2C9A, 0x2C9B], + [0x2C9C, 0x2C9D], + [0x2C9E, 0x2C9F], + [0x2CA0, 0x2CA1], + [0x2CA2, 0x2CA3], + [0x2CA4, 0x2CA5], + [0x2CA6, 0x2CA7], + [0x2CA8, 0x2CA9], + [0x2CAA, 0x2CAB], + [0x2CAC, 0x2CAD], + [0x2CAE, 0x2CAF], + [0x2CB0, 0x2CB1], + [0x2CB2, 0x2CB3], + [0x2CB4, 0x2CB5], + [0x2CB6, 0x2CB7], + [0x2CB8, 0x2CB9], + [0x2CBA, 0x2CBB], + [0x2CBC, 0x2CBD], + [0x2CBE, 0x2CBF], + [0x2CC0, 0x2CC1], + [0x2CC2, 0x2CC3], + [0x2CC4, 0x2CC5], + [0x2CC6, 0x2CC7], + [0x2CC8, 0x2CC9], + [0x2CCA, 0x2CCB], + [0x2CCC, 0x2CCD], + [0x2CCE, 0x2CCF], + [0x2CD0, 0x2CD1], + [0x2CD2, 0x2CD3], + [0x2CD4, 0x2CD5], + [0x2CD6, 0x2CD7], + [0x2CD8, 0x2CD9], + [0x2CDA, 0x2CDB], + [0x2CDC, 0x2CDD], + [0x2CDE, 0x2CDF], + [0x2CE0, 0x2CE1], + [0x2CE2, 0x2CE3], + [0x2CEB, 0x2CEC], + [0x2CED, 0x2CEE], + [0x2CF2, 0x2CF3], + [0xA640, 0xA641], + [0xA642, 0xA643], + [0xA644, 0xA645], + [0xA646, 0xA647], + [0xA648, 0xA649], + [0xA64A, 0xA64B], + [0xA64C, 0xA64D], + [0xA64E, 0xA64F], + [0xA650, 0xA651], + [0xA652, 0xA653], + [0xA654, 0xA655], + [0xA656, 0xA657], + [0xA658, 0xA659], + [0xA65A, 0xA65B], + [0xA65C, 0xA65D], + [0xA65E, 0xA65F], + [0xA660, 0xA661], + [0xA662, 0xA663], + [0xA664, 0xA665], + [0xA666, 0xA667], + [0xA668, 0xA669], + [0xA66A, 0xA66B], + [0xA66C, 0xA66D], + [0xA680, 0xA681], + 
[0xA682, 0xA683], + [0xA684, 0xA685], + [0xA686, 0xA687], + [0xA688, 0xA689], + [0xA68A, 0xA68B], + [0xA68C, 0xA68D], + [0xA68E, 0xA68F], + [0xA690, 0xA691], + [0xA692, 0xA693], + [0xA694, 0xA695], + [0xA696, 0xA697], + [0xA698, 0xA699], + [0xA69A, 0xA69B], + [0xA722, 0xA723], + [0xA724, 0xA725], + [0xA726, 0xA727], + [0xA728, 0xA729], + [0xA72A, 0xA72B], + [0xA72C, 0xA72D], + [0xA72E, 0xA72F], + [0xA732, 0xA733], + [0xA734, 0xA735], + [0xA736, 0xA737], + [0xA738, 0xA739], + [0xA73A, 0xA73B], + [0xA73C, 0xA73D], + [0xA73E, 0xA73F], + [0xA740, 0xA741], + [0xA742, 0xA743], + [0xA744, 0xA745], + [0xA746, 0xA747], + [0xA748, 0xA749], + [0xA74A, 0xA74B], + [0xA74C, 0xA74D], + [0xA74E, 0xA74F], + [0xA750, 0xA751], + [0xA752, 0xA753], + [0xA754, 0xA755], + [0xA756, 0xA757], + [0xA758, 0xA759], + [0xA75A, 0xA75B], + [0xA75C, 0xA75D], + [0xA75E, 0xA75F], + [0xA760, 0xA761], + [0xA762, 0xA763], + [0xA764, 0xA765], + [0xA766, 0xA767], + [0xA768, 0xA769], + [0xA76A, 0xA76B], + [0xA76C, 0xA76D], + [0xA76E, 0xA76F], + [0xA779, 0xA77A], + [0xA77B, 0xA77C], + [0xA77D, 0x1D79], + [0xA77E, 0xA77F], + [0xA780, 0xA781], + [0xA782, 0xA783], + [0xA784, 0xA785], + [0xA786, 0xA787], + [0xA78B, 0xA78C], + [0xA78D, 0x265], + [0xA790, 0xA791], + [0xA792, 0xA793], + [0xA796, 0xA797], + [0xA798, 0xA799], + [0xA79A, 0xA79B], + [0xA79C, 0xA79D], + [0xA79E, 0xA79F], + [0xA7A0, 0xA7A1], + [0xA7A2, 0xA7A3], + [0xA7A4, 0xA7A5], + [0xA7A6, 0xA7A7], + [0xA7A8, 0xA7A9], + [0xA7AA, 0x266], + [0xA7AB, 0x25C], + [0xA7AC, 0x261], + [0xA7AD, 0x26C], + [0xA7AE, 0x26A], + [0xA7B0, 0x29E], + [0xA7B1, 0x287], + [0xA7B2, 0x29D], + [0xA7B3, 0xAB53], + [0xA7B4, 0xA7B5], + [0xA7B6, 0xA7B7], + [0xA7B8, 0xA7B9], + [0xA7BA, 0xA7BB], + [0xA7BC, 0xA7BD], + [0xA7BE, 0xA7BF], + [0xA7C0, 0xA7C1], + [0xA7C2, 0xA7C3], + [0xA7C4, 0xA794], + [0xA7C5, 0x282], + [0xA7C6, 0x1D8E], + [0xA7C7, 0xA7C8], + [0xA7C9, 0xA7CA], + [0xA7CB, 0x264], + [0xA7CC, 0xA7CD], + [0xA7D0, 0xA7D1], + [0xA7D6, 0xA7D7], + [0xA7D8, 0xA7D9], + [0xA7DA, 
0xA7DB], + [0xA7DC, 0x19B], + [0xA7F5, 0xA7F6], + [0xAB70, 0x13A0], + [0xAB71, 0x13A1], + [0xAB72, 0x13A2], + [0xAB73, 0x13A3], + [0xAB74, 0x13A4], + [0xAB75, 0x13A5], + [0xAB76, 0x13A6], + [0xAB77, 0x13A7], + [0xAB78, 0x13A8], + [0xAB79, 0x13A9], + [0xAB7A, 0x13AA], + [0xAB7B, 0x13AB], + [0xAB7C, 0x13AC], + [0xAB7D, 0x13AD], + [0xAB7E, 0x13AE], + [0xAB7F, 0x13AF], + [0xAB80, 0x13B0], + [0xAB81, 0x13B1], + [0xAB82, 0x13B2], + [0xAB83, 0x13B3], + [0xAB84, 0x13B4], + [0xAB85, 0x13B5], + [0xAB86, 0x13B6], + [0xAB87, 0x13B7], + [0xAB88, 0x13B8], + [0xAB89, 0x13B9], + [0xAB8A, 0x13BA], + [0xAB8B, 0x13BB], + [0xAB8C, 0x13BC], + [0xAB8D, 0x13BD], + [0xAB8E, 0x13BE], + [0xAB8F, 0x13BF], + [0xAB90, 0x13C0], + [0xAB91, 0x13C1], + [0xAB92, 0x13C2], + [0xAB93, 0x13C3], + [0xAB94, 0x13C4], + [0xAB95, 0x13C5], + [0xAB96, 0x13C6], + [0xAB97, 0x13C7], + [0xAB98, 0x13C8], + [0xAB99, 0x13C9], + [0xAB9A, 0x13CA], + [0xAB9B, 0x13CB], + [0xAB9C, 0x13CC], + [0xAB9D, 0x13CD], + [0xAB9E, 0x13CE], + [0xAB9F, 0x13CF], + [0xABA0, 0x13D0], + [0xABA1, 0x13D1], + [0xABA2, 0x13D2], + [0xABA3, 0x13D3], + [0xABA4, 0x13D4], + [0xABA5, 0x13D5], + [0xABA6, 0x13D6], + [0xABA7, 0x13D7], + [0xABA8, 0x13D8], + [0xABA9, 0x13D9], + [0xABAA, 0x13DA], + [0xABAB, 0x13DB], + [0xABAC, 0x13DC], + [0xABAD, 0x13DD], + [0xABAE, 0x13DE], + [0xABAF, 0x13DF], + [0xABB0, 0x13E0], + [0xABB1, 0x13E1], + [0xABB2, 0x13E2], + [0xABB3, 0x13E3], + [0xABB4, 0x13E4], + [0xABB5, 0x13E5], + [0xABB6, 0x13E6], + [0xABB7, 0x13E7], + [0xABB8, 0x13E8], + [0xABB9, 0x13E9], + [0xABBA, 0x13EA], + [0xABBB, 0x13EB], + [0xABBC, 0x13EC], + [0xABBD, 0x13ED], + [0xABBE, 0x13EE], + [0xABBF, 0x13EF], + [0xFB05, 0xFB06], + [0xFF21, 0xFF41], + [0xFF22, 0xFF42], + [0xFF23, 0xFF43], + [0xFF24, 0xFF44], + [0xFF25, 0xFF45], + [0xFF26, 0xFF46], + [0xFF27, 0xFF47], + [0xFF28, 0xFF48], + [0xFF29, 0xFF49], + [0xFF2A, 0xFF4A], + [0xFF2B, 0xFF4B], + [0xFF2C, 0xFF4C], + [0xFF2D, 0xFF4D], + [0xFF2E, 0xFF4E], + [0xFF2F, 0xFF4F], + [0xFF30, 0xFF50], + [0xFF31, 
0xFF51], + [0xFF32, 0xFF52], + [0xFF33, 0xFF53], + [0xFF34, 0xFF54], + [0xFF35, 0xFF55], + [0xFF36, 0xFF56], + [0xFF37, 0xFF57], + [0xFF38, 0xFF58], + [0xFF39, 0xFF59], + [0xFF3A, 0xFF5A], + [0x10400, 0x10428], + [0x10401, 0x10429], + [0x10402, 0x1042A], + [0x10403, 0x1042B], + [0x10404, 0x1042C], + [0x10405, 0x1042D], + [0x10406, 0x1042E], + [0x10407, 0x1042F], + [0x10408, 0x10430], + [0x10409, 0x10431], + [0x1040A, 0x10432], + [0x1040B, 0x10433], + [0x1040C, 0x10434], + [0x1040D, 0x10435], + [0x1040E, 0x10436], + [0x1040F, 0x10437], + [0x10410, 0x10438], + [0x10411, 0x10439], + [0x10412, 0x1043A], + [0x10413, 0x1043B], + [0x10414, 0x1043C], + [0x10415, 0x1043D], + [0x10416, 0x1043E], + [0x10417, 0x1043F], + [0x10418, 0x10440], + [0x10419, 0x10441], + [0x1041A, 0x10442], + [0x1041B, 0x10443], + [0x1041C, 0x10444], + [0x1041D, 0x10445], + [0x1041E, 0x10446], + [0x1041F, 0x10447], + [0x10420, 0x10448], + [0x10421, 0x10449], + [0x10422, 0x1044A], + [0x10423, 0x1044B], + [0x10424, 0x1044C], + [0x10425, 0x1044D], + [0x10426, 0x1044E], + [0x10427, 0x1044F], + [0x104B0, 0x104D8], + [0x104B1, 0x104D9], + [0x104B2, 0x104DA], + [0x104B3, 0x104DB], + [0x104B4, 0x104DC], + [0x104B5, 0x104DD], + [0x104B6, 0x104DE], + [0x104B7, 0x104DF], + [0x104B8, 0x104E0], + [0x104B9, 0x104E1], + [0x104BA, 0x104E2], + [0x104BB, 0x104E3], + [0x104BC, 0x104E4], + [0x104BD, 0x104E5], + [0x104BE, 0x104E6], + [0x104BF, 0x104E7], + [0x104C0, 0x104E8], + [0x104C1, 0x104E9], + [0x104C2, 0x104EA], + [0x104C3, 0x104EB], + [0x104C4, 0x104EC], + [0x104C5, 0x104ED], + [0x104C6, 0x104EE], + [0x104C7, 0x104EF], + [0x104C8, 0x104F0], + [0x104C9, 0x104F1], + [0x104CA, 0x104F2], + [0x104CB, 0x104F3], + [0x104CC, 0x104F4], + [0x104CD, 0x104F5], + [0x104CE, 0x104F6], + [0x104CF, 0x104F7], + [0x104D0, 0x104F8], + [0x104D1, 0x104F9], + [0x104D2, 0x104FA], + [0x104D3, 0x104FB], + [0x10570, 0x10597], + [0x10571, 0x10598], + [0x10572, 0x10599], + [0x10573, 0x1059A], + [0x10574, 0x1059B], + [0x10575, 0x1059C], + 
[0x10576, 0x1059D], + [0x10577, 0x1059E], + [0x10578, 0x1059F], + [0x10579, 0x105A0], + [0x1057A, 0x105A1], + [0x1057C, 0x105A3], + [0x1057D, 0x105A4], + [0x1057E, 0x105A5], + [0x1057F, 0x105A6], + [0x10580, 0x105A7], + [0x10581, 0x105A8], + [0x10582, 0x105A9], + [0x10583, 0x105AA], + [0x10584, 0x105AB], + [0x10585, 0x105AC], + [0x10586, 0x105AD], + [0x10587, 0x105AE], + [0x10588, 0x105AF], + [0x10589, 0x105B0], + [0x1058A, 0x105B1], + [0x1058C, 0x105B3], + [0x1058D, 0x105B4], + [0x1058E, 0x105B5], + [0x1058F, 0x105B6], + [0x10590, 0x105B7], + [0x10591, 0x105B8], + [0x10592, 0x105B9], + [0x10594, 0x105BB], + [0x10595, 0x105BC], + [0x10C80, 0x10CC0], + [0x10C81, 0x10CC1], + [0x10C82, 0x10CC2], + [0x10C83, 0x10CC3], + [0x10C84, 0x10CC4], + [0x10C85, 0x10CC5], + [0x10C86, 0x10CC6], + [0x10C87, 0x10CC7], + [0x10C88, 0x10CC8], + [0x10C89, 0x10CC9], + [0x10C8A, 0x10CCA], + [0x10C8B, 0x10CCB], + [0x10C8C, 0x10CCC], + [0x10C8D, 0x10CCD], + [0x10C8E, 0x10CCE], + [0x10C8F, 0x10CCF], + [0x10C90, 0x10CD0], + [0x10C91, 0x10CD1], + [0x10C92, 0x10CD2], + [0x10C93, 0x10CD3], + [0x10C94, 0x10CD4], + [0x10C95, 0x10CD5], + [0x10C96, 0x10CD6], + [0x10C97, 0x10CD7], + [0x10C98, 0x10CD8], + [0x10C99, 0x10CD9], + [0x10C9A, 0x10CDA], + [0x10C9B, 0x10CDB], + [0x10C9C, 0x10CDC], + [0x10C9D, 0x10CDD], + [0x10C9E, 0x10CDE], + [0x10C9F, 0x10CDF], + [0x10CA0, 0x10CE0], + [0x10CA1, 0x10CE1], + [0x10CA2, 0x10CE2], + [0x10CA3, 0x10CE3], + [0x10CA4, 0x10CE4], + [0x10CA5, 0x10CE5], + [0x10CA6, 0x10CE6], + [0x10CA7, 0x10CE7], + [0x10CA8, 0x10CE8], + [0x10CA9, 0x10CE9], + [0x10CAA, 0x10CEA], + [0x10CAB, 0x10CEB], + [0x10CAC, 0x10CEC], + [0x10CAD, 0x10CED], + [0x10CAE, 0x10CEE], + [0x10CAF, 0x10CEF], + [0x10CB0, 0x10CF0], + [0x10CB1, 0x10CF1], + [0x10CB2, 0x10CF2], + [0x10D50, 0x10D70], + [0x10D51, 0x10D71], + [0x10D52, 0x10D72], + [0x10D53, 0x10D73], + [0x10D54, 0x10D74], + [0x10D55, 0x10D75], + [0x10D56, 0x10D76], + [0x10D57, 0x10D77], + [0x10D58, 0x10D78], + [0x10D59, 0x10D79], + [0x10D5A, 0x10D7A], 
+ [0x10D5B, 0x10D7B], + [0x10D5C, 0x10D7C], + [0x10D5D, 0x10D7D], + [0x10D5E, 0x10D7E], + [0x10D5F, 0x10D7F], + [0x10D60, 0x10D80], + [0x10D61, 0x10D81], + [0x10D62, 0x10D82], + [0x10D63, 0x10D83], + [0x10D64, 0x10D84], + [0x10D65, 0x10D85], + [0x118A0, 0x118C0], + [0x118A1, 0x118C1], + [0x118A2, 0x118C2], + [0x118A3, 0x118C3], + [0x118A4, 0x118C4], + [0x118A5, 0x118C5], + [0x118A6, 0x118C6], + [0x118A7, 0x118C7], + [0x118A8, 0x118C8], + [0x118A9, 0x118C9], + [0x118AA, 0x118CA], + [0x118AB, 0x118CB], + [0x118AC, 0x118CC], + [0x118AD, 0x118CD], + [0x118AE, 0x118CE], + [0x118AF, 0x118CF], + [0x118B0, 0x118D0], + [0x118B1, 0x118D1], + [0x118B2, 0x118D2], + [0x118B3, 0x118D3], + [0x118B4, 0x118D4], + [0x118B5, 0x118D5], + [0x118B6, 0x118D6], + [0x118B7, 0x118D7], + [0x118B8, 0x118D8], + [0x118B9, 0x118D9], + [0x118BA, 0x118DA], + [0x118BB, 0x118DB], + [0x118BC, 0x118DC], + [0x118BD, 0x118DD], + [0x118BE, 0x118DE], + [0x118BF, 0x118DF], + [0x16E40, 0x16E60], + [0x16E41, 0x16E61], + [0x16E42, 0x16E62], + [0x16E43, 0x16E63], + [0x16E44, 0x16E64], + [0x16E45, 0x16E65], + [0x16E46, 0x16E66], + [0x16E47, 0x16E67], + [0x16E48, 0x16E68], + [0x16E49, 0x16E69], + [0x16E4A, 0x16E6A], + [0x16E4B, 0x16E6B], + [0x16E4C, 0x16E6C], + [0x16E4D, 0x16E6D], + [0x16E4E, 0x16E6E], + [0x16E4F, 0x16E6F], + [0x16E50, 0x16E70], + [0x16E51, 0x16E71], + [0x16E52, 0x16E72], + [0x16E53, 0x16E73], + [0x16E54, 0x16E74], + [0x16E55, 0x16E75], + [0x16E56, 0x16E76], + [0x16E57, 0x16E77], + [0x16E58, 0x16E78], + [0x16E59, 0x16E79], + [0x16E5A, 0x16E7A], + [0x16E5B, 0x16E7B], + [0x16E5C, 0x16E7C], + [0x16E5D, 0x16E7D], + [0x16E5E, 0x16E7E], + [0x16E5F, 0x16E7F], + [0x1E900, 0x1E922], + [0x1E901, 0x1E923], + [0x1E902, 0x1E924], + [0x1E903, 0x1E925], + [0x1E904, 0x1E926], + [0x1E905, 0x1E927], + [0x1E906, 0x1E928], + [0x1E907, 0x1E929], + [0x1E908, 0x1E92A], + [0x1E909, 0x1E92B], + [0x1E90A, 0x1E92C], + [0x1E90B, 0x1E92D], + [0x1E90C, 0x1E92E], + [0x1E90D, 0x1E92F], + [0x1E90E, 0x1E930], + [0x1E90F, 
0x1E931], + [0x1E910, 0x1E932], + [0x1E911, 0x1E933], + [0x1E912, 0x1E934], + [0x1E913, 0x1E935], + [0x1E914, 0x1E936], + [0x1E915, 0x1E937], + [0x1E916, 0x1E938], + [0x1E917, 0x1E939], + [0x1E918, 0x1E93A], + [0x1E919, 0x1E93B], + [0x1E91A, 0x1E93C], + [0x1E91B, 0x1E93D], + [0x1E91C, 0x1E93E], + [0x1E91D, 0x1E93F], + [0x1E91E, 0x1E940], + [0x1E91F, 0x1E941], + [0x1E920, 0x1E942], + [0x1E921, 0x1E943] +]); diff --git a/package.json b/package.json index 1a6f853..adc283a 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,7 @@ "data/all-characters.js", "data/character-class-escape-sets.js", "data/i-bmp-mappings.js", + "data/iu-foldings.js", "data/iu-mappings.js" ], "scripts": { diff --git a/rewrite-pattern.js b/rewrite-pattern.js index 32bd5f5..02c52c2 100644 --- a/rewrite-pattern.js +++ b/rewrite-pattern.js @@ -7,6 +7,7 @@ const unicodeMatchProperty = require('unicode-match-property-ecmascript'); const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript'); const iuMappings = require('./data/iu-mappings.js'); const iBMPMappings = require('./data/i-bmp-mappings.js'); +const iuFoldings = require('./data/iu-foldings.js'); const ESCAPE_SETS = require('./data/character-class-escape-sets.js'); const { UNICODE_SET, UNICODE_IV_SET } = require('./data/all-characters.js'); @@ -46,10 +47,15 @@ const NEWLINE_SET = regenerate().add( const DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points .remove(NEWLINE_SET); -const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => { +const getCharacterClassEscapeSet = (character, unicode, ignoreCase, shouldApplySCF) => { if (unicode) { if (ignoreCase) { - return ESCAPE_SETS.UNICODE_IGNORE_CASE.get(character); + const result = ESCAPE_SETS.UNICODE_IGNORE_CASE.get(character); + if (shouldApplySCF) { + return ESCAPE_SETS.UNICODESET_IGNORE_CASE.get(character); + } else { + return result; + } } return ESCAPE_SETS.UNICODE.get(character); } @@ -123,16 +129,16 @@ const 
getUnicodePropertyEscapeSet = (value, isNegative, isUnicodeSetIgnoreCase) }; }; -const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnicodeSetIgnoreCase) => { +const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnicodeSetIgnoreCase, shouldApplySCF) => { const set = getUnicodePropertyEscapeSet(property, isNegative, isUnicodeSetIgnoreCase); const data = getCharacterClassEmptyData(); - const singleChars = set.characters; - const caseFoldFlags = configGetCaseFoldFlags(); - if (caseFoldFlags) { + const singleChars = shouldApplySCF ? regenerate(set.characters.toArray().map(ch => simpleCaseFolding(ch))) : set.characters; + const caseEqFlags = configGetCaseEqFlags(); + if (caseEqFlags) { for (const codepoint of singleChars.toArray()) { - const folded = caseFold(codepoint, caseFoldFlags); - if (folded) { - singleChars.add(folded); + const list = getCaseEquivalents(codepoint, caseEqFlags); + if (list) { + singleChars.add(list); } } } @@ -144,45 +150,45 @@ const getUnicodePropertyEscapeCharacterClassData = (property, isNegative, isUnic return data; }; -const CASE_FOLD_FLAG_NONE = 0b00; -const CASE_FOLD_FLAG_BMP = 0b01; -const CASE_FOLD_FLAG_UNICODE = 0b10; +const CASE_EQ_FLAG_NONE = 0b00; +const CASE_EQ_FLAG_BMP = 0b01; +const CASE_EQ_FLAG_UNICODE = 0b10; -function configGetCaseFoldFlags() { - let flags = CASE_FOLD_FLAG_NONE; +function configGetCaseEqFlags() { + let flags = CASE_EQ_FLAG_NONE; if (config.modifiersData.i === true) { if (config.transform.modifiers) { - flags |= CASE_FOLD_FLAG_BMP; + flags |= CASE_EQ_FLAG_BMP; if (config.flags.unicode || config.flags.unicodeSets) { - flags |= CASE_FOLD_FLAG_UNICODE; + flags |= CASE_EQ_FLAG_UNICODE; } } } else if (config.modifiersData.i === undefined) { if (config.transform.unicodeFlag && config.flags.ignoreCase) { - flags |= CASE_FOLD_FLAG_UNICODE; + flags |= CASE_EQ_FLAG_UNICODE; } } return flags; } -// Given a range of code points, add any case-folded code points in that range 
+// Given a range of code points, add any case-equivalent code points in that range // to a set. -regenerate.prototype.iuAddRange = function(min, max, caseFoldFlags) { +regenerate.prototype.iuAddRange = function(min, max, caseEqFlags) { const $this = this; do { - const folded = caseFold(min, caseFoldFlags); - if (folded) { - $this.add(folded); + const list = getCaseEquivalents(min, caseEqFlags); + if (list) { + $this.add(list); } } while (++min <= max); return $this; }; -regenerate.prototype.iuRemoveRange = function(min, max, caseFoldFlags) { +regenerate.prototype.iuRemoveRange = function(min, max, caseEqFlags) { const $this = this; do { - const folded = caseFold(min, caseFoldFlags); - if (folded) { - $this.remove(folded); + const list = getCaseEquivalents(min, caseEqFlags); + if (list) { + $this.remove(list); } } while (++min <= max); return $this; @@ -219,24 +225,50 @@ const wrap = (tree, pattern) => { }; }; -const caseFold = (codePoint, flags) => { - let folded = ((flags & CASE_FOLD_FLAG_UNICODE) ? iuMappings.get(codePoint) : undefined) || []; - if (typeof folded === "number") folded = [folded]; - if (flags & CASE_FOLD_FLAG_BMP) { - for (const cp of [codePoint].concat(folded)) { +/** + * Given any codepoint ch, returns false or an array of characters, + * such that for every c in the array, + * c != ch and Canonicalize(~, c) == Canonicalize(~, ch) + * + * where Canonicalize is defined in + * https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch + * @param {number} codePoint input code point + * @param {number} flags bitwise flags composed of CASE_EQ_FLAG_* + * @returns false | number[] + */ +const getCaseEquivalents = (codePoint, flags) => { + if (flags === CASE_EQ_FLAG_NONE) { + return false; + } + let result = ((flags & CASE_EQ_FLAG_UNICODE) ? 
iuMappings.get(codePoint) : undefined) || []; + if (typeof result === "number") result = [result]; + if (flags & CASE_EQ_FLAG_BMP) { + for (const cp of [codePoint].concat(result)) { // Fast path for ASCII characters if (cp >= 0x41 && cp <= 0x5a) { - folded.push(cp + 0x20); + result.push(cp + 0x20); } else if (cp >= 0x61 && cp <= 0x7a) { - folded.push(cp - 0x20); + result.push(cp - 0x20); } else { - folded = folded.concat(iBMPMappings.get(cp) || []); + result = result.concat(iBMPMappings.get(cp) || []); } } } - return folded.length == 0 ? false : folded; + return result.length == 0 ? false : result; }; +// https://tc39.es/ecma262/#sec-maybesimplecasefolding +const simpleCaseFolding = (codePoint) => { + // Fast path for ASCII characters + if (codePoint <= 0x7F) { + if (codePoint >= 0x41 && codePoint <= 0x5A) { + return codePoint + 0x20; + } + return codePoint; + } + return iuFoldings.get(codePoint) || codePoint; +} + const buildHandler = (action) => { switch (action) { case 'union': @@ -250,8 +282,8 @@ const buildHandler = (action) => { range: (data, start, end) => { data.singleChars.addRange(start, end); }, - iuRange: (data, start, end, caseFoldFlags) => { - data.singleChars.iuAddRange(start, end, caseFoldFlags); + iuRange: (data, start, end, caseEqFlags) => { + data.singleChars.iuAddRange(start, end, caseEqFlags); }, nested: (data, nestedData) => { data.singleChars.add(nestedData.singleChars); @@ -272,8 +304,8 @@ const buildHandler = (action) => { range: (data, start, end) => { data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars); }, - iuRange: (data, start, end, caseFoldFlags) => { - data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end, caseFoldFlags).add(data.singleChars); + iuRange: (data, start, end, caseEqFlags) => { + data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end, caseEqFlags).add(data.singleChars); }, nested: (data, nestedData) => { regSet(data, nestedData.singleChars); @@ -303,9 +335,9 @@ const 
buildHandler = (action) => { data.longStrings.clear(); data.maybeIncludesStrings = false; }, - iuRange: (data, start, end, caseFoldFlags) => { - if (data.first) data.singleChars.iuAddRange(start, end, caseFoldFlags); - else data.singleChars.intersection(regenerate().iuAddRange(start, end, caseFoldFlags)); + iuRange: (data, start, end, caseEqFlags) => { + if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags); + else data.singleChars.intersection(regenerate().iuAddRange(start, end, caseEqFlags)); data.longStrings.clear(); data.maybeIncludesStrings = false; }, @@ -339,9 +371,9 @@ const buildHandler = (action) => { if (data.first) data.singleChars.addRange(start, end); else data.singleChars.removeRange(start, end); }, - iuRange: (data, start, end, caseFoldFlags) => { - if (data.first) data.singleChars.iuAddRange(start, end, caseFoldFlags); - else data.singleChars.iuRemoveRange(start, end, caseFoldFlags); + iuRange: (data, start, end, caseEqFlags) => { + if (data.first) data.singleChars.iuAddRange(start, end, caseEqFlags); + else data.singleChars.iuRemoveRange(start, end, caseEqFlags); }, nested: (data, nestedData) => { regSet(data, nestedData.singleChars); @@ -374,38 +406,40 @@ const getCharacterClassEmptyData = () => ({ maybeIncludesStrings: false }); -const maybeFold = (codePoint, caseFoldFlags) => { - if (caseFoldFlags) { - const folded = caseFold(codePoint, caseFoldFlags); - if (folded) { - return [codePoint, folded]; - } +const concatCaseEquivalents = (codePoint, caseEqFlags) => { + const caseEquivalents = getCaseEquivalents(codePoint, caseEqFlags); + if (caseEquivalents) { + return [codePoint, ...caseEquivalents]; } return [codePoint]; }; -const computeClassStrings = (classStrings, regenerateOptions) => { +const computeClassStrings = (classStrings, regenerateOptions, caseEqFlags, shouldApplySCF) => { let data = getCharacterClassEmptyData(); - const caseFoldFlags = configGetCaseFoldFlags(); - for (const string of classStrings.strings) { if 
(string.characters.length === 1) { - maybeFold(string.characters[0].codePoint, caseFoldFlags).forEach((cp) => { + const codePoint = shouldApplySCF ? simpleCaseFolding(string.characters[0].codePoint) : string.characters[0].codePoint + concatCaseEquivalents(codePoint, caseEqFlags).forEach((cp) => { data.singleChars.add(cp); }); } else { - let stringifiedString; - if (caseFoldFlags) { - stringifiedString = ''; + let stringifiedString = ''; + if (caseEqFlags) { for (const ch of string.characters) { - let set = regenerate(ch.codePoint); - const folded = maybeFold(ch.codePoint, caseFoldFlags); - if (folded) set.add(folded); + const codePoint = shouldApplySCF ? simpleCaseFolding(ch.codePoint) : ch.codePoint; + const set = regenerate(concatCaseEquivalents(codePoint, caseEqFlags)); stringifiedString += set.toString(regenerateOptions); } } else { - stringifiedString = string.characters.map(ch => generate(ch)).join('') + for (const ch of string.characters) { + const codePoint = shouldApplySCF ? simpleCaseFolding(ch.codePoint) : ch.codePoint; + if (codePoint !== ch.codePoint) { + stringifiedString += regenerate(codePoint).toString(regenerateOptions); + } else { + stringifiedString += generate(ch); + } + } } data.longStrings.add(stringifiedString); @@ -416,12 +450,14 @@ const computeClassStrings = (classStrings, regenerateOptions) => { return data; } -const computeCharacterClass = (characterClassItem, regenerateOptions) => { +const computeCharacterClass = (characterClassItem, regenerateOptions, shouldApplySCF) => { let data = getCharacterClassEmptyData(); let handlePositive; let handleNegative; + let caseEqFlags = configGetCaseEqFlags(); + switch (characterClassItem.kind) { case 'union': handlePositive = buildHandler('union'); @@ -431,11 +467,17 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => { handlePositive = buildHandler('intersection'); handleNegative = buildHandler('subtraction'); if (config.transform.unicodeSetsFlag) data.transformed = true; + 
if (config.isIgnoreCaseMode) { + shouldApplySCF = true; + } break; case 'subtraction': handlePositive = buildHandler('subtraction'); handleNegative = buildHandler('intersection'); if (config.transform.unicodeSetsFlag) data.transformed = true; + if (config.isIgnoreCaseMode) { + shouldApplySCF = true; + } break; // The `default` clause is only here as a safeguard; it should never be // reached. Code coverage tools should ignore it. @@ -444,25 +486,32 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => { throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`); } - const caseFoldFlags = configGetCaseFoldFlags(); - for (const item of characterClassItem.body) { switch (item.type) { case 'value': - const folded = maybeFold(item.codePoint, caseFoldFlags); - folded.forEach((cp) => { - handlePositive.single(data, cp); - }); - if (folded.length > 1) { + const codePoint = shouldApplySCF ? simpleCaseFolding(item.codePoint) : item.codePoint; + const list = concatCaseEquivalents(codePoint, caseEqFlags); + handlePositive.regSet(data, regenerate(list)); + if (list.length > 1) { data.transformed = true; } break; case 'characterClassRange': const min = item.min.codePoint; const max = item.max.codePoint; - handlePositive.range(data, min, max); - if (caseFoldFlags) { - handlePositive.iuRange(data, min, max, caseFoldFlags); + if (shouldApplySCF) { + let list = []; + for (let cp = min; cp <= max; cp++) { + list.push(simpleCaseFolding(cp)); + } + handlePositive.regSet(data, regenerate(list)); + } else { + handlePositive.range(data, min, max); + } + if (caseEqFlags) { + // If shouldApplySCF is true, it is still ok to call iuRange because + // the set [min, max] shares the same case equivalents with scf([min, max]) + handlePositive.iuRange(data, min, max, caseEqFlags); data.transformed = true; } break; @@ -470,14 +519,16 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => { handlePositive.regSet(data, 
getCharacterClassEscapeSet( item.value, config.flags.unicode || config.flags.unicodeSets, - config.flags.ignoreCase + config.flags.ignoreCase, + shouldApplySCF )); break; case 'unicodePropertyEscape': const nestedData = getUnicodePropertyEscapeCharacterClassData( item.value, item.negative, - config.flags.unicodeSets && config.isIgnoreCaseMode + config.flags.unicodeSets && config.isIgnoreCaseMode, + shouldApplySCF ); handlePositive.nested(data, nestedData); data.transformed = @@ -487,12 +538,12 @@ const computeCharacterClass = (characterClassItem, regenerateOptions) => { break; case 'characterClass': const handler = item.negative ? handleNegative : handlePositive; - const res = computeCharacterClass(item, regenerateOptions); + const res = computeCharacterClass(item, regenerateOptions, shouldApplySCF); handler.nested(data, res); data.transformed = true; break; case 'classStrings': - handlePositive.nested(data, computeClassStrings(item, regenerateOptions)); + handlePositive.nested(data, computeClassStrings(item, regenerateOptions, caseEqFlags, shouldApplySCF)); data.transformed = true; break; // The `default` clause is only here as a safeguard; it should never be @@ -584,9 +635,6 @@ const processModifiers = (item, regenerateOptions, groups) => { const enabling = item.modifierFlags.enabling; const disabling = item.modifierFlags.disabling; - delete item.modifierFlags; - item.behavior = 'ignore'; - const oldData = Object.assign({}, config.modifiersData); for (const flag of enabling) { @@ -596,6 +644,11 @@ const processModifiers = (item, regenerateOptions, groups) => { config.modifiersData[flag] = false; } + if (config.transform.modifiers) { + delete item.modifierFlags; + item.behavior = 'ignore'; + } + item.body = item.body.map(term => { return processTerm(term, regenerateOptions, groups); }); @@ -613,7 +666,7 @@ const processTerm = (item, regenerateOptions, groups) => { item, getUnicodeDotSet(config.isDotAllMode).toString(regenerateOptions) ); - } else if 
((config.modifiersData.s != null ? config.modifiersData.s : config.transform.dotAllFlag)) { + } else if ((config.modifiersData.s != null ? config.modifiersData.s && config.transform.modifiers : config.transform.dotAllFlag)) { // TODO: consider changing this at the regenerate level. update(item, '[^]'); } @@ -633,7 +686,7 @@ const processTerm = (item, regenerateOptions, groups) => { data.transformed = true; item = processCharacterClass(item, regenerateOptions, data); } - } else if (config.transform.unicodePropertyEscapes || configGetCaseFoldFlags()) { + } else if (config.transform.unicodePropertyEscapes || configGetCaseEqFlags()) { update( item, data.singleChars.toString(regenerateOptions) @@ -684,7 +737,7 @@ const processTerm = (item, regenerateOptions, groups) => { delete groups.unmatchedReferences[name]; } } - if (item.modifierFlags && config.transform.modifiers) { + if (item.modifierFlags) { return processModifiers(item, regenerateOptions, groups); } /* falls through */ @@ -709,14 +762,13 @@ const processTerm = (item, regenerateOptions, groups) => { break; case 'value': const codePoint = item.codePoint; - const set = regenerate(codePoint); - const caseFoldFlags = configGetCaseFoldFlags(); - const folded = maybeFold(codePoint, caseFoldFlags); - if (folded.length === 1 && item.kind === "symbol" && folded[0] >= 0x20 && folded[0] <= 0x7E) { + const caseEqFlags = configGetCaseEqFlags(); + const list = concatCaseEquivalents(codePoint, caseEqFlags); + if (list.length === 1 && item.kind === "symbol" && codePoint >= 0x20 && codePoint <= 0x7E) { // skip regenerate when it is a printable ASCII symbol break; } - set.add(folded); + const set = regenerate(list); update(item, set.toString(regenerateOptions)); break; case 'reference': @@ -756,7 +808,7 @@ const processTerm = (item, regenerateOptions, groups) => { } break; case 'anchor': - if (config.modifiersData.m) { + if (config.modifiersData.m && config.transform.modifiers) { if (item.kind == 'start') { update(item, 
`(?:^|(?<=${NEWLINE_SET.toString()}))`); } else if (item.kind == 'end') { diff --git a/scripts/case-mappings.js b/scripts/case-mappings.js index 3ce239f..f78607f 100644 --- a/scripts/case-mappings.js +++ b/scripts/case-mappings.js @@ -220,3 +220,4 @@ const iBMPMappings = flattenMapping(filteredBMPMappings); writeMap('data/i-bmp-mappings.js', iBMPMappings); writeMap('data/iu-mappings.js', iuMappings); +writeMap('data/iu-foldings.js', oneWayMappings); diff --git a/scripts/character-class-escape-sets.js b/scripts/character-class-escape-sets.js index 0e8cba1..b304212 100644 --- a/scripts/character-class-escape-sets.js +++ b/scripts/character-class-escape-sets.js @@ -8,14 +8,17 @@ require('./utils/regenerate-plugin-to-code.js'); const Zs = require('@unicode/unicode-16.0.0/General_Category/Space_Separator/code-points.js'); const iuMappings = require('../data/iu-mappings.js'); +const iuFoldings = require('../data/iu-foldings.js'); +const { UNICODE_SET, UNICODE_IV_SET } = require('../data/all-characters.js'); -const caseFold = (codePoint) => { +const simpleCaseFolding = (codePoint) => { + return iuFoldings.get(codePoint) || codePoint; +}; + +const getCaseEquivalents = (codePoint) => { return iuMappings.get(codePoint) || false; }; -// Prepare a Regenerate set containing all code points, used for negative -// character classes (if any). -const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF); // Without the `u` flag, the range stops at 0xFFFF. 
// https://mths.be/es#sec-pattern-semantics const BMP_SET = regenerate().addRange(0x0, 0xFFFF); @@ -23,6 +26,7 @@ const BMP_SET = regenerate().addRange(0x0, 0xFFFF); const ESCAPE_CHARS = {}; const ESCAPE_CHARS_UNICODE = {}; const ESCAPE_CHARS_UNICODE_IGNORE_CASE = {}; +const ESCAPE_CHARS_UNICODESET_IGNORE_CASE = {}; const addCharacterClassEscape = (lower, set) => { ESCAPE_CHARS[lower] = ESCAPE_CHARS_UNICODE[lower] = set; const upper = lower.toUpperCase(); @@ -33,24 +37,34 @@ const addCharacterClassEscape = (lower, set) => { // regular expressions that have both the `u` and `i` flags set. const codePoints = set.toArray(); const iuSet = regenerate(); - let containsFoldingSymbols = false; + let containsSimpleCaseFolding = false; for (const codePoint of codePoints) { - let folded = caseFold(codePoint); - if (folded) { - containsFoldingSymbols = true; - iuSet.add(folded); - folded = caseFold(folded); - if (folded) { - iuSet.add(folded); + let caseEquivalents = getCaseEquivalents(codePoint); + if (caseEquivalents) { + containsSimpleCaseFolding = true; + iuSet.add(caseEquivalents); + caseEquivalents = getCaseEquivalents(caseEquivalents); + if (caseEquivalents) { + iuSet.add(caseEquivalents); } } } - const iuLowerSet = containsFoldingSymbols ? + const iuLowerSet = containsSimpleCaseFolding ? iuSet.clone().add(set) : set; const iuUpperSet = UNICODE_SET.clone().remove(iuLowerSet); ESCAPE_CHARS_UNICODE_IGNORE_CASE[lower] = iuLowerSet; ESCAPE_CHARS_UNICODE_IGNORE_CASE[upper] = iuUpperSet; + + ESCAPE_CHARS_UNICODESET_IGNORE_CASE[lower] = regenerate( + iuLowerSet.toArray().map(ch => simpleCaseFolding(ch)) + ); + + ESCAPE_CHARS_UNICODESET_IGNORE_CASE[upper] = { + toCode() { + return 'UNICODE_IV_SET.clone().remove(' + ESCAPE_CHARS_UNICODESET_IGNORE_CASE[lower].toCode() + ')'; + } + } } // Prepare a Regenerate set for every existing character class escape. @@ -94,10 +108,11 @@ const stringify = (name, object) => { const source = [ '// Generated using `npm run build`. 
Do not edit.\n' + - `'use strict';\n\nconst regenerate = require('regenerate');`, + `'use strict';\n\nconst regenerate = require('regenerate');\nconst UNICODE_IV_SET = require('./all-characters.js').UNICODE_IV_SET`, stringify('REGULAR', ESCAPE_CHARS), stringify('UNICODE', ESCAPE_CHARS_UNICODE), - stringify('UNICODE_IGNORE_CASE', ESCAPE_CHARS_UNICODE_IGNORE_CASE) + stringify('UNICODE_IGNORE_CASE', ESCAPE_CHARS_UNICODE_IGNORE_CASE), + stringify('UNICODESET_IGNORE_CASE', ESCAPE_CHARS_UNICODESET_IGNORE_CASE) ].join('\n\n'); // Save the precompiled sets to a static file. diff --git a/tests/fixtures/modifiers.js b/tests/fixtures/modifiers.js index 7c78479..e7a73a6 100644 --- a/tests/fixtures/modifiers.js +++ b/tests/fixtures/modifiers.js @@ -166,15 +166,30 @@ const modifiersFixtures = [ 'pattern': '(?m:^[a-z])', 'expected': '(?:(?:^|(?<=[\\n\\r\\u2028\\u2029]))[a-z])', }, + { + 'pattern': '(?m:^[a-z])', + 'options': { modifiers: false }, + 'expected': '(?m:^[a-z])', + }, { 'pattern': '(?m:[a-z]$)', 'expected': '(?:[a-z](?:$|(?=[\\n\\r\\u2028\\u2029])))', }, + { + 'pattern': '(?m:[a-z]$)', + 'options': { modifiers: false }, + 'expected': '(?m:[a-z]$)', + }, // +s { 'pattern': '(?s:.)', 'expected': '(?:[^])', }, + { + 'pattern': '(?s:.)', + 'options': { modifiers: false }, + 'expected': '(?s:.)', + }, // -i { 'pattern': '(?-i:a)(a)', @@ -255,6 +270,48 @@ const modifiersFixtures = [ 'expected': '[A-Za-z\\u017F\\u212A](?:a)', 'expectedFlags': 'u' }, + { + 'pattern': '(?i:[[AB]&&B])', + 'options': { unicodeSetsFlag: 'transform', modifiers: 'transform' }, + 'flags': 'v', + 'expected': '(?:[Bb])' + }, + { + 'pattern': '(?i:[[AB]&&B])', + 'options': { modifiers: 'transform' }, + 'flags': 'v', + 'expected': '(?:[Bb])' + }, + { + 'pattern': '(?i:[K&&k])', + 'flags': 'v', + 'expected': '(?:[Kk\\u212A])', + 'expectedFlags': 'v' + }, + { + 'pattern': '(?i:[K--k])', + 'flags': 'v', + 'expected': '(?:[])', + 'expectedFlags': 'v' + }, + { + pattern: '(?i:[\\q{KK}&&\\q{kk}])', + flags: 
'v', + expected: '(?:(?:[Kk\\u212A][Kk\\u212A]))', + expectedFlags: 'v', + }, + { + pattern: '(?i:[\\q{KK}--\\q{k\\u212A}])', + flags: 'v', + expected: '(?:[])', + expectedFlags: 'v' + }, + { + pattern: '(?i:[[J-Lj-l]--\\u212A])', + flags: 'v', + expected: '(?:[JLjl])', + expectedFlags: 'v' + }, // -m { 'pattern': '(?-m:^[a-z])(^[a-z])', diff --git a/tests/fixtures/unicode-set.js b/tests/fixtures/unicode-set.js index 8ae6c5c..a6d4a8c 100644 --- a/tests/fixtures/unicode-set.js +++ b/tests/fixtures/unicode-set.js @@ -173,13 +173,15 @@ const unicodeSetFixtures = [ { pattern: '[\\q{sA}asb]', flags: 'iv', - expected: '(?:sA|[abs])' + expected: '(?:sA|[abs])', + expectedFlags: 'iu' }, { pattern: '[\\q{sA}asb]', flags: 'iv', options: TRANSFORM_U, - expected: '(?:[s\\u017F]A|[abs\\u017F])' + expected: '(?:[s\\u017F]A|[abs\\u017F])', + expectedFlags: 'i' }, { pattern: '[[ab\\q{cd}]--a]', @@ -402,6 +404,148 @@ const unicodeSetFixtures = [ flags: 'iv', matches: ['k', 'K', '\u{212A}', '\u{0131}'], nonMatches: ['0', ','] + }, + { + pattern: '[K&&k]', + flags: 'iv', + expected: 'k', + expectedFlags: 'iu' + }, + { + pattern: '[K&&\\u212A]', + flags: 'iv', + expected: 'k', + expectedFlags: 'iu' + }, + { + pattern: '[K--k]', + flags: 'iv', + expected: '[]', + expectedFlags: 'iu' + }, + { + pattern: '[K--\\q{k}]', + flags: 'iv', + expected: '[]', + expectedFlags: 'iu' + }, + { + pattern: '[\\u212A--k]', + flags: 'iv', + expected: '[]', + expectedFlags: 'iu' + }, + { + pattern: '[\\q{\\u212A}--k]', + flags: 'iv', + expected: '[]', + expectedFlags: 'iu' + }, + { + pattern: '[K--\\u212A]', + flags: 'iv', + expected: '[]', + expectedFlags: 'iu' + }, + { + pattern: '[\\q{K}--\\q{\\u212A}]', + flags: 'iv', + expected: '[]', + expectedFlags: 'iu' + }, + { + pattern: '[\\q{KK}&&\\q{kk}]', + flags: 'iv', + expected: '(?:kk)', + expectedFlags: 'iu', + }, + { + pattern: '[\\q{KK}--\\q{k\\u212A}]', + flags: 'iv', + expected: '[]', + expectedFlags: 'iu' + }, + { + pattern: '[\\p{Lu}&&k]', + 
flags: 'iv', + expected: 'k', + expectedFlags: 'iu' + }, + { + pattern: '[\\p{Lu}--k]', + flags: 'iv', + expectedFlags: 'iu', + nonMatches: ['K', 'k', '\u212A'], + }, + { + pattern: '[[\\p{Lu}]--k]', + flags: 'iv', + expectedFlags: 'iu', + nonMatches: ['K', 'k', '\u212A'], + }, + { + pattern: '[\\w--k]', + flags: 'iv', + expected: '[0-9_a-jl-z]', + expectedFlags: 'iu', + nonMatches: ['K', 'k', '\u212A'], + }, + { + pattern: '[[\\w]--k]', + flags: 'iv', + expected: '[0-9_a-jl-z]', + expectedFlags: 'iu', + nonMatches: ['K', 'k', '\u212A'], + }, + { + pattern: '[\\W--Σ]', + flags: 'iv', + nonMatches: ['Σ', 'σ'], + matches: ['Θ', 'θ'], + expectedFlags: 'iu' + }, + { + pattern: '[[\\W]--Σ]', + flags: 'iv', + nonMatches: ['Σ', 'σ'], + matches: ['Θ', 'θ'], + expectedFlags: 'iu' + }, + { + pattern: '[\\D--Σ]', + flags: 'iv', + nonMatches: ['Σ', 'σ'], + matches: ['Θ', 'θ'], + expectedFlags: 'iu' + }, + { + pattern: '[[\\D]--Σ]', + flags: 'iv', + nonMatches: ['Σ', 'σ'], + matches: ['Θ', 'θ'], + expectedFlags: 'iu' + }, + { + pattern: '[\\S--Σ]', + flags: 'iv', + nonMatches: ['Σ', 'σ'], + matches: ['Θ', 'θ'], + expectedFlags: 'iu' + }, + { + pattern: '[[\\S]--Σ]', + flags: 'iv', + nonMatches: ['Σ', 'σ'], + matches: ['Θ', 'θ'], + expectedFlags: 'iu' + }, + { + pattern: '[[J-Lj-l]--\\u212A]', + flags: 'iv', + expected: '[jl]', + expectedFlags: 'iu', + nonMatches: ['K', 'k', '\u212A'], + matches: ['j', 'J', 'l', 'L'] } ];