From 7d5a58ca232568d72cc1c4c3bba18dc5ee422c41 Mon Sep 17 00:00:00 2001 From: Mohamed Akram Date: Fri, 8 Sep 2023 12:00:30 +0400 Subject: [PATCH] Use RegExp singletons to improve performance (#27) --- lib/hepburn.js | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/lib/hepburn.js b/lib/hepburn.js index 1b3ae0c..2047fc1 100755 --- a/lib/hepburn.js +++ b/lib/hepburn.js @@ -180,6 +180,10 @@ var nihonShiki = { "DYO": "JO" }; +function keysToRegex(obj) { + return new RegExp(Object.keys(obj).join('|'), "g"); +} + // For use with toHiragana var hiraganaMap = {}; @@ -200,6 +204,8 @@ Object.keys(hiraganaDigraphs).forEach(function(key) { var hiraganaRegex = new RegExp(Object.keys(hiraganaMap).sort(function(a, b) { return b.length - a.length; }).join("|"), "g"); +var hiraganaMonographsRegex = keysToRegex(hiraganaMonographs); +var hiraganaDigraphsRegex = keysToRegex(hiraganaDigraphs); // For use with toKatakana var katakanaMap = {}; @@ -228,17 +234,23 @@ Object.keys(katakanaTrigraphs).forEach(function(key) { var katakanaRegex = new RegExp(Object.keys(katakanaMap).sort(function(a, b) { return b.length - a.length; }).join("|"), "g"); +var katakanaMonographsRegex = keysToRegex(katakanaMonographs); +var katakanaDigraphsRegex = keysToRegex(katakanaDigraphs); +var katakanaHalfwidthsCombinedRegex = keysToRegex(katakanaHalfwidthsCombined); +var katakanaHalfwidthsRegex = keysToRegex(katakanaHalfwidths); + +var nihonShikiRegex = keysToRegex(nihonShiki); // API exports.fromKana = function(str) { // Initial transliteration - str = bulkReplace(str, katakanaHalfwidthsCombined); - str = bulkReplace(str, katakanaHalfwidths); - str = bulkReplace(str, hiraganaDigraphs); - str = bulkReplace(str, katakanaDigraphs); - str = bulkReplace(str, hiraganaMonographs); - str = bulkReplace(str, katakanaMonographs); + str = bulkReplace(str, katakanaHalfwidthsCombinedRegex, katakanaHalfwidthsCombined); + str = bulkReplace(str, katakanaHalfwidthsRegex, katakanaHalfwidths); + str = bulkReplace(str, hiraganaDigraphsRegex, hiraganaDigraphs); + str = bulkReplace(str, katakanaDigraphsRegex, katakanaDigraphs); + str = bulkReplace(str, hiraganaMonographsRegex, hiraganaMonographs); + str = bulkReplace(str, katakanaMonographsRegex, katakanaMonographs); // Correct use of sokuon str = str.replace(/[っッ]C/g, "TC").replace(/[っッ](.)/g, "$1$1"); @@ -312,7 +324,7 @@ exports.cleanRomaji = function(str) { str = str.replace(/OH([^AIEO]|$)/g, "OO$1"); // Replace old Nihon-shiki usage with modern Hepburn form - str = bulkReplace(str, nihonShiki); + str = bulkReplace(str, nihonShikiRegex, nihonShiki); return str; }; @@ -369,14 +381,13 @@ exports.splitRomaji = function(str) { } exports.containsHiragana = function(str) { - return new RegExp(Object.keys(hiraganaMonographs).join('|')).test(str); + hiraganaMonographsRegex.lastIndex = 0; + return hiraganaMonographsRegex.test(str); }; exports.containsKatakana = function(str) { - return ( - new RegExp(Object.keys(katakanaMonographs).join('|')).test(str) || - new RegExp(Object.keys(katakanaHalfwidths).join('|')).test(str) - ); + katakanaMonographsRegex.lastIndex = katakanaHalfwidthsRegex.lastIndex = 0; + return katakanaMonographsRegex.test(str) || katakanaHalfwidthsRegex.test(str); }; exports.containsKana = function(str){ @@ -384,5 +395,5 @@ exports.containsKana = function(str){ }; exports.containsKanji = function(str){ - return new RegExp(/[\u4e00-\u9fcf\uf900-\ufaff\u3400-\u4dbf]/).test(str); + return /[\u4e00-\u9fcf\uf900-\ufaff\u3400-\u4dbf]/.test(str); }