Skip to content

Commit

Permalink
Use RegExp singletons to improve performance (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
mohd-akram authored Sep 8, 2023
1 parent c39b4e5 commit 7d5a58c
Showing 1 changed file with 24 additions and 13 deletions.
37 changes: 24 additions & 13 deletions lib/hepburn.js
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,10 @@ var nihonShiki = {
"DYO": "JO"
};

/**
 * Build a global RegExp that matches any own enumerable key of `obj`.
 *
 * Keys are matched as literal text: regex metacharacters inside a key are
 * escaped, so a key such as "a.b" matches only "a.b". Alternatives keep the
 * order returned by Object.keys; the first alternative that matches at a
 * position wins.
 *
 * The result carries the "g" flag, so `test`/`exec` advance `lastIndex`
 * between calls. Reset `lastIndex` before reusing it for a fresh search.
 *
 * @param {Object} obj - Map whose keys become the regex alternatives.
 * @returns {RegExp} Global regex; it never matches when `obj` has no keys.
 */
function keysToRegex(obj) {
    var alternatives = Object.keys(obj).map(function(key) {
        return key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    });

    if (alternatives.length === 0) {
        // An empty pattern matches at every position; use one that cannot match.
        return new RegExp("(?!)", "g");
    }

    return new RegExp(alternatives.join("|"), "g");
}

// For use with toHiragana
var hiraganaMap = {};

Expand All @@ -200,6 +204,8 @@ Object.keys(hiraganaDigraphs).forEach(function(key) {
// Alternation of every hiraganaMap key, with longer keys listed before
// shorter ones so they are tried first at each position.
var hiraganaRegex = (function() {
    var keys = Object.keys(hiraganaMap);
    keys.sort(function(left, right) {
        return right.length - left.length;
    });
    return new RegExp(keys.join("|"), "g");
})();

// Shared global regexes built once from the key sets of the lookup tables.
var hiraganaDigraphsRegex = keysToRegex(hiraganaDigraphs);
var hiraganaMonographsRegex = keysToRegex(hiraganaMonographs);

// For use with toKatakana
var katakanaMap = {};
Expand Down Expand Up @@ -228,17 +234,23 @@ Object.keys(katakanaTrigraphs).forEach(function(key) {
// Alternation of every katakanaMap key, with longer keys listed before
// shorter ones so they are tried first at each position.
var katakanaRegex = (function() {
    var keys = Object.keys(katakanaMap);
    keys.sort(function(left, right) {
        return right.length - left.length;
    });
    return new RegExp(keys.join("|"), "g");
})();

// Shared global regexes built once from the key sets of the lookup tables.
var katakanaHalfwidthsRegex = keysToRegex(katakanaHalfwidths);
var katakanaHalfwidthsCombinedRegex = keysToRegex(katakanaHalfwidthsCombined);
var katakanaDigraphsRegex = keysToRegex(katakanaDigraphs);
var katakanaMonographsRegex = keysToRegex(katakanaMonographs);

var nihonShikiRegex = keysToRegex(nihonShiki);

// API

exports.fromKana = function(str) {
// Initial transliteration
str = bulkReplace(str, katakanaHalfwidthsCombined);
str = bulkReplace(str, katakanaHalfwidths);
str = bulkReplace(str, hiraganaDigraphs);
str = bulkReplace(str, katakanaDigraphs);
str = bulkReplace(str, hiraganaMonographs);
str = bulkReplace(str, katakanaMonographs);
str = bulkReplace(str, katakanaHalfwidthsCombinedRegex, katakanaHalfwidthsCombined);
str = bulkReplace(str, katakanaHalfwidthsRegex, katakanaHalfwidths);
str = bulkReplace(str, hiraganaDigraphsRegex, hiraganaDigraphs);
str = bulkReplace(str, katakanaDigraphsRegex, katakanaDigraphs);
str = bulkReplace(str, hiraganaMonographsRegex, hiraganaMonographs);
str = bulkReplace(str, katakanaMonographsRegex, katakanaMonographs);

// Correct use of sokuon
str = str.replace(/[っッ]C/g, "TC").replace(/[っッ](.)/g, "$1$1");
Expand Down Expand Up @@ -312,7 +324,7 @@ exports.cleanRomaji = function(str) {
str = str.replace(/OH([^AIEO]|$)/g, "OO$1");

// Replace old Nihon-shiki usage with modern Hepburn form
str = bulkReplace(str, nihonShiki);
str = bulkReplace(str, nihonShikiRegex, nihonShiki);

return str;
};
Expand Down Expand Up @@ -369,20 +381,19 @@ exports.splitRomaji = function(str) {
}

exports.containsHiragana = function(str) {
return new RegExp(Object.keys(hiraganaMonographs).join('|')).test(str);
hiraganaMonographsRegex.lastIndex = 0;
return hiraganaMonographsRegex.test(str);
};

exports.containsKatakana = function(str) {
return (
new RegExp(Object.keys(katakanaMonographs).join('|')).test(str) ||
new RegExp(Object.keys(katakanaHalfwidths).join('|')).test(str)
);
katakanaMonographsRegex.lastIndex = katakanaHalfwidthsRegex.lastIndex = 0;
return katakanaMonographsRegex.test(str) || katakanaHalfwidthsRegex.test(str);
};

exports.containsKana = function(str){
return (exports.containsHiragana(str) || exports.containsKatakana(str));
};

exports.containsKanji = function(str){
return new RegExp(/[\u4e00-\u9fcf\uf900-\ufaff\u3400-\u4dbf]/).test(str);
return /[\u4e00-\u9fcf\uf900-\ufaff\u3400-\u4dbf]/.test(str);
}

0 comments on commit 7d5a58c

Please sign in to comment.