-
Notifications
You must be signed in to change notification settings - Fork 29.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
readline: use icu based string width calculation
Rather than the pseudo-wcwidth impl used currently, use the ICU character properties database to calculate string width and determine if a character is full width or not. This allows the algorithm to correctly identify emoji as full width, ensures the algorithm will continue to function properly as new unicode codepoints are added, and it's faster. This was originally part of a proposal to add a new unicode module, but has been split out. Refs: #8075 PR-URL: #9040 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Steven R Loomis <srloomis@us.ibm.com>
- Loading branch information
Showing
4 changed files
with
228 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Flags: --expose_internals | ||
'use strict'; | ||
|
||
const common = require('../common'); | ||
const assert = require('assert'); | ||
const readline = require('internal/readline'); | ||
|
||
if (!process.binding('config').hasIntl) { | ||
common.skip('missing intl... skipping test'); | ||
return; | ||
} | ||
|
||
// Test column width | ||
assert.strictEqual(readline.getStringWidth('a'), 1); | ||
assert.strictEqual(readline.getStringWidth('丁'), 2); | ||
assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 2); | ||
assert.strictEqual(readline.getStringWidth('👅'), 2); | ||
assert.strictEqual(readline.getStringWidth('\n'), 0); | ||
assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1); | ||
assert.strictEqual(readline.getStringWidth(97), 1); | ||
|
||
// The following is an emoji sequence. In some implementations, it is | ||
// represented as a single glyph, in other implementations as a sequence | ||
// of individual glyphs. By default, the algorithm will assume the single | ||
// glyph interpretation and return a value of 2. By passing the | ||
// expandEmojiSequence: true option, each component will be counted | ||
// individually. | ||
assert.strictEqual(readline.getStringWidth('👩\u200d👩\u200d👧\u200d👧'), 2); | ||
assert.strictEqual( | ||
readline.getStringWidth('👩\u200d👩\u200d👧\u200d👧', {expandEmojiSequence: true}), 8); | ||
|
||
// By default, unicode characters whose width is considered ambiguous will | ||
// be considered half-width. For these characters, getStringWidth will return | ||
// 1. In some contexts, however, it is more appropriate to consider them full | ||
// width. By default, the algorithm will assume half width. By passing | ||
// the ambiguousAsFullWidth: true option, ambiguous characters will be counted | ||
// as 2 columns. | ||
assert.strictEqual(readline.getStringWidth('\u01d4'), 1); | ||
assert.strictEqual( | ||
readline.getStringWidth('\u01d4', {ambiguousAsFullWidth: true}), 2); | ||
|
||
// Control chars and combining chars are zero | ||
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1); |