From a8c3e2c47707046e7e422a31d5950ddac78eb1f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Thu, 24 May 2018 22:25:49 +0200 Subject: [PATCH 1/3] make wcwidth customizable --- src/CharWidth.ts | 98 ++++++++++++++++++++++++++++++++++++++++++--- src/InputHandler.ts | 10 ++++- src/Types.ts | 10 +++++ 3 files changed, 110 insertions(+), 8 deletions(-) diff --git a/src/CharWidth.ts b/src/CharWidth.ts index 90673b2b7e..95d8d97f77 100644 --- a/src/CharWidth.ts +++ b/src/CharWidth.ts @@ -2,9 +2,69 @@ * Copyright (c) 2016 The xterm.js authors. All rights reserved. * @license MIT */ +import { IwcwidthOptions } from './Types'; -export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: number) => number { +export function wcwidthFactory(opts: IwcwidthOptions): (num: number) => number { // extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c + // ambiguous maps taken from https://chromium.googlesource.com/native_client/nacl-newlib/+/master-backup/newlib/libc/string/wcwidth.c + // ambiguous characters + const AMBIGUOUS_BMP = [ + [0x00A1, 0x00A1], [0x00A4, 0x00A4], [0x00A7, 0x00A8], + [0x00AA, 0x00AA], [0x00AE, 0x00AE], [0x00B0, 0x00B4], + [0x00B6, 0x00BA], [0x00BC, 0x00BF], [0x00C6, 0x00C6], + [0x00D0, 0x00D0], [0x00D7, 0x00D8], [0x00DE, 0x00E1], + [0x00E6, 0x00E6], [0x00E8, 0x00EA], [0x00EC, 0x00ED], + [0x00F0, 0x00F0], [0x00F2, 0x00F3], [0x00F7, 0x00FA], + [0x00FC, 0x00FC], [0x00FE, 0x00FE], [0x0101, 0x0101], + [0x0111, 0x0111], [0x0113, 0x0113], [0x011B, 0x011B], + [0x0126, 0x0127], [0x012B, 0x012B], [0x0131, 0x0133], + [0x0138, 0x0138], [0x013F, 0x0142], [0x0144, 0x0144], + [0x0148, 0x014B], [0x014D, 0x014D], [0x0152, 0x0153], + [0x0166, 0x0167], [0x016B, 0x016B], [0x01CE, 0x01CE], + [0x01D0, 0x01D0], [0x01D2, 0x01D2], [0x01D4, 0x01D4], + [0x01D6, 0x01D6], [0x01D8, 0x01D8], [0x01DA, 0x01DA], + [0x01DC, 0x01DC], [0x0251, 0x0251], [0x0261, 0x0261], + [0x02C4, 0x02C4], [0x02C7, 0x02C7], [0x02C9, 0x02CB], + [0x02CD, 0x02CD], 
[0x02D0, 0x02D0], [0x02D8, 0x02DB], + [0x02DD, 0x02DD], [0x02DF, 0x02DF], [0x0391, 0x03A1], + [0x03A3, 0x03A9], [0x03B1, 0x03C1], [0x03C3, 0x03C9], + [0x0401, 0x0401], [0x0410, 0x044F], [0x0451, 0x0451], + [0x2010, 0x2010], [0x2013, 0x2016], [0x2018, 0x2019], + [0x201C, 0x201D], [0x2020, 0x2022], [0x2024, 0x2027], + [0x2030, 0x2030], [0x2032, 0x2033], [0x2035, 0x2035], + [0x203B, 0x203B], [0x203E, 0x203E], [0x2074, 0x2074], + [0x207F, 0x207F], [0x2081, 0x2084], [0x20AC, 0x20AC], + [0x2103, 0x2103], [0x2105, 0x2105], [0x2109, 0x2109], + [0x2113, 0x2113], [0x2116, 0x2116], [0x2121, 0x2122], + [0x2126, 0x2126], [0x212B, 0x212B], [0x2153, 0x2154], + [0x215B, 0x215E], [0x2160, 0x216B], [0x2170, 0x2179], + [0x2190, 0x2199], [0x21B8, 0x21B9], [0x21D2, 0x21D2], + [0x21D4, 0x21D4], [0x21E7, 0x21E7], [0x2200, 0x2200], + [0x2202, 0x2203], [0x2207, 0x2208], [0x220B, 0x220B], + [0x220F, 0x220F], [0x2211, 0x2211], [0x2215, 0x2215], + [0x221A, 0x221A], [0x221D, 0x2220], [0x2223, 0x2223], + [0x2225, 0x2225], [0x2227, 0x222C], [0x222E, 0x222E], + [0x2234, 0x2237], [0x223C, 0x223D], [0x2248, 0x2248], + [0x224C, 0x224C], [0x2252, 0x2252], [0x2260, 0x2261], + [0x2264, 0x2267], [0x226A, 0x226B], [0x226E, 0x226F], + [0x2282, 0x2283], [0x2286, 0x2287], [0x2295, 0x2295], + [0x2299, 0x2299], [0x22A5, 0x22A5], [0x22BF, 0x22BF], + [0x2312, 0x2312], [0x2460, 0x24E9], [0x24EB, 0x254B], + [0x2550, 0x2573], [0x2580, 0x258F], [0x2592, 0x2595], + [0x25A0, 0x25A1], [0x25A3, 0x25A9], [0x25B2, 0x25B3], + [0x25B6, 0x25B7], [0x25BC, 0x25BD], [0x25C0, 0x25C1], + [0x25C6, 0x25C8], [0x25CB, 0x25CB], [0x25CE, 0x25D1], + [0x25E2, 0x25E5], [0x25EF, 0x25EF], [0x2605, 0x2606], + [0x2609, 0x2609], [0x260E, 0x260F], [0x2614, 0x2615], + [0x261C, 0x261C], [0x261E, 0x261E], [0x2640, 0x2640], + [0x2642, 0x2642], [0x2660, 0x2661], [0x2663, 0x2665], + [0x2667, 0x266A], [0x266C, 0x266D], [0x266F, 0x266F], + [0x273D, 0x273D], [0x2776, 0x277F], [0xE000, 0xF8FF], + [0xFFFD, 0xFFFD] + ]; + const AMBIGUOUS_HIGH = [ + 
[0xF0000, 0xFFFFD], [0x100000, 0x10FFFD] + ]; // combining characters const COMBINING_BMP = [ [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489], @@ -58,6 +118,12 @@ export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: nu [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F], [0xE0100, 0xE01EF] ]; + + const nul = opts.nul | 0; + const control = opts.control | 0; + const custom = opts.custom || Object.create(null); + const ambiguous = opts.ambiguous || null; + // binary search function bisearch(ucs: number, data: number[][]): boolean { let min = 0; @@ -81,11 +147,20 @@ export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: nu function wcwidthBMP(ucs: number): number { // test for 8-bit control characters if (ucs === 0) { - return opts.nul; + return nul; } if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) { - return opts.control; + return control; + } + + // binary search for ambiguous characters + // only done if ambiguous is explicitly set + if (ambiguous) { + if (bisearch(ucs, AMBIGUOUS_BMP)) { + return ambiguous; + } } + // binary search in table of non-spacing characters if (bisearch(ucs, COMBINING_BMP)) { return 0; @@ -111,6 +186,18 @@ export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: nu (ucs >= 0xffe0 && ucs <= 0xffe6))); } function wcwidthHigh(ucs: number): 0 | 1 | 2 { + // custom overrides + if (custom && custom[ucs]) { + return custom[ucs]; + } + + // binary search for ambiguous characters + // only done if ambiguous is explicitly set + if (ambiguous) { + if (bisearch(ucs, AMBIGUOUS_HIGH)) { + return ambiguous; + } + } if (bisearch(ucs, COMBINING_HIGH)) { return 0; } @@ -119,7 +206,6 @@ export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: nu } return 1; } - const control = opts.control | 0; let table: number[] | Uint32Array = null; function init_table(): number[] | Uint32Array { // lookup table for BMP @@ -135,7 +221,7 @@ export const wcwidth = 
(function(opts: {nul: number, control: number}): (ucs: nu let num = 0; let pos = CODEPOINTS_PER_ITEM; while (pos--) { - num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos); + num = (num << 2) | (custom[CODEPOINTS_PER_ITEM * i + pos] || wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos)); } table[i] = num; } @@ -168,4 +254,4 @@ export const wcwidth = (function(opts: {nul: number, control: number}): (ucs: nu // do a full search for high codepoints return wcwidthHigh(num); }; -})({nul: 0, control: 0}); // configurable options +} diff --git a/src/InputHandler.ts b/src/InputHandler.ts index 7b12b6de13..aed87e8088 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -9,7 +9,7 @@ import { C0, C1 } from './EscapeSequences'; import { CHARSETS, DEFAULT_CHARSET } from './Charsets'; import { CHAR_DATA_CHAR_INDEX, CHAR_DATA_WIDTH_INDEX, CHAR_DATA_CODE_INDEX } from './Buffer'; import { FLAGS } from './renderer/Types'; -import { wcwidth } from './CharWidth'; +import { wcwidthFactory } from './CharWidth'; import { EscapeSequenceParser } from './EscapeSequenceParser'; /** @@ -112,6 +112,7 @@ class DECRQSS implements IDcsHandler { */ export class InputHandler implements IInputHandler { private _surrogateHigh: string; + private _wcwidth: (ucs: number) => number; constructor( private _terminal: any, // TODO: reestablish IInputHandlingTerminal here @@ -282,6 +283,11 @@ export class InputHandler implements IInputHandler { */ this._parser.setDcsHandler('$q', new DECRQSS(this._terminal)); this._parser.setDcsHandler('+q', new RequestTerminfo(this._terminal)); + + /** + * generate default wcwidth + */ + this._wcwidth = wcwidthFactory({nul: 0, control: 0}); } public parse(data: string): void { @@ -345,7 +351,7 @@ export class InputHandler implements IInputHandler { // calculate print space // expensive call, therefore we save width in line buffer - chWidth = wcwidth(code); + chWidth = this._wcwidth(code); // get charset replacement character // FIXME: Should code be replaced as 
well? diff --git a/src/Types.ts b/src/Types.ts index 5075daa9bf..aaa2bbd9d5 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -509,3 +509,13 @@ export interface IEscapeSequenceParser { setErrorHandler(callback: (state: IParsingState) => IParsingState): void; clearErrorHandler(): void; } + +/** + * Configure options for wcwidth + */ +export interface IwcwidthOptions { + nul: 0 | 1 | 2; + control: 0 | 1 | 2; + ambiguous?: 0 | 1 | 2; + custom?: {[key: number]: 0 | 1 | 2}; +} From 837b58a1397a5d1920ef1e3f0689eaa1a4c8f42d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Thu, 24 May 2018 22:55:44 +0200 Subject: [PATCH 2/3] provide default prebuilt wcwidth --- src/CharWidth.ts | 2 ++ src/InputHandler.ts | 14 +++++++++++--- src/Types.ts | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/CharWidth.ts b/src/CharWidth.ts index 95d8d97f77..e9a7ee1907 100644 --- a/src/CharWidth.ts +++ b/src/CharWidth.ts @@ -255,3 +255,5 @@ export function wcwidthFactory(opts: IwcwidthOptions): (num: number) => number { return wcwidthHigh(num); }; } + +export const wcwidthDefault = wcwidthFactory({nul: 0, control: 0}); diff --git a/src/InputHandler.ts b/src/InputHandler.ts index aed87e8088..53972986cc 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -9,7 +9,7 @@ import { C0, C1 } from './EscapeSequences'; import { CHARSETS, DEFAULT_CHARSET } from './Charsets'; import { CHAR_DATA_CHAR_INDEX, CHAR_DATA_WIDTH_INDEX, CHAR_DATA_CODE_INDEX } from './Buffer'; import { FLAGS } from './renderer/Types'; -import { wcwidthFactory } from './CharWidth'; +import { wcwidthFactory, wcwidthDefault } from './CharWidth'; import { EscapeSequenceParser } from './EscapeSequenceParser'; /** @@ -285,9 +285,17 @@ export class InputHandler implements IInputHandler { this._parser.setDcsHandler('+q', new RequestTerminfo(this._terminal)); /** - * generate default wcwidth + * init wcwidth with default version */ - this._wcwidth = wcwidthFactory({nul: 0, control: 0}); + 
this._wcwidth = wcwidthDefault; + } + + public setWcwidthOptions(opts: {ambiguous?: 0 | 1 | 2, custom?: {[key: number]: 0 | 1 | 2}}): void { + if (opts.ambiguous === undefined && opts.custom === undefined) { + this._wcwidth = wcwidthDefault; + } else { + this._wcwidth = wcwidthFactory({nul: 0, control: 0, ambiguous: opts.ambiguous, custom: opts.custom}); + } } public parse(data: string): void { diff --git a/src/Types.ts b/src/Types.ts index aaa2bbd9d5..e3afebb4da 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -110,6 +110,7 @@ export interface ICompositionHelper { export interface IInputHandler { parse(data: string): void; print(data: string, start: number, end: number): void; + setWcwidthOptions(opts: {ambiguous?: 0 | 1 | 2, custom?: {[key: number]: 0 | 1 | 2}}): void; /** C0 BEL */ bell(): void; /** C0 LF */ lineFeed(): void; From 1139688db0e60784f3d94261432715fd63c79994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Thu, 24 May 2018 23:11:15 +0200 Subject: [PATCH 3/3] simplify custom hooks --- src/CharWidth.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/CharWidth.ts b/src/CharWidth.ts index e9a7ee1907..07c84a28ed 100644 --- a/src/CharWidth.ts +++ b/src/CharWidth.ts @@ -153,6 +153,11 @@ export function wcwidthFactory(opts: IwcwidthOptions): (num: number) => number { return control; } + // custom overrides + if (custom && custom[ucs]) { + return custom[ucs]; + } + // binary search for ambiguous characters // only done if ambiguous is explicitly set if (ambiguous) { @@ -221,7 +226,7 @@ export function wcwidthFactory(opts: IwcwidthOptions): (num: number) => number { let num = 0; let pos = CODEPOINTS_PER_ITEM; while (pos--) { - num = (num << 2) | (custom[CODEPOINTS_PER_ITEM * i + pos] || wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos)); + num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos); } table[i] = num; }