From 2d28b1aebd5dc81e09825ca86fbbead61eec07d1 Mon Sep 17 00:00:00 2001 From: gagdiez Date: Tue, 21 Nov 2023 13:35:44 +0100 Subject: [PATCH] fix: utf8-string encode/decode There was a bug by which we were storing strings as raw character codes (one byte per UTF-16 code unit) instead of utf8 bytes. This was a bug since the specification clearly says that the encoding must be utf8. This commit fixes the bug using the TextEncoder / TextDecoder tools, which are widely supported by modern browsers and node versions. --- borsh-ts/deserialize.ts | 4 +++- borsh-ts/serialize.ts | 14 +++++++------- borsh-ts/test/(de)serialize.test.js | 3 +++ lib/cjs/deserialize.js | 26 +++++++++++++++++++++++++- lib/cjs/serialize.js | 13 ++++++------- lib/esm/deserialize.js | 3 ++- lib/esm/serialize.js | 13 ++++++------- 7 files changed, 52 insertions(+), 24 deletions(-) diff --git a/borsh-ts/deserialize.ts b/borsh-ts/deserialize.ts index 6a247c68..cf87b415 100644 --- a/borsh-ts/deserialize.ts +++ b/borsh-ts/deserialize.ts @@ -1,6 +1,8 @@ import { ArrayType, DecodeTypes, MapType, IntegerType, OptionType, Schema, SetType, StructType, integers, EnumType } from './types.js'; import { DecodeBuffer } from './buffer.js'; +import * as utfUtil from 'util'; + export class BorshDeserializer { buffer: DecodeBuffer; @@ -54,7 +56,7 @@ export class BorshDeserializer { decode_string(): string { const len: number = this.decode_integer('u32') as number; const buffer = new Uint8Array(this.buffer.consume_bytes(len)); - return String.fromCharCode.apply(null, buffer); + return new utfUtil.TextDecoder().decode(buffer); } decode_boolean(): boolean { diff --git a/borsh-ts/serialize.ts b/borsh-ts/serialize.ts index fe7845ac..3b5b2ab8 100644 --- a/borsh-ts/serialize.ts +++ b/borsh-ts/serialize.ts @@ -2,6 +2,8 @@ import { ArrayType, MapType, IntegerType, OptionType, Schema, SetType, StructTyp import { EncodeBuffer } from './buffer.js'; import * as utils from './utils.js'; +import * as utfUtil from 'util'; + export class BorshSerializer { encoded:
EncodeBuffer; fieldPath: string[]; @@ -61,15 +63,13 @@ export class BorshSerializer { encode_string(value: unknown): void { this.checkTypes && utils.expect_type(value, 'string', this.fieldPath); - const _value = value as string; - // 4 bytes for length - this.encoded.store_value(_value.length, 'u32'); + // encode to utf8 bytes + const utf8Bytes = new utfUtil.TextEncoder().encode(value as string); - // string bytes - for (let i = 0; i < _value.length; i++) { - this.encoded.store_value(_value.charCodeAt(i), 'u8'); - } + // 4 bytes for length + string bytes + this.encoded.store_value(utf8Bytes.length, 'u32'); + this.encoded.store_bytes(utf8Bytes); } encode_boolean(value: unknown): void { diff --git a/borsh-ts/test/(de)serialize.test.js b/borsh-ts/test/(de)serialize.test.js index 59247260..42d171fa 100644 --- a/borsh-ts/test/(de)serialize.test.js +++ b/borsh-ts/test/(de)serialize.test.js @@ -40,6 +40,9 @@ test('serialize booleans', async () => { test('serialize strings', async () => { check_roundtrip('h"i', 'string', [3, 0, 0, 0, 104, 34, 105]); + check_roundtrip('Chévere', 'string', [8, 0, 0, 0, 67, 104, 195, 169, 118, 101, 114, 101]); + check_roundtrip('👍', 'string', [4, 0, 0, 0, 240, 159, 145, 141]); + check_roundtrip('óñ 漢', 'string', [8, 0, 0, 0, 195, 179, 195, 177, 32, 230, 188, 162]); }); test('serialize floats', async () => { diff --git a/lib/cjs/deserialize.js b/lib/cjs/deserialize.js index cf6c248a..c3f0b297 100644 --- a/lib/cjs/deserialize.js +++ b/lib/cjs/deserialize.js @@ -1,8 +1,32 @@ "use strict"; +var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + var desc = Object.getOwnPropertyDescriptor(m, k); + if (!desc || ("get" in desc ? 
!m.__esModule : desc.writable || desc.configurable)) { + desc = { enumerable: true, get: function() { return m[k]; } }; + } + Object.defineProperty(o, k2, desc); +}) : (function(o, m, k, k2) { + if (k2 === undefined) k2 = k; + o[k2] = m[k]; +})); +var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { + Object.defineProperty(o, "default", { enumerable: true, value: v }); +}) : function(o, v) { + o["default"] = v; +}); +var __importStar = (this && this.__importStar) || function (mod) { + if (mod && mod.__esModule) return mod; + var result = {}; + if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); + __setModuleDefault(result, mod); + return result; +}; exports.__esModule = true; exports.BorshDeserializer = void 0; var types_js_1 = require("./types.js"); var buffer_js_1 = require("./buffer.js"); +var utfUtil = __importStar(require("util")); var BorshDeserializer = /** @class */ (function () { function BorshDeserializer(bufferArray) { this.buffer = new buffer_js_1.DecodeBuffer(bufferArray); @@ -55,7 +79,7 @@ var BorshDeserializer = /** @class */ (function () { BorshDeserializer.prototype.decode_string = function () { var len = this.decode_integer('u32'); var buffer = new Uint8Array(this.buffer.consume_bytes(len)); - return String.fromCharCode.apply(null, buffer); + return new utfUtil.TextDecoder().decode(buffer); }; BorshDeserializer.prototype.decode_boolean = function () { return this.buffer.consume_value('u8') > 0; diff --git a/lib/cjs/serialize.js b/lib/cjs/serialize.js index 2f47e1b8..15a9428d 100644 --- a/lib/cjs/serialize.js +++ b/lib/cjs/serialize.js @@ -27,6 +27,7 @@ exports.BorshSerializer = void 0; var types_js_1 = require("./types.js"); var buffer_js_1 = require("./buffer.js"); var utils = __importStar(require("./utils.js")); +var utfUtil = __importStar(require("util")); var BorshSerializer = /** @class */ (function () { function 
BorshSerializer(checkTypes) { this.encoded = new buffer_js_1.EncodeBuffer(); @@ -83,13 +84,11 @@ var BorshSerializer = /** @class */ (function () { }; BorshSerializer.prototype.encode_string = function (value) { this.checkTypes && utils.expect_type(value, 'string', this.fieldPath); - var _value = value; - // 4 bytes for length - this.encoded.store_value(_value.length, 'u32'); - // string bytes - for (var i = 0; i < _value.length; i++) { - this.encoded.store_value(_value.charCodeAt(i), 'u8'); - } + // encode to utf8 bytes + var utf8Bytes = new utfUtil.TextEncoder().encode(value); + // 4 bytes for length + string bytes + this.encoded.store_value(utf8Bytes.length, 'u32'); + this.encoded.store_bytes(utf8Bytes); }; BorshSerializer.prototype.encode_boolean = function (value) { this.checkTypes && utils.expect_type(value, 'boolean', this.fieldPath); diff --git a/lib/esm/deserialize.js b/lib/esm/deserialize.js index d1aff9af..818b8cad 100644 --- a/lib/esm/deserialize.js +++ b/lib/esm/deserialize.js @@ -1,5 +1,6 @@ import { integers } from './types.js'; import { DecodeBuffer } from './buffer.js'; +import * as utfUtil from 'util'; var BorshDeserializer = /** @class */ (function () { function BorshDeserializer(bufferArray) { this.buffer = new DecodeBuffer(bufferArray); @@ -52,7 +53,7 @@ var BorshDeserializer = /** @class */ (function () { BorshDeserializer.prototype.decode_string = function () { var len = this.decode_integer('u32'); var buffer = new Uint8Array(this.buffer.consume_bytes(len)); - return String.fromCharCode.apply(null, buffer); + return new utfUtil.TextDecoder().decode(buffer); }; BorshDeserializer.prototype.decode_boolean = function () { return this.buffer.consume_value('u8') > 0; diff --git a/lib/esm/serialize.js b/lib/esm/serialize.js index acfd8aee..c76d7549 100644 --- a/lib/esm/serialize.js +++ b/lib/esm/serialize.js @@ -1,6 +1,7 @@ import { integers } from './types.js'; import { EncodeBuffer } from './buffer.js'; import * as utils from './utils.js'; +import 
* as utfUtil from 'util'; var BorshSerializer = /** @class */ (function () { function BorshSerializer(checkTypes) { this.encoded = new EncodeBuffer(); @@ -57,13 +58,11 @@ var BorshSerializer = /** @class */ (function () { }; BorshSerializer.prototype.encode_string = function (value) { this.checkTypes && utils.expect_type(value, 'string', this.fieldPath); - var _value = value; - // 4 bytes for length - this.encoded.store_value(_value.length, 'u32'); - // string bytes - for (var i = 0; i < _value.length; i++) { - this.encoded.store_value(_value.charCodeAt(i), 'u8'); - } + // encode to utf8 bytes + var utf8Bytes = new utfUtil.TextEncoder().encode(value); + // 4 bytes for length + string bytes + this.encoded.store_value(utf8Bytes.length, 'u32'); + this.encoded.store_bytes(utf8Bytes); }; BorshSerializer.prototype.encode_boolean = function (value) { this.checkTypes && utils.expect_type(value, 'boolean', this.fieldPath);