Skip to content

Commit

Permalink
Add serialization and deserialization of numerals larger than `Number…
Browse files Browse the repository at this point in the history
….MAX_SAFE_INTEGER`

Signed-off-by: Miki <miki@amazon.com>
  • Loading branch information
AMoo-Miki committed Jul 11, 2023
1 parent 7ad62b1 commit e792722
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 3 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
## [Unreleased]

### Added
- Add serialization and deserialization of numerals larger than `Number.MAX_SAFE_INTEGER` ([#544](https://github.com/opensearch-project/opensearch-js/pull/544))
### Dependencies
- Bumps `prettier` from 2.8.7 to 2.8.8
- Bumps `ora` from 6.1.2 to 6.3.0
Expand Down Expand Up @@ -146,4 +147,4 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
[2.1.0]: https://github.com/opensearch-project/opensearch-js/releases/tag/2.1.0
[2.2.0]: https://github.com/opensearch-project/opensearch-js/releases/tag/2.2.0
[2.2.1]: https://github.com/opensearch-project/opensearch-js/releases/tag/2.2.1
[Unreleased]: https://github.com/opensearch-project/opensearch-js/compare/2.2.1...HEAD
[Unreleased]: https://github.com/opensearch-project/opensearch-js/compare/2.2.1...HEAD
90 changes: 88 additions & 2 deletions lib/Serializer.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,77 @@ const sjson = require('secure-json-parse');
const { SerializationError, DeserializationError } = require('./errors');
const kJsonOptions = Symbol('secure json parse options');

/* In JavaScript, a `Number` is a 64-bit floating-point value which can store 16 digits. However, the
* serializer and deserializer will need to cater to numeric values generated by other languages which
* can have up to 19 digits. Native JSON parser and stringifier, incapable of handling the extra
* digits, corrupt the values, making them unusable.
*
* To work around this limitation, the deserializer converts long sequences of digits into strings and
* marks them before applying the parser. During the parsing, string values that begin with the mark
* are converted to `BigInt` values.
* Similarly, during stringification, the serializer converts `BigInt` values to marked strings and
* when done, it replaces them with plain numerals.
*
* `Number.MAX_SAFE_INTEGER`, 9,007,199,254,740,991, is the largest number that the native methods can
* parse and stringify, and any numeral greater than that would need to be translated using the
* workaround; all 17-digits or longer and only tail-end of the 16-digits need translation. It would
* be unfair to all the 16-digit numbers if the translation applied to `\d{16,}` only to cover the
* less than 10%. Hence, a RegExp is created to only match numerals too long to be a number.
*
* To make the explanation simpler, let's assume that MAX_SAFE_INTEGER is 8921 which has 4 digits.
* Starting from the right, for each digit we match the next larger digit onwards, `[<digit + 1>-9]`:
* 1) 8922 - 8929: 892[2-9]\d{0}
* 2) 8930 - 8999: 89[3-9]\d{1}
* 9) 9 + 1 = 10 which results in a rollover; no need to do anything.
* 8) 9000 - 9999: [9-9]\d{3}
* Finally we add anything 5 digits or longer: `\d{5,}`
*
* PS, a better solution would use AST but considering its performance penalty, RegExp is the next
* best solution.
*/
const maxIntAsString = String(Number.MAX_SAFE_INTEGER);
const maxIntLength = maxIntAsString.length;

// Sub-patterns for numerals beyond `Number.MAX_SAFE_INTEGER`. The first entry covers every numeral
// with more digits than `MAX_SAFE_INTEGER`; the loop adds one entry per digit position for numerals
// of the same length that share a prefix with `MAX_SAFE_INTEGER` and have a larger digit right
// after that prefix.
const bigIntMatcherTokens = [`\\d{${maxIntLength + 1},}`];
for (let i = 0; i < maxIntLength; i++) {
  // A `9` cannot be exceeded at its own position, so it contributes no sub-pattern
  if (maxIntAsString[i] === '9') continue;

  const nextDigit = Number(maxIntAsString[i]) + 1;
  bigIntMatcherTokens.push(
    `${maxIntAsString.substring(0, i)}[${nextDigit}-9]\\d{${maxIntLength - i - 1}}`
  );
}

/* The matcher that looks for `": <numeral>, ...}` and `[..., <numeral>, ...]`
 *
 * Group 1 is the lead-in: a `[`, a `,`, or a `"` that is not immediately preceded by a `\` and is
 * followed by a `:`. Group 2 is the numeral itself: an optional `-` and any of the numeric
 * sub-patterns. A comma, the end of an array, the end of an object, or the end of the input are
 * the only acceptable elements after it; they are looked ahead at, not consumed, so that a comma
 * can also serve as the lead-in of the next match.
 *
 * NOTE: the match is purely textual; the pattern does not track whether it is inside a string
 * literal.
 */
const bigIntMatcher = new RegExp(
  `(\\[|,|(?<!\\\\)"\\s*:)\\s*(-?(?:${bigIntMatcherTokens.join('|')}))\\s*(?=,|}|]|$)`,
  'g'
);

/* The mark is a combination of characters with a highly unlikely chance of occurrence in a
 * legitimate string. A more robust solution would dynamically choose the mark by examining the
 * input but the performance penalty of doing so is not worth it.
 */
const bigIntMark = `෴෴෴`;
// Replacement for `bigIntMatcher`: keep the lead-in, turn the numeral into a marked string
const bigIntMarker = `$1"${bigIntMark}$2"`;
// Recognizes a whole string value that consists of the mark followed by a long numeral
const bigIntMarkFinder = new RegExp(`^${bigIntMark}-?\\d{${maxIntLength},}$`);
const bigIntMarkLength = bigIntMark.length;

/* The matcher that looks for `": "<mark><numeral>", ...}` and `[..., "<mark><numeral>", ...]` in
 * previously marked numerals.
 *
 * Any number of digits is accepted: a `BigInt` is not necessarily larger than
 * `Number.MAX_SAFE_INTEGER`, and a marked value that is left untouched here would end up in the
 * output as a string that carries the mark instead of as a numeral.
 */
const markedBigIntMatcher = new RegExp(
  `(\\[|,|(?<!\\\\)"\\s*:)\\s*"${bigIntMark}(-?\\d+)"\\s*(?=,|}|]|$)`,
  'g'
);
// Replacement for `markedBigIntMatcher`: drop the quotation marks and the mark
const markedBigIntResolver = '$1$2';

class Serializer {
constructor(opts = {}) {
const disable = opts.disablePrototypePoisoningProtection;
Expand All @@ -48,7 +119,13 @@ class Serializer {
debug('Serializing', object);
let json;
try {
json = JSON.stringify(object);
json = JSON.stringify(
object,
// Convert BigInt values to a string and mark them
(key, val) => (typeof val === 'bigint' ? `${bigIntMark}${val.toString()}` : val)
)
// Replace marked substrings with just the numerals
.replace(markedBigIntMatcher, markedBigIntResolver);
} catch (err) {
throw new SerializationError(err.message, object);
}
Expand All @@ -59,7 +136,16 @@ class Serializer {
debug('Deserializing', json);
let object;
try {
object = sjson.parse(json, this[kJsonOptions]);
object = sjson.parse(
// Convert long numerals to strings and mark them
json.replace(bigIntMatcher, bigIntMarker),
(key, val) =>
// Convert marked values to BigInt values
typeof val === 'string' && val.startsWith(bigIntMark) && bigIntMarkFinder.test(val)
? BigInt(val.substring(bigIntMarkLength)) // eslint-disable-line no-undef
: val,
this[kJsonOptions]
);
} catch (err) {
throw new DeserializationError(err.message, json);
}
Expand Down
21 changes: 21 additions & 0 deletions test/unit/serializer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,27 @@ test('Basic', (t) => {
t.same(s.deserialize(json), obj);
});

// Round-trips numerals beyond the safe integer range through deserialize() and serialize()
test('Long numerals', (t) => {
  t.plan(5);

  // Twice the safe bounds, so neither value can be held by a `Number` without loss
  const hugePositive = 2n * BigInt(Number.MAX_SAFE_INTEGER); // eslint-disable-line no-undef
  const hugeNegative = 2n * BigInt(Number.MIN_SAFE_INTEGER); // eslint-disable-line no-undef

  // The first key embeds `":<numeral>` behind an escaped quote; it must be left alone
  const trickyKey = `":${hugePositive}`;
  const payload = [
    '{',
    `"\\":${hugePositive}": "NO-MATCH", `,
    `"positive": ${hugePositive}, `,
    `"array": [ ${hugeNegative}, ${hugePositive} ], `,
    `"negative": ${hugeNegative}`,
    '}',
  ].join('');

  const serializer = new Serializer();
  const parsed = serializer.deserialize(payload);
  const roundTripped = serializer.serialize(parsed);

  t.equal(parsed.positive, hugePositive);
  t.equal(parsed.negative, hugeNegative);
  t.same(parsed.array, [hugeNegative, hugePositive]);
  t.equal(parsed[trickyKey], 'NO-MATCH');
  // Serialization emits no insignificant whitespace
  t.equal(roundTripped, payload.replace(/\s+/g, ''));
});

test('ndserialize', (t) => {
t.plan(1);
const s = new Serializer();
Expand Down

0 comments on commit e792722

Please sign in to comment.