Skip to content

Commit

Permalink
fix(marshal)!: compare strings by codepoint
Browse files Browse the repository at this point in the history
  • Loading branch information
erights committed Jan 29, 2024
1 parent 193e403 commit 1312009
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 2 deletions.
1 change: 1 addition & 0 deletions packages/marshal/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export {

export {
trivialComparator,
compareByCodePoints,
assertRankSorted,
compareRank,
isRankSorted,
Expand Down
43 changes: 41 additions & 2 deletions packages/marshal/src/rankOrder.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,46 @@ const { entries, fromEntries, setPrototypeOf, is } = Object;
*/
const sameValueZero = (x, y) => x === y || is(x, y);

/**
* @param {any} left
* @param {any} right
* @returns {RankComparison}
*/
export const trivialComparator = (left, right) =>
// eslint-disable-next-line no-nested-ternary, @endo/restrict-comparison-operands
left < right ? -1 : left === right ? 0 : 1;
harden(trivialComparator);

// Apparently eslint confused about whether the function can ever exit
// without an explicit return.
// eslint-disable-next-line jsdoc/require-returns-check
/**
* @param {string} left
* @param {string} right
* @returns {RankComparison}
*/
export const compareByCodePoints = (left, right) => {
const leftIter = left[Symbol.iterator]();
const rightIter = right[Symbol.iterator]();
for (;;) {
const { value: leftChar } = leftIter.next();
const { value: rightChar } = rightIter.next();
if (leftChar === undefined && rightChar === undefined) {
return 0;
} else if (leftChar === undefined) {
// left is a prefix of right.
return -1;
} else if (rightChar === undefined) {
// right is a prefix of left.
return 1;
}
const leftCodepoint = /** @type {number} */ (leftChar.codePointAt(0));
const rightCodepoint = /** @type {number} */ (rightChar.codePointAt(0));
if (leftCodepoint < rightCodepoint) return -1;
if (leftCodepoint > rightCodepoint) return 1;
}
};
harden(compareByCodePoints);

/**
* @typedef {Record<PassStyle, { index: number, cover: RankCover }>} PassStyleRanksRecord
Expand Down Expand Up @@ -140,8 +177,7 @@ export const makeComparatorKit = (compareRemotables = (_x, _y) => 0) => {
return 0;
}
case 'boolean':
case 'bigint':
case 'string': {
case 'bigint': {
// Within each of these passStyles, the rank ordering agrees with
// JavaScript's relational operators `<` and `>`.
if (left < right) {
Expand All @@ -151,6 +187,9 @@ export const makeComparatorKit = (compareRemotables = (_x, _y) => 0) => {
return 1;
}
}
case 'string': {
return compareByCodePoints(left, right);
}
case 'symbol': {
return comparator(
nameForPassableSymbol(left),
Expand Down
36 changes: 36 additions & 0 deletions packages/marshal/test/test-string-rank-order.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { test } from './prepare-test-env-ava.js';

import { compareRank } from '../src/rankOrder.js';

test('unicode code point order', t => {
// Test case from
// https://icu-project.org/docs/papers/utf16_code_point_order.html
const str0 = '\u{ff61}';
const str3 = '\u{d800}\u{dc02}';

// str1 and str2 become impossible examples once we prohibit
// non - well - formed strings.
// See https://github.com/endojs/endo/pull/2002
const str1 = '\u{d800}X';
const str2 = '\u{d800}\u{ff61}';

// harden to ensure it is not sorted in place, just for sanity
const strs = harden([str0, str1, str2, str3]);

/**
* @param {string} left
* @param {string} right
* @returns {import('../src/types.js').RankComparison}
*/
const nativeComp = (left, right) =>
// eslint-disable-next-line no-nested-ternary
left < right ? -1 : left > right ? 1 : 0;

const nativeSorted = strs.toSorted(nativeComp);

t.deepEqual(nativeSorted, [str1, str3, str2, str0]);

const rankSorted = strs.toSorted(compareRank);

t.deepEqual(rankSorted, [str1, str2, str0, str3]);
});
38 changes: 38 additions & 0 deletions packages/patterns/test/test-string-key-order.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// modeled on test-string-rank-order.js

import { test } from './prepare-test-env-ava.js';

import { compareKeys } from '../src/keys/compareKeys.js';

test('unicode code point order', t => {
// Test case from
// https://icu-project.org/docs/papers/utf16_code_point_order.html
const str0 = '\u{ff61}';
const str3 = '\u{d800}\u{dc02}';

// str1 and str2 become impossible examples once we prohibit
// non - well - formed strings.
// See https://github.com/endojs/endo/pull/2002
const str1 = '\u{d800}X';
const str2 = '\u{d800}\u{ff61}';

// harden to ensure it is not sorted in place, just for sanity
const strs = harden([str0, str1, str2, str3]);

/**
* @param {string} left
* @param {string} right
* @returns {import('../src/types.js').KeyComparison}
*/
const nativeComp = (left, right) =>
// eslint-disable-next-line no-nested-ternary
left < right ? -1 : left > right ? 1 : 0;

const nativeSorted = strs.toSorted(nativeComp);

t.deepEqual(nativeSorted, [str1, str3, str2, str0]);

const keySorted = strs.toSorted(compareKeys);

t.deepEqual(keySorted, [str1, str2, str0, str3]);
});

0 comments on commit 1312009

Please sign in to comment.