From afba8fe57e736eeff9ed87d4eedb9a911906e8c2 Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Wed, 18 Sep 2024 15:53:30 +0800 Subject: [PATCH] Add tests for remaining segmenters --- src/Differ.test.ts | 87 ++++++++++++++++++++++++++++------------------ src/Differ.ts | 27 ++++++++++---- src/_testUtils.ts | 8 ++--- 3 files changed, 77 insertions(+), 45 deletions(-) diff --git a/src/Differ.test.ts b/src/Differ.test.ts index 8b0d120..ee03a21 100644 --- a/src/Differ.test.ts +++ b/src/Differ.test.ts @@ -8,23 +8,23 @@ const differ = new Differ() Deno.test(differ.diff.name, async (t) => { await t.step('chars', () => { assertDiffsEqual( - [[-1, 'abc'], [0, 'd'], [1, 'efg']], differ.diff('abcd', 'defg'), + [[-1, 'abc'], [0, 'd'], [1, 'efg']], ) }) await t.step('non-BMP', async (t) => { await t.step('emojis', () => { assertDiffsEqual( - [[-1, 'πŸ’«'], [1, 'πŸ’©']], differ.diff('πŸ’«', 'πŸ’©'), + [[-1, 'πŸ’«'], [1, 'πŸ’©']], ) }) await t.step('can opt into old code unit behavior', () => { assertDiffsEqual( - [[0, '\ud83d'], [-1, '\udcab'], [1, '\udca9']], differ.diffCodeUnits('πŸ’«', 'πŸ’©'), + [[0, '\ud83d'], [-1, '\udcab'], [1, '\udca9']], ) assertEquals( @@ -34,23 +34,45 @@ Deno.test(differ.diff.name, async (t) => { }) }) + await t.step('graphemes', () => { + const before = 'กำ' + const after = 'ก' + + assertDiffsEqual( + differ.diff(before, after, { segmenter: segmenters.grapheme }), + [[-1, 'กำ'], [1, 'ก']], + ) + + // ...compared with default `char` segmenter... + assertDiffsEqual( + differ.diff(before, after), + [[0, 'ก'], [-1, 'ΰΈ³']], + ) + }) + + await t.step('sentences', () => { + assertDiffsEqual( + differ.diff( + 'This is a sentence. This is another sentence.', + 'This is a sentence. This is yet another sentence.', + { segmenter: segmenters.sentence }, + ), + [[0, 'This is a sentence. '], [-1, 'This is another sentence.'], [1, 'This is yet another sentence.']], + ) + }) + await t.step('words', async (t) => { await t.step('default word segmenter', () => { assertDiffsEqual( - [[-1, 'Hello'], [1, 'Goodbye'], [0, ', world!']], differ.diff('Hello, world!', 'Goodbye, world!', { segmenter: segmenters.word }), + [[-1, 'Hello'], [1, 'Goodbye'], [0, ', world!']], ) }) await t.step('xml', () => { assertDiffsEqual( - [[0, '']], differ.diff('', '', { segmenter: segmenters.word }), - ) - - assertDiffsEqual( [[0, '']], - differ.diff('', '', { segmenter: segmenters.word }), ) }) @@ -58,8 +80,8 @@ Deno.test(differ.diff.name, async (t) => { const segmenter = new Intl.Segmenter('zh-CN', { granularity: 'word' }) assertDiffsEqual( - [[0, 'δΈ€εͺ'], [-1, 'ε°θœœθœ‚'], [1, 'θ€θ™Ž']], differ.diff('δΈ€εͺε°θœœθœ‚', 'δΈ€εͺθ€θ™Ž', { segmenter }), + [[0, 'δΈ€εͺ'], [-1, 'ε°θœœθœ‚'], [1, 'θ€θ™Ž']], ) }) }) @@ -69,16 +91,15 @@ Deno.test(differ.diff.name, async (t) => { const segmenter = (str: string) => str.match(/\d+|./gus) ?? [] assertDiffsEqual( - [[-1, 'hell'], [1, 'go'], [0, 'o'], [1, 'dbye'], [0, ' '], [-1, '123'], [1, '135']], differ.diff('hello 123', 'goodbye 135', { segmenter }), + [[-1, 'hell'], [1, 'go'], [0, 'o'], [1, 'dbye'], [0, ' '], [-1, '123'], [1, '135']], ) }) }) - await t.step('parity with line diff function from docs', () => { - // https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs - + await t.step('lines (parity with line diff function from docs)', () => { function diffLineMode(text1: string, text2: string) { + // https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs const dmp = new DiffMatchPatchFull() const { chars1, chars2, lineArray } = dmp['diff_linesToChars_'](text1, text2) const diffs = dmp.diff_main(chars1, chars2, false) @@ -91,31 +112,29 @@ Deno.test(differ.diff.name, async (t) => { const str2 = '11\n12\n14\n15' assertEquals( - diffLineMode(str1, str2), differ.diff(str1, str2, { segmenter: segmenters.line }), + diffLineMode(str1, str2), ) }) }) -Deno.test(differ.diffWithin.name, async (t) => { - await t.step('chars', () => { - const text1 = `Line One\nLine Two\nLine Three\n` - const text2 = `Line One\nLine 2\nLine Three\nLine Four\nLine Five\n` - - const diffs = differ.diff(text1, text2, { segmenter: segmenters.line, join: false }) - const diff2d = differ.diffWithin(diffs, { segmenter: segmenters.word }) - - assertDiffsEqual2d( - diff2d, - [ - [[0, 'Line One\n']], - [[0, 'Line '], [-1, 'Two'], [1, '2'], [0, '\n']], - [[0, 'Line Three\n']], - [[1, 'Line Four\n']], - [[1, 'Line Five\n']], - ], - ) - }) +Deno.test(differ.diffWithin.name, () => { + const text1 = `Line One\nLine Two\nLine Three\n` + const text2 = `Line One\nLine 2\nLine Three\nLine Four\nLine Five\n` + + const diffs = differ.diff(text1, text2, { segmenter: segmenters.line, join: false }) + const diff2d = differ.diffWithin(diffs, { segmenter: segmenters.word }) + + assertDiffsEqual2d( + diff2d, + [ + [[0, 'Line One\n']], + [[0, 'Line '], [-1, 'Two'], [1, '2'], [0, '\n']], + [[0, 'Line Three\n']], + [[1, 'Line Four\n']], + [[1, 'Line Five\n']], + ], + ) }) Deno.test('README', () => { diff --git a/src/Differ.ts b/src/Differ.ts index be4b60a..ed7e910 100644 --- a/src/Differ.ts +++ b/src/Differ.ts @@ -7,9 +7,9 @@ import { SegmentCodec, StringIter } from './_SegmentCodec.ts' * {@linkcode DiffMatchPatch} instance. */ export type DiffMatchPatchConfig = { - [K in 'Diff_Timeout' | 'Diff_EditCost' as Uncapitalize>]: DiffMatchPatch[K] + [K in 'Diff_Timeout' | 'Diff_EditCost' as DiffMatchPatchConfigKey]: DiffMatchPatch[K] } -type GetK = Type extends `Diff_${infer U}` ? U : never +type DiffMatchPatchConfigKey = Type extends `Diff_${infer U}` ? Uncapitalize : never /** * Options for methods of the {@linkcode Differ} class. @@ -39,14 +39,27 @@ export type DiffOptions = { checkLines: boolean } -type Segmenter = SimpleSegmenter | Intl.Segmenter -type SimpleSegmenter = (str: string) => StringIter +type Segmenter = SegmentFunction | Intl.Segmenter +type SegmentFunction = (str: string) => StringIter /** * A collection of commonly-used segmenters, suitable for use as the `segmenter` option in the {@linkcode Differ} class. */ -export const segmenters: Record<'char' | 'line' | 'grapheme' | 'word' | 'sentence', Segmenter> = { - char: (str) => str, +export const segmenters: { + /** Separate by characters (Unicode code points) */ + char: Segmenter + /** Separate by lines, i.e. separate on newline `\n` */ + line: Segmenter + /** Separate by Unicode grapheme clusters */ + grapheme: Segmenter + /** Separate by words */ + word: Segmenter + /** Separate by sentences */ + sentence: Segmenter +} = { + *char(str) { + yield* str + }, *line(str) { for (let i = 0, n = 0; i < str.length; i = n + 1) { n = (str.length + str.indexOf('\n', i)) % str.length @@ -182,7 +195,7 @@ export class Differ { return this.#dmp.diff_main(before, after, checkLines, this.#deadline) } - #toSegmentFn(segmenter: Segmenter): SimpleSegmenter { + #toSegmentFn(segmenter: Segmenter): SegmentFunction { if (!(segmenter instanceof Intl.Segmenter)) { return segmenter } diff --git a/src/_testUtils.ts b/src/_testUtils.ts index 4bd0892..aba6d55 100644 --- a/src/_testUtils.ts +++ b/src/_testUtils.ts @@ -1,10 +1,10 @@ import { assertEquals } from '@std/assert/equals' import { DiffLike, makeDiffs } from './utils.ts' -export function assertDiffsEqual(d1: readonly DiffLike[], d2: readonly DiffLike[]) { - assertEquals(makeDiffs(d1), makeDiffs(d2)) +export function assertDiffsEqual(actual: readonly DiffLike[], expected: readonly DiffLike[]) { + assertEquals(makeDiffs(actual), makeDiffs(expected)) } -export function assertDiffsEqual2d(d1: readonly DiffLike[][], d2: readonly DiffLike[][]) { - assertEquals(d1.map(makeDiffs), d2.map(makeDiffs)) +export function assertDiffsEqual2d(actual: readonly DiffLike[][], expected: readonly DiffLike[][]) { + assertEquals(actual.map(makeDiffs), expected.map(makeDiffs)) }