Skip to content

Commit

Permalink
Add tests for remaining segmenters
Browse files (browse the repository at this point in the history)
  • Loading branch information
lionel-rowe committed Sep 18, 2024
1 parent f906c6e commit afba8fe
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 45 deletions.
87 changes: 53 additions & 34 deletions src/Differ.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@ const differ = new Differ()
Deno.test(differ.diff.name, async (t) => {
await t.step('chars', () => {
assertDiffsEqual(
[[-1, 'abc'], [0, 'd'], [1, 'efg']],
differ.diff('abcd', 'defg'),
[[-1, 'abc'], [0, 'd'], [1, 'efg']],
)
})

await t.step('non-BMP', async (t) => {
await t.step('emojis', () => {
assertDiffsEqual(
[[-1, '💫'], [1, '💩']],
differ.diff('💫', '💩'),
[[-1, '💫'], [1, '💩']],
)
})

await t.step('can opt into old code unit behavior', () => {
assertDiffsEqual(
[[0, '\ud83d'], [-1, '\udcab'], [1, '\udca9']],
differ.diffCodeUnits('💫', '💩'),
[[0, '\ud83d'], [-1, '\udcab'], [1, '\udca9']],
)

assertEquals(
Expand All @@ -34,32 +34,54 @@ Deno.test(differ.diff.name, async (t) => {
})
})

await t.step('graphemes', () => {
const before = 'กำ'
const after = 'ก'

assertDiffsEqual(
differ.diff(before, after, { segmenter: segmenters.grapheme }),
[[-1, 'กำ'], [1, 'ก']],
)

// ...compared with default `char` segmenter...
assertDiffsEqual(
differ.diff(before, after),
[[0, 'ก'], [-1, 'ำ']],
)
})

await t.step('sentences', () => {
assertDiffsEqual(
differ.diff(
'This is a sentence. This is another sentence.',
'This is a sentence. This is yet another sentence.',
{ segmenter: segmenters.sentence },
),
[[0, 'This is a sentence. '], [-1, 'This is another sentence.'], [1, 'This is yet another sentence.']],
)
})

await t.step('words', async (t) => {
await t.step('default word segmenter', () => {
assertDiffsEqual(
[[-1, 'Hello'], [1, 'Goodbye'], [0, ', world!']],
differ.diff('Hello, world!', 'Goodbye, world!', { segmenter: segmenters.word }),
[[-1, 'Hello'], [1, 'Goodbye'], [0, ', world!']],
)
})

await t.step('xml', () => {
assertDiffsEqual(
[[0, '<book price="'], [-1, '4.99'], [1, '7.99'], [0, '" />']],
differ.diff('<book price="4.99" />', '<book price="7.99" />', { segmenter: segmenters.word }),
)

assertDiffsEqual(
[[0, '<book price="'], [-1, '4.99'], [1, '7.99'], [0, '" />']],
differ.diff('<book price="4.99" />', '<book price="7.99" />', { segmenter: segmenters.word }),
)
})

await t.step('custom word segmenter', () => {
const segmenter = new Intl.Segmenter('zh-CN', { granularity: 'word' })

assertDiffsEqual(
[[0, '两只'], [-1, '小蜜蜂'], [1, '老虎']],
differ.diff('两只小蜜蜂', '两只老虎', { segmenter }),
[[0, '两只'], [-1, '小蜜蜂'], [1, '老虎']],
)
})
})
Expand All @@ -69,16 +91,15 @@ Deno.test(differ.diff.name, async (t) => {
const segmenter = (str: string) => str.match(/\d+|./gus) ?? []

assertDiffsEqual(
[[-1, 'hell'], [1, 'go'], [0, 'o'], [1, 'dbye'], [0, ' '], [-1, '123'], [1, '135']],
differ.diff('hello 123', 'goodbye 135', { segmenter }),
[[-1, 'hell'], [1, 'go'], [0, 'o'], [1, 'dbye'], [0, ' '], [-1, '123'], [1, '135']],
)
})
})

await t.step('parity with line diff function from docs', () => {
// https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs

await t.step('lines (parity with line diff function from docs)', () => {
function diffLineMode(text1: string, text2: string) {
// https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs
const dmp = new DiffMatchPatchFull()
const { chars1, chars2, lineArray } = dmp['diff_linesToChars_'](text1, text2)
const diffs = dmp.diff_main(chars1, chars2, false)
Expand All @@ -91,31 +112,29 @@ Deno.test(differ.diff.name, async (t) => {
const str2 = '11\n12\n14\n15'

assertEquals(
diffLineMode(str1, str2),
differ.diff(str1, str2, { segmenter: segmenters.line }),
diffLineMode(str1, str2),
)
})
})

Deno.test(differ.diffWithin.name, async (t) => {
await t.step('chars', () => {
const text1 = `Line One\nLine Two\nLine Three\n`
const text2 = `Line One\nLine 2\nLine Three\nLine Four\nLine Five\n`

const diffs = differ.diff(text1, text2, { segmenter: segmenters.line, join: false })
const diff2d = differ.diffWithin(diffs, { segmenter: segmenters.word })

assertDiffsEqual2d(
diff2d,
[
[[0, 'Line One\n']],
[[0, 'Line '], [-1, 'Two'], [1, '2'], [0, '\n']],
[[0, 'Line Three\n']],
[[1, 'Line Four\n']],
[[1, 'Line Five\n']],
],
)
})
Deno.test(differ.diffWithin.name, () => {
const text1 = `Line One\nLine Two\nLine Three\n`
const text2 = `Line One\nLine 2\nLine Three\nLine Four\nLine Five\n`

const diffs = differ.diff(text1, text2, { segmenter: segmenters.line, join: false })
const diff2d = differ.diffWithin(diffs, { segmenter: segmenters.word })

assertDiffsEqual2d(
diff2d,
[
[[0, 'Line One\n']],
[[0, 'Line '], [-1, 'Two'], [1, '2'], [0, '\n']],
[[0, 'Line Three\n']],
[[1, 'Line Four\n']],
[[1, 'Line Five\n']],
],
)
})

Deno.test('README', () => {
Expand Down
27 changes: 20 additions & 7 deletions src/Differ.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ import { SegmentCodec, StringIter } from './_SegmentCodec.ts'
* {@linkcode DiffMatchPatch} instance.
*/
export type DiffMatchPatchConfig = {
[K in 'Diff_Timeout' | 'Diff_EditCost' as Uncapitalize<GetK<K>>]: DiffMatchPatch[K]
[K in 'Diff_Timeout' | 'Diff_EditCost' as DiffMatchPatchConfigKey<K>]: DiffMatchPatch[K]
}
type GetK<Type extends string> = Type extends `Diff_${infer U}` ? U : never
type DiffMatchPatchConfigKey<Type extends string> = Type extends `Diff_${infer U}` ? Uncapitalize<U> : never

/**
* Options for methods of the {@linkcode Differ} class.
Expand Down Expand Up @@ -39,14 +39,27 @@ export type DiffOptions = {
checkLines: boolean
}

type Segmenter = SimpleSegmenter | Intl.Segmenter
type SimpleSegmenter = (str: string) => StringIter
type Segmenter = SegmentFunction | Intl.Segmenter
type SegmentFunction = (str: string) => StringIter

/**
* A collection of commonly-used segmenters, suitable for use as the `segmenter` option in the {@linkcode Differ} class.
*/
export const segmenters: Record<'char' | 'line' | 'grapheme' | 'word' | 'sentence', Segmenter> = {
char: (str) => str,
export const segmenters: {
/** Separate by characters (Unicode code points) */
char: Segmenter
/** Separate by lines, i.e. separate on newline `\n` */
line: Segmenter
/** Separate by Unicode grapheme clusters */
grapheme: Segmenter
/** Separate by words */
word: Segmenter
/** Separate by sentences */
sentence: Segmenter
} = {
*char(str) {
yield* str
},
*line(str) {
for (let i = 0, n = 0; i < str.length; i = n + 1) {
n = (str.length + str.indexOf('\n', i)) % str.length
Expand Down Expand Up @@ -182,7 +195,7 @@ export class Differ {
return this.#dmp.diff_main(before, after, checkLines, this.#deadline)
}

#toSegmentFn(segmenter: Segmenter): SimpleSegmenter {
#toSegmentFn(segmenter: Segmenter): SegmentFunction {
if (!(segmenter instanceof Intl.Segmenter)) {
return segmenter
}
Expand Down
8 changes: 4 additions & 4 deletions src/_testUtils.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import { assertEquals } from '@std/assert/equals'
import { DiffLike, makeDiffs } from './utils.ts'

export function assertDiffsEqual(d1: readonly DiffLike[], d2: readonly DiffLike[]) {
assertEquals(makeDiffs(d1), makeDiffs(d2))
export function assertDiffsEqual(actual: readonly DiffLike[], expected: readonly DiffLike[]) {
assertEquals(makeDiffs(actual), makeDiffs(expected))
}

export function assertDiffsEqual2d(d1: readonly DiffLike[][], d2: readonly DiffLike[][]) {
assertEquals(d1.map(makeDiffs), d2.map(makeDiffs))
export function assertDiffsEqual2d(actual: readonly DiffLike[][], expected: readonly DiffLike[][]) {
assertEquals(actual.map(makeDiffs), expected.map(makeDiffs))
}

0 comments on commit afba8fe

Please sign in to comment.