Skip to content
This repository has been archived by the owner on Feb 25, 2023. It is now read-only.

Fix incorrect furigana distribution #1514

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 65 additions & 16 deletions ext/js/language/japanese-util.js
Original file line number Diff line number Diff line change
Expand Up @@ -466,27 +466,55 @@ const JapaneseUtil = (() => {
}

/**
 * Distributes furigana over `source`, a text that shares a leading stem with
 * `expression` (or, in the newer lines below, with `reading`).
 *
 * NOTE(review): this span appears to come from a diff view. Lines of the
 * previous implementation (which builds `result`) and of the replacement
 * (which builds `segments`) are interleaved, so it is not one executable body.
 *
 * @param {string} expression The term text that `source` is compared against.
 * @param {string} reading The reading passed to `distributeFurigana`.
 * @param {string} source The text whose leading stem is matched; anything after the stem is emitted with empty furigana.
 * @returns {Array} Segments with `text` and `furigana` fields, as produced by `distributeFurigana` and `_createFuriganaSegment`.
 */
distributeFuriganaInflected(expression, reading, source) {
// Previous implementation: count the leading characters shared by source and
// expression after both are passed through convertKatakanaToHiragana.
let stemLength = 0;
const shortest = Math.min(source.length, expression.length);
const sourceHiragana = this.convertKatakanaToHiragana(source);
const expressionHiragana = this.convertKatakanaToHiragana(expression);
while (stemLength < shortest && sourceHiragana[stemLength] === expressionHiragana[stemLength]) {
++stemLength;
// Replacement implementation: convert all three inputs with
// convertKatakanaToHiragana so the stem comparison ignores the kana script.
const expressionNormalized = this.convertKatakanaToHiragana(expression);
const readingNormalized = this.convertKatakanaToHiragana(reading);
const sourceNormalized = this.convertKatakanaToHiragana(source);

// mainText is the string whose segments are kept for the stem; it starts as
// the expression and switches to the reading if that shares a longer stem.
let mainText = expression;
let stemLength = this._getStemLength(expressionNormalized, sourceNormalized);

// Check if source is derived from the reading instead of the expression
const readingStemLength = this._getStemLength(readingNormalized, sourceNormalized);
if (readingStemLength > stemLength) {
mainText = reading;
stemLength = readingStemLength;
}
// Previous implementation: offset is the number of source characters after the stem.
const offset = source.length - stemLength;

// Previous implementation: distribute furigana over the stem only, trimming
// the reading by the expression's non-stem length unless source is all stem.
const stemExpression = source.substring(0, source.length - offset);
const stemReading = reading.substring(
0,
offset === 0 ? reading.length : reading.length - expression.length + stemLength
);
const result = this.distributeFurigana(stemExpression, stemReading);
// Replacement implementation: distribute furigana over the whole mainText,
// then keep only the segments that fall inside the first stemLength characters.
const segments = [];
if (stemLength > 0) {
const segments2 = this.distributeFurigana(mainText, reading);
// consumed tracks how many characters of mainText the visited segments cover.
let consumed = 0;
for (const segment of segments2) {
const {text} = segment;
const start = consumed;
consumed += text.length;
if (consumed < stemLength) {
// Segment ends before the stem boundary: keep it and continue.
segments.push(segment);
} else if (consumed === stemLength) {
// Segment ends exactly on the stem boundary: keep it and stop.
segments.push(segment);
break;
} else {
// Segment crosses the stem boundary: keep only the part inside the stem,
// with empty furigana, then stop.
if (start < stemLength) {
segments.push(this._createFuriganaSegment(mainText.substring(start, stemLength), ''));
}
break;
}
}
}

// Previous implementation: always append the non-stem remainder as its own segment.
if (stemLength !== source.length) {
result.push(this._createFuriganaSegment(source.substring(stemLength), ''));
// Replacement implementation: append the part of source after the stem.
if (stemLength < source.length) {
const remainder = source.substring(stemLength);
const segmentCount = segments.length;
if (segmentCount > 0 && segments[segmentCount - 1].furigana.length === 0) {
// Append to the last segment if it has an empty reading
segments[segmentCount - 1].text += remainder;
} else {
// Otherwise, create a new segment
segments.push(this._createFuriganaSegment(remainder, ''));
}
}

return result;
return segments;
}

// Miscellaneous
Expand Down Expand Up @@ -648,6 +676,27 @@ const JapaneseUtil = (() => {

return result;
}

_getStemLength(text1, text2) {
const minLength = Math.min(text1.length, text2.length);
if (minLength === 0) { return 0; }

let i = 0;
while (true) {
const char1 = text1.codePointAt(i);
const char2 = text2.codePointAt(i);
if (char1 !== char2) { break; }
const charLength = String.fromCodePoint(char1).length;
i += charLength;
if (i >= minLength) {
if (i > minLength) {
i -= charLength; // Don't consume partial UTF16 surrogate characters
}
break;
}
}
return i;
}
}


Expand Down
18 changes: 14 additions & 4 deletions test/test-japanese.js
Original file line number Diff line number Diff line change
Expand Up @@ -729,16 +729,26 @@ function testDistributeFuriganaInflected() {
['美味しい', 'おいしい', '美味しかた'],
[
{text: '美味', furigana: 'おい'},
{text: 'し', furigana: ''},
{text: 'かた', furigana: ''}
{text: 'しかた', furigana: ''}
]
],
[
['食べる', 'たべる', '食べた'],
[
{text: '食', furigana: 'た'},
{text: 'べ', furigana: ''},
{text: 'た', furigana: ''}
{text: 'べた', furigana: ''}
]
],
[
['迄に', 'までに', 'までに'],
[
{text: 'までに', furigana: ''}
]
],
[
['行う', 'おこなう', 'おこなわなかった'],
[
{text: 'おこなわなかった', furigana: ''}
]
]
];
Expand Down