Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend support for irregular inflections of suru-verbs #2038

Merged
merged 4 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ app.
- Added handling for 戶 and 內 kyūjitai.
- Added deinflection for additional forms of -sugiru
([#2033](https://github.com/birchill/10ten-ja-reader/pull/2033)).
- Added deinflection for irregular forms of `vs-s` and `vz` class suru verbs
([#2038](https://github.com/birchill/10ten-ja-reader/pull/2038)).
- Removed deinflection of `vs-c` class verbs, as they are not used in modern
Japanese.
- Fixed unreasonable matches caused by duplicates in the deinflection reason chain
([#1966](https://github.com/birchill/10ten-ja-reader/issues/1966)).
- Fixed sorting of deinflected results in some cases (e.g. 見とれる).
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
<img src="https://raw.githubusercontent.com/birchill/10ten-ja-reader/main/docs/jou-conversion-cropped.png" alt="Screenshot showing translation of 四畳半 into 7.29 square meters" title="Area translation" width="640">

- Recognition of a wide range of grammatical forms
(e.g. vs-c verbs like 兼した,
(e.g. irregular inflections of する-verbs like 罰せられる,
irregular verbs like いらっしゃいます,
continuous forms like 食べてた,
ん as a negative form like 分からん、知らん,
Expand Down
3 changes: 3 additions & 0 deletions _locales/en/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@
"deinflect_imperative_negative": {
"message": "imperative negative"
},
"deinflect_irregular": {
"message": "irregular"
},
"deinflect_ki": {
"message": "-ki"
},
Expand Down
3 changes: 3 additions & 0 deletions _locales/ja/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@
"deinflect_imperative_negative": {
"message": "否定命令形"
},
"deinflect_irregular": {
"message": "不規則"
},
"deinflect_ki": {
"message": "連体形"
},
Expand Down
3 changes: 3 additions & 0 deletions _locales/zh_hans/messages.json
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,9 @@
"deinflect_imperative_negative": {
"message": "否定命令形"
},
"deinflect_irregular": {
"message": "不规则"
},
"deinflect_ki": {
"message": "连体形"
},
Expand Down
68 changes: 58 additions & 10 deletions src/background/deinflect.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,62 @@ describe('deinflect', () => {
});
});

it('deinflects vs-c', () => {
const result = deinflect('兼した');
const match = result.find((candidate) => candidate.word === '兼す');
expect(match).toEqual({
reasonChains: [[Reason.Past]],
type: 18,
word: '兼す',
});
// Checks that every listed inflection of する yields a candidate whose word is
// the plain form する, whose type includes the SuruVerb bit, and whose reason
// chain is exactly the expected one.
it('deinflects all forms of する', () => {
  // Each row is [inflected form, expected reason chain]. Typing the rows as
  // tuples keeps `inflected` a string so no type assertion is needed below.
  const cases: Array<[string, Reason[]]> = [
    ['した', [Reason.Past]],
    ['しよう', [Reason.Volitional]],
    ['しない', [Reason.Negative]],
    ['せぬ', [Reason.Negative]],
    ['せん', [Reason.Negative]],
    ['せず', [Reason.Zu]],
    ['される', [Reason.Passive]],
    ['させる', [Reason.Causative]],
    ['しろ', [Reason.Imperative]],
    ['せよ', [Reason.Imperative]],
    ['すれば', [Reason.Ba]],
    ['できる', [Reason.Potential]],
  ];

  for (const [inflected, reasons] of cases) {
    const result = deinflect(inflected);
    // The candidate type is a bit field, so test the SuruVerb bit explicitly
    // rather than relying on the number being truthy.
    const match = result.find(
      (candidate) =>
        candidate.word === 'する' &&
        (candidate.type & WordType.SuruVerb) !== 0
    );
    expect(match).toBeDefined();
    expect(match?.reasonChains).toEqual([reasons]);
  }
});

// Checks that irregular inflections of special-class suru verbs are traced
// back to their plain form with type SpecialSuruVerb and a reason chain that
// starts with Reason.Irregular.
it('deinflects additional forms of special class suru-verbs', () => {
  // Each row is [plain form, inflected form, expected reason chain]. Typing
  // the rows as tuples keeps both word columns as strings so no type
  // assertion is needed below.
  const cases: Array<[string, string, Reason[]]> = [
    ['発する', '発せさせる', [Reason.Irregular, Reason.Causative]],
    ['発する', '発せられる', [Reason.Irregular, Reason.PotentialOrPassive]],
    ['発する', '発しさせる', [Reason.Irregular, Reason.Causative]],
    ['発する', '発しられる', [Reason.Irregular, Reason.PotentialOrPassive]],
    // Godan-style conjugation (五段化)
    ['発する', '発さない', [Reason.Irregular, Reason.Negative]],
    ['発する', '発さず', [Reason.Irregular, Reason.Zu]],
    ['発する', '発そう', [Reason.Irregular, Reason.Volitional]],
    ['愛する', '愛せる', [Reason.Irregular, Reason.Potential]],
    ['愛する', '愛せば', [Reason.Irregular, Reason.Ba]],
    ['愛する', '愛せ', [Reason.Irregular, Reason.Imperative]],
    // ずる / vz class verbs
    ['信ずる', '信ぜぬ', [Reason.Irregular, Reason.Negative]],
    ['信ずる', '信ぜず', [Reason.Irregular, Reason.Zu]],
    ['信ずる', '信ぜさせる', [Reason.Irregular, Reason.Causative]],
    ['信ずる', '信ぜられる', [Reason.Irregular, Reason.PotentialOrPassive]],
    ['信ずる', '信ずれば', [Reason.Irregular, Reason.Ba]],
    ['信ずる', '信ぜよ', [Reason.Irregular, Reason.Imperative]],
  ];

  for (const [plain, inflected, reasons] of cases) {
    const result = deinflect(inflected);
    const match = result.find((candidate) => candidate.word === plain);
    expect(match).toBeDefined();
    expect(match?.type).toEqual(WordType.SpecialSuruVerb);
    expect(match?.reasonChains).toEqual([reasons]);
  }
});

it('deinflects irregular forms of 行く', () => {
Expand Down Expand Up @@ -310,8 +358,8 @@ describe('deinflect', () => {
['歩いてる', '歩く', 2, undefined],
['泳いでいる', '泳ぐ', 2, undefined],
['泳いでる', '泳ぐ', 2, undefined],
['話している', '話す', 18, undefined],
['話してる', '話す', 18, undefined],
['話している', '話す', 2, undefined],
['話してる', '話す', 2, undefined],
['死んでいる', '死ぬ', 2, undefined],
['死んでる', '死ぬ', 2, undefined],
['飼っている', '飼う', 2, undefined],
Expand Down
64 changes: 48 additions & 16 deletions src/background/deinflect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export const enum Reason {
SuruNoun,
ZaruWoEnai,
NegativeTe,
Irregular,
}

export const deinflectL10NKeys: { [key: number]: string } = {
Expand Down Expand Up @@ -76,6 +77,7 @@ export const deinflectL10NKeys: { [key: number]: string } = {
[Reason.SuruNoun]: 'deinflect_suru_noun',
[Reason.ZaruWoEnai]: 'deinflect_zaru_wo_enai',
[Reason.NegativeTe]: 'deinflect_negative_te',
[Reason.Irregular]: 'deinflect_irregular',
};

const enum Type {
Expand All @@ -85,14 +87,21 @@ const enum Type {
IAdj = 1 << 2,
KuruVerb = 1 << 3,
SuruVerb = 1 << 4,
NounVS = 1 << 5,
All = IchidanVerb | GodanVerb | IAdj | KuruVerb | SuruVerb | NounVS,
SpecialSuruVerb = 1 << 5,
NounVS = 1 << 6,
All = IchidanVerb |
GodanVerb |
IAdj |
KuruVerb |
SuruVerb |
SpecialSuruVerb |
NounVS,
// Intermediate types
Initial = 1 << 6, // original word before any deinflection (from-type only)
TaTeStem = 1 << 7,
DaDeStem = 1 << 8,
MasuStem = 1 << 9,
IrrealisStem = 1 << 10,
Initial = 1 << 7, // original word before any deinflection (from-type only)
TaTeStem = 1 << 8,
DaDeStem = 1 << 9,
MasuStem = 1 << 10,
IrrealisStem = 1 << 11,
}

export { Type as WordType };
Expand Down Expand Up @@ -169,6 +178,12 @@ const deinflectRuleData: Array<
['ざるえぬ', '', Type.IAdj, Type.IrrealisStem, [Reason.ZaruWoEnai]],
['ざる得ぬ', '', Type.IAdj, Type.IrrealisStem, [Reason.ZaruWoEnai]],
['しないで', 'する', Type.Initial, Type.SuruVerb, [Reason.NegativeTe]],
['しさせる', 'する', Type.IchidanVerb, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Causative]],
['しられる', 'する', Type.IchidanVerb, Type.SpecialSuruVerb, [Reason.Irregular, Reason.PotentialOrPassive]],
['せさせる', 'する', Type.IchidanVerb, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Causative]],
['せられる', 'する', Type.IchidanVerb, Type.SpecialSuruVerb, [Reason.Irregular, Reason.PotentialOrPassive]],
['ぜさせる', 'ずる', Type.IchidanVerb, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Causative]],
['ぜられる', 'ずる', Type.IchidanVerb, Type.SpecialSuruVerb, [Reason.Irregular, Reason.PotentialOrPassive]],
['たゆたう', 'たゆたう', Type.TaTeStem, Type.GodanVerb, []],
['たゆとう', 'たゆとう', Type.TaTeStem, Type.GodanVerb, []],
['のたまう', 'のたまう', Type.TaTeStem, Type.GodanVerb, []],
Expand All @@ -193,6 +208,7 @@ const deinflectRuleData: Array<
['御座い', '御座る', Type.MasuStem, Type.GodanVerb, [Reason.MasuStem]],
['させる', 'る', Type.IchidanVerb, Type.IchidanVerb | Type.KuruVerb, [Reason.Causative]],
['させる', 'する', Type.IchidanVerb, Type.SuruVerb, [Reason.Causative]],
['さない', 'する', Type.IAdj, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Negative]],
['される', '', Type.IchidanVerb, Type.IrrealisStem, [Reason.CausativePassive]],
['される', 'する', Type.IchidanVerb, Type.SuruVerb, [Reason.Passive]],
['しない', 'する', Type.IAdj, Type.SuruVerb, [Reason.Negative]],
Expand All @@ -202,6 +218,7 @@ const deinflectRuleData: Array<
['すぎる', '', Type.IchidanVerb, Type.MasuStem, [Reason.Sugiru]],
['過ぎる', 'い', Type.IchidanVerb, Type.IAdj, [Reason.Sugiru]],
['過ぎる', '', Type.IchidanVerb, Type.MasuStem, [Reason.Sugiru]],
['ずれば', 'ずる', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Ba]],
['たまう', 'たまう', Type.TaTeStem, Type.GodanVerb, []],
['たもう', 'たもう', Type.TaTeStem, Type.GodanVerb, []],
['揺蕩う', '揺蕩う', Type.TaTeStem, Type.GodanVerb, []],
Expand Down Expand Up @@ -239,8 +256,8 @@ const deinflectRuleData: Array<
['こい', 'くる', Type.Initial, Type.KuruVerb, [Reason.Imperative]],
['こう', 'く', Type.Initial, Type.GodanVerb, [Reason.Volitional]],
['ごう', 'ぐ', Type.Initial, Type.GodanVerb, [Reason.Volitional]],
['しろ', 'す', Type.Initial, Type.SuruVerb, [Reason.Imperative]],
['しろ', 'する', Type.Initial, Type.SuruVerb, [Reason.Imperative]],
['さず', 'する', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Zu]],
['すぎ', 'い', Type.Initial, Type.IAdj, [Reason.Sugiru]],
['すぎ', '', Type.Initial, Type.MasuStem, [Reason.Sugiru]],
['過ぎ', 'い', Type.Initial, Type.IAdj, [Reason.Sugiru]],
Expand All @@ -249,17 +266,19 @@ const deinflectRuleData: Array<
['せず', 'する', Type.Initial, Type.SuruVerb, [Reason.Zu]],
['せぬ', 'する', Type.Initial, Type.SuruVerb, [Reason.Negative]],
['せん', 'する', Type.Initial, Type.SuruVerb, [Reason.Negative]],
['せず', 'す', Type.Initial, Type.SuruVerb, [Reason.Zu]],
['せぬ', 'す', Type.Initial, Type.SuruVerb, [Reason.Negative]],
['せん', 'す', Type.Initial, Type.SuruVerb, [Reason.Negative]],
['せば', 'す', Type.Initial, Type.GodanVerb | Type.SuruVerb, [Reason.Ba]],
['せば', 'す', Type.Initial, Type.GodanVerb, [Reason.Ba]],
['せば', 'する', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Ba]],
['せる', 'する', Type.IchidanVerb, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Potential]],
['せよ', 'する', Type.Initial, Type.SuruVerb, [Reason.Imperative]],
['せよ', 'す', Type.Initial, Type.SuruVerb, [Reason.Imperative]],
['せる', 'す', Type.IchidanVerb, Type.GodanVerb, [Reason.Potential]],
['せる', '', Type.IchidanVerb, Type.IrrealisStem, [Reason.Causative]],
['ぜず', 'ずる', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Zu]],
['ぜぬ', 'ずる', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Negative]],
['ぜよ', 'ずる', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Imperative]],
['そう', '', Type.Initial, Type.MasuStem, [Reason.Sou]],
['そう', 'い', Type.Initial, Type.IAdj, [Reason.Sou]],
['そう', 'す', Type.Initial, Type.GodanVerb, [Reason.Volitional]],
['そう', 'する', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Volitional]],
['たい', '', Type.IAdj, Type.MasuStem, [Reason.Tai]],
['たら', '', Type.Initial, Type.TaTeStem, [Reason.Tara]],
['だら', '', Type.Initial, Type.DaDeStem, [Reason.Tara]],
Expand Down Expand Up @@ -334,10 +353,11 @@ const deinflectRuleData: Array<
['さ', 'す', Type.IrrealisStem, Type.GodanVerb, []],
['し', 'す', Type.MasuStem, Type.GodanVerb, [Reason.MasuStem]],
['し', 'する', Type.MasuStem, Type.SuruVerb, [Reason.MasuStem]],
['し', 'す', Type.TaTeStem, Type.GodanVerb | Type.SuruVerb, []],
['し', 'す', Type.TaTeStem, Type.GodanVerb, []],
['し', 'する', Type.TaTeStem, Type.SuruVerb, []],
['ず', '', Type.Initial, Type.IrrealisStem, [Reason.Zu]],
['せ', 'す', Type.Initial, Type.GodanVerb, [Reason.Imperative]],
['せ', 'する', Type.Initial, Type.SpecialSuruVerb, [Reason.Irregular, Reason.Imperative]],
['た', 'つ', Type.IrrealisStem, Type.GodanVerb, []],
['た', '', Type.Initial, Type.TaTeStem, [Reason.Past]],
['だ', '', Type.Initial, Type.DaDeStem, [Reason.Past]],
Expand Down Expand Up @@ -514,9 +534,21 @@ export function deinflect(word: string): CandidateWord[] {
continue;
}

// Continue if the rule introduces a duplicate in the reason chain,
// as it wouldn't make sense grammatically.
// Verify that adding the rule won't lead to duplicates or an unreasonable
// sequencing of reasons in the reason chain.
const ruleReasons = new Set(rule.reasons);

// Avoid matches such as 'potential < potential or passive' or
// 'potential < causative', which are grammatically incorrect.
// This is an issue when handling irregular forms like 罰せられる,
// where the structure could be misinterpreted as
// 'irregular < potential < potential or passive' instead of the correct
// 'irregular < potential or passive'.
if (ruleReasons.has(Reason.Potential)) {
ruleReasons.add(Reason.PotentialOrPassive);
ruleReasons.add(Reason.Causative);
}

if (thisCandidate.reasonChains.flat().some((r) => ruleReasons.has(r))) {
continue;
}
Expand Down
9 changes: 8 additions & 1 deletion src/background/word-search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,14 @@ function entryMatchesType(entry: DictionaryWordResult, type: number): boolean {

if (
type & WordType.SuruVerb &&
hasMatchingSense((pos) => pos.startsWith('vs-'))
hasMatchingSense((pos) => pos === 'vs-i' || pos === 'vs-s')
) {
return true;
}

if (
type & WordType.SpecialSuruVerb &&
hasMatchingSense((pos) => pos === 'vs-s' || pos === 'vz')
) {
return true;
}
Expand Down