Make basic changes to fix inflection parsing

radomirbosak · Feb 25, 2024 · 635b4ec · 635b4ec
1 parent 3856976
commit 635b4ec
Show file tree

Hide file tree

Showing 27 changed files with 733 additions and 273 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,11 @@
 
 ## Unreleased
 
+Breaking:
+
+* Rework and fix (grammatical) inflection parsing
+  * Replace the `.grammar` and `.grammar_raw` word properties with `.inflection`, and the `--grammar` CLI option with `--inflect`
+
 New features:
 
 * Add `pronunciation_audio_url` property #183 by @mundanevision20

diff --git a/README.md b/README.md
@@ -35,18 +35,16 @@ Ohr; [Ge]hörorgan; (salopp) Horcher, Horchlappen, Lauscher; (Jägersprache) Los
 
 ```console
 $ duden --help
-usage: duden [-h] [--title] [--name] [--article] [--part-of-speech]
-             [--frequency] [--usage] [--word-separation]
-             [--meaning-overview] [--synonyms] [--origin]
-             [--compounds [COMPOUNDS]] [-g [GRAMMAR]] [--export]
-             [--words-before] [--words-after] [-r RESULT] [--fuzzy]
+usage: duden [-h] [--title] [--name] [--article] [--part-of-speech] [--frequency] [--usage]
+             [--word-separation] [--meaning-overview] [--synonyms] [--origin] [--grammar-overview]
+             [--compounds [COMPOUNDS]] [-i] [--export] [--words-before] [--words-after] [-r RESULT] [--fuzzy]
              [--no-cache] [-V] [--phonetic] [--alternative-spellings]
              word
 
 positional arguments:
   word
 
-optional arguments:
+options:
   -h, --help            show this help message and exit
   --title               display word and article
   --name                display the word itself
@@ -58,10 +56,10 @@ optional arguments:
   --meaning-overview    display meaning overview
   --synonyms            list synonyms (line separated)
   --origin              display origin
+  --grammar-overview    display short grammar overview
   --compounds [COMPOUNDS]
                         list common compounds
-  -g [GRAMMAR], --grammar [GRAMMAR]
-                        list grammar forms
+  -i, --inflect         display inflections
   --export              export parsed word attributes in yaml format
   --words-before        list 5 words before this one
   --words-after         list 5 words after this one

diff --git a/completions/duden b/completions/duden
@@ -17,7 +17,7 @@ _duden () {
         --origin
         --grammar-overview
         --compounds
-        -g --grammar
+        -i --inflect
         -r --result
         --fuzzy
         --version

diff --git a/completions/duden.fish b/completions/duden.fish
@@ -1 +1 @@
-complete -c duden -xa "-h --help --title --name --article --part-of-speech --frequency --usage --word-separation --meaning-overview --synonyms --origin --grammar-overview --compounds -g --grammar -r --result --fuzzy --version --no-cache --export --phonetic --alternative-spellings"
+complete -c duden -xa "-h --help --title --name --article --part-of-speech --frequency --usage --word-separation --meaning-overview --synonyms --origin --grammar-overview --compounds -i --inflect -r --result --fuzzy --version --no-cache --export --phonetic --alternative-spellings"
diff --git a/docs/usage.md b/docs/usage.md
@@ -1,4 +1,4 @@
-# Usage
+# Module usage
 
 This example showcases the most useful functions of the `DudenWord` class.
 
@@ -36,7 +36,7 @@ This example showcases the most useful functions of the `DudenWord` class.
 'barmherziges Wesen, Verhalten'
 
 > w.synonyms
-['[Engels]güte, Milde, Nachsicht, Nachsichtigkeit']
+'[Engels]güte, Milde, Nachsicht, Nachsichtigkeit; (gehoben) Herzensgüte, Mildtätigkeit, Seelengüte; (bildungssprachlich) Humanität, Indulgenz; (veraltend) Wohltätigkeit; (Religion) Gnade'
 
 > w.origin
 'mittelhochdeutsch barmherzekeit, barmherze, althochdeutsch armherzi, nach (kirchen)lateinisch misericordia'
@@ -54,19 +54,21 @@ This example showcases the most useful functions of the `DudenWord` class.
 > w.pronunciation_audio_url
 'https://.../filename.mp3'
 
-> w.grammar_raw
-[({'Indikativ', 'Person I', 'Präsens', 'Singular'}, 'ich laufe'),
- ({'Konjunktiv I', 'Person I', 'Präsens', 'Singular'}, 'ich laufe'),
- ({'Imperativ', 'Person I', 'Präsens', 'Singular'}, '–'),
- ({'Indikativ', 'Person II', 'Präsens', 'Singular'}, 'du läufst'),
-...
- ({'Konjunktiv II', 'Person III', 'Plural', 'Präteritum'}, 'sie liefen'),
- ({'Partizip I'}, 'laufend'),
- ({'Partizip II'}, 'gelaufen'),
- ({'Infinitiv mit zu'}, 'zu laufen')]
+> w.inflection.data
+{'Indikativ': {'Präsens': {'ich': 'laufe (mich/mir)',
+   'du': 'läufst (dich/dir)',
+   'er/sie/es': 'läuft (sich)',
+   'wir': 'laufen (uns)',
+   'ihr': 'lauft (euch)',
+   'sie': 'laufen (sich)'},
+ ...
+ },
+ ...
+}
 
-> w.grammar(duden.SINGULAR, duden.PRASENS, duden.INDIKATIV)
-['ich laufe', 'du läufst', 'er/sie/es läuft']
+> from duden import Mood, Tense, Person
+> w.inflection.verb_conjugate(Mood.INDICATIVE, Tense.PRESENT, Person.FIRST_SINGULAR)
+'laufe (mich/mir)'
 
 > w = duden.get('Meme')
 > w.alternative_spellings
@@ -76,16 +78,134 @@ This example showcases the most useful functions of the `DudenWord` class.
 '[mi:m]'
 ```
 
-## Other functions
+## Word inflection
+
+For some nouns, adjectives and verbs, duden provides inflection data. The data structure that provides access to this data is a class called `Inflector`, accessible via the `word.inflection` attribute.
+
+```python
+> word = get("Hase")
+> inf = word.inflection
+> inf
+(Inflector: 'der Hase', ...)
+```
+
+This object provides the data in two ways:
+1. In raw form `word.inflection.data`:
+
+```python
+> inf.data
+{'Deklination': {'Singular': {'Nominativ': 'der Hase',
+   'Akkusativ': 'den Hasen',
+   'Dativ': 'dem Hasen',
+   'Genitiv': 'des Hasen'},
+  'Plural': {'Nominativ': 'die Hasen',
+   'Akkusativ': 'die Hasen',
+   'Dativ': 'den Hasen',
+   'Genitiv': 'der Hasen'}}}
+```
+
+2. Through specific inflection methods like `word.inflection.noun_decline`:
+```python
+> inf.noun_decline("Singular", "Nominativ")
+'der Hase'
+```
+
+### Inflection methods
+Inflection methods are provided for the user's convenience and read data more or less directly from the raw inflection data.
+
+The methods produce an inflected form of the word when given the required grammatical categories, such as _grammatical case_, _person_, _gender_ or _tense_.
+
+Grammatical category values can be specified by the following enum values:
+```python
+> from duden import Number, Case, Gender, Degree, Person, Mood, Tense, ImperativePerson, InfinitiveForm
+> Case.NOMINATIVE
+<Case.NOMINATIVE: 'Nominativ'>
+```
+
+#### Noun declension
+Nouns can be declined by this method:
+```python
+inf.noun_decline(number, case)
+```
+where grammatical number and case can attain these values:
+
+| Grammatical category | Values |
+| -------------------- | ------ |
+| Number | `Number.SINGULAR`, `Number.PLURAL` |
+| Case   | `Case.NOMINATIVE`, `Case.GENITIVE`, `Case.DATIVE`, `Case.ACCUSATIVE` |
+
+Example:
+```python
+> inf = duden.get("Hase").inflection
+> inf.noun_decline(Number.PLURAL, Case.DATIVE)
+'den Hasen'
+```
+
+#### Adjective declension
+
+The adjective declension form is determined by _grammatical gender_ and _case_. The adjective comparison form is determined by _grammatical degree_.
 
-### get
+```python
+inf.adjective_decline_strong(gender, case)
+inf.adjective_decline_weak(gender, case)
+inf.adjective_decline_mixed(gender, case)
+inf.adjective_compare(degree)
+```
+
+| Grammatical category | Values |
+| -------------------- | ------ |
+| Gender |`Gender.MASCULINE`, `Gender.FEMININE`, `Gender.NEUTER` |
+| Case   | `Case.NOMINATIVE`, `Case.GENITIVE`, `Case.DATIVE`, `Case.ACCUSATIVE` |
+| Degree | `Degree.POSITIVE`, `Degree.COMPARATIVE`, `Degree.SUPERLATIVE` |
+
+Example:
+```python
+> inf = duden.get("schnell").inflection
+> inf.adjective_compare(Degree.SUPERLATIVE)
+'am schnellsten'
+```
+
+#### Verb conjugation
+Conjugated verb form is determined by the _grammatical mood_, _tense_ and _person_.
+
+The verb imperative and infinitive forms are each defined by only one grammatical category: _person_ (only two values) and _infinitive form_, respectively.
+
+```python
+inf.verb_conjugate(mood, tense, person)
+inf.verb_imperative(person)
+inf.verb_infinitive_forms(form)
+```
+
+| Grammatical category | Values |
+| -------------------- | ------ |
+| Mood | `Mood.INDICATIVE`, `Mood.SUBJUNCTIVE_I`, `Mood.SUBJUNCTIVE_II` |
+| Tense | `Tense.PRESENT`, `Tense.PAST`, `Tense.PERFECT`, `Tense.PAST_PERFECT`, `Tense.FUTURE`, `Tense.FUTURE_PERFECT` |
+| Person | `Person.FIRST_SINGULAR`, `Person.SECOND_SINGULAR`, `Person.THIRD_SINGULAR`, `Person.FIRST_PLURAL`, `Person.SECOND_PLURAL`, `Person.THIRD_PLURAL` |
+| Person (imperative) | `ImperativePerson.PERSON_2_SINGULAR`, `ImperativePerson.PERSON_2_PLURAL` |
+| Infinitive forms | `InfinitiveForm.INFINITIVE_WITH_ZU`, `InfinitiveForm.PARTICIPLE_I`, `InfinitiveForm.PARTICIPLE_II` |
+
+Note that not all mood and tense combinations are valid when conjugating a verb:
+```python
+> word.inflection.verb_conjugate(Mood.SUBJUNCTIVE_II, Tense.PRESENT, Person.FIRST_SINGULAR)
+ValueError: Cannot inflect. Missing data for: 'Konjunktiv II'.'Präsens' . Did you mean 'Präteritum', 'Plusquamperfekt', 'Futur I', 'Futur II'?
+```
+
+Example:
+```python
+> inf = duden.get("laufen").inflection
+> inf.verb_infinitive_forms(InfinitiveForm.PARTICIPLE_I)
+'laufend'
+```
+
+## Word searching
+
+### `get` function
 
 The `duden.get` function directly requests the URL `https://www.duden.de/rechtschreibung/{word}` and retrieves a single parsed word. If the page is not found, it returns `None`.
 
-### search
+### `search` function
 
 Some words such as `einfach` have multiple entries in the database and simply fetching `https://www.duden.de/rechtschreibung/einfach` yields a 404 page not found:
-
 ```python
 > duden.get('einfach')
 None
@@ -118,16 +238,14 @@ By default, just words with title matching exactly the searched word are returne
 ```
 
 To avoid automatically retrieving and parsing the search results, use the `return_words` keyword. This will return the word urlnames only.
-
 ```python
 > duden.search('einfach', return_words=False)
 ['einfach_einmal_simpel', 'einfach_vollkommen_wirklich']
 ```
 
-### Word of the day
-
-Retrieves a parses the Word of the day from the main page.
+## Word of the day
 
+Retrieves and parses the Word of the day from the main page.
 ```python
 > duden.get_word_of_the_day
 Quasimodogeniti (Substantiv ohne Artikel)

diff --git a/duden/__init__.py b/duden/__init__.py
@@ -13,44 +13,18 @@
     "get",
     "search",
     "get_word_of_the_day",
-    "SINGULAR",
-    "PLURAL",
-    "PRASENS",
-    "PRATERITUM",
-    "INDIKATIV",
-    "IMPERATIV",
-    "KONJUKTIV_1",
-    "KONJUKTIV_2",
-    "PARTIZIP_1",
-    "PARTIZIP_2",
-    "INFINITIV_MIT_ZU",
-    "PERSON_1",
-    "PERSON_2",
-    "PERSON_3",
-    "NOMINATIV",
-    "GENITIV",
-    "DATIV",
-    "AKKUSATIV",
 ]
 
-from .constants import (
-    AKKUSATIV,
-    DATIV,
-    GENITIV,
-    IMPERATIV,
-    INDIKATIV,
-    INFINITIV_MIT_ZU,
-    KONJUKTIV_1,
-    KONJUKTIV_2,
-    NOMINATIV,
-    PARTIZIP_1,
-    PARTIZIP_2,
-    PERSON_1,
-    PERSON_2,
-    PERSON_3,
-    PLURAL,
-    PRASENS,
-    PRATERITUM,
-    SINGULAR,
+# grammatical categories enums
+from .inflection import (
+    Case,
+    Degree,
+    Gender,
+    ImperativePerson,
+    InfinitiveForm,
+    Mood,
+    Number,
+    Person,
+    Tense,
 )
-from .search import get, get_word_of_the_day, search
+from .request import get, get_word_of_the_day, search
diff --git a/duden/cli.py b/duden/cli.py
@@ -13,11 +13,11 @@
 from .display import (
     describe_word,
     display_compounds,
-    display_grammar,
+    display_inflections,
     print_string_or_list,
     print_tree_of_strings,
 )
-from .search import get, search
+from .request import get, search
 
 
 def display_word(word, args):
@@ -67,8 +67,9 @@ def display_word(word, args):
         if word.alternative_spellings:
             for spelling in word.alternative_spellings:
                 print(spelling)
-    elif args.grammar:
-        display_grammar(word, args.grammar)
+    elif args.inflect:
+        if word.inflection:
+            display_inflections(word)
     elif args.export:
         yaml_string = yaml.dump(word.export(), sort_keys=False, allow_unicode=True)
         print(yaml_string, end="")
@@ -124,7 +125,7 @@ def parse_args():
         "--compounds", nargs="?", const="ALL", help=_("list common compounds")
     )
     parser.add_argument(
-        "-g", "--grammar", nargs="?", const="ALL", help=_("list grammar forms")
+        "-i", "--inflect", action="store_true", help=_("display inflections")
     )
     parser.add_argument(
         "--export",
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		complete -c duden -xa "-h --help --title --name --article --part-of-speech --frequency --usage --word-separation --meaning-overview --synonyms --origin --grammar-overview --compounds -g --grammar -r --result --fuzzy --version --no-cache --export --phonetic --alternative-spellings"
		complete -c duden -xa "-h --help --title --name --article --part-of-speech --frequency --usage --word-separation --meaning-overview --synonyms --origin --grammar-overview --compounds -i --inflect -r --result --fuzzy --version --no-cache --export --phonetic --alternative-spellings"