Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add "alternative_spellings" property + minor bug fix for words with alternative spellings in title #147

Merged
merged 17 commits into from
Oct 11, 2021
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,5 @@ update-test-data:
./run_duden.py --export Kragen > tests/test_data/Kragen.yaml
./run_duden.py --export Petersilie > tests/test_data/Petersilie.yaml
./run_duden.py --export einfach -r1 > tests/test_data/einfach.yaml
./run_duden.py --export Keyboard > tests/test_data/Keyboard.yaml
./run_duden.py --export Keyboard > tests/test_data/Keyboard.yaml
./run_duden.py --export Meme > tests/test_data/Meme.yaml
1 change: 1 addition & 0 deletions completions/duden
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ _duden () {
--no-cache
--export
--phonetic
--alternative-spellings
)
opts_with_arg=(
-r --result
Expand Down
2 changes: 1 addition & 1 deletion completions/duden.fish
Original file line number Diff line number Diff line change
@@ -1 +1 @@
complete -c duden -xa "-h --help --title --name --article --part-of-speech --frequency --usage --word-separation --meaning-overview --synonyms --origin --compounds -g --grammar -r --result --fuzzy --version --no-cache --export --phonetic"
complete -c duden -xa "-h --help --title --name --article --part-of-speech --frequency --usage --word-separation --meaning-overview --synonyms --origin --compounds -g --grammar -r --result --fuzzy --version --no-cache --export --phonetic --alternative-spellings"
5 changes: 5 additions & 0 deletions duden/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ def display_word(word, args):
elif args.phonetic:
if word.phonetic:
print(word.phonetic)
elif args.alternative_spellings:
if word.alternative_spellings:
print(word.alternative_spellings)
elif args.grammar:
display_grammar(word, args.grammar)
elif args.export:
Expand Down Expand Up @@ -119,6 +122,8 @@ def parse_args():
help=_('print program version'))
parser.add_argument('--phonetic', action='store_true',
help=_('display pronunciation'))
parser.add_argument('--alternative-spellings', action='store_true',
radomirbosak marked this conversation as resolved.
Show resolved Hide resolved
help=_('display alternative spellings'))

return parser.parse_args()

Expand Down
3 changes: 3 additions & 0 deletions duden/display.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,6 @@ def describe_word(word):
for part_of_speech, words in word.compounds.items():
print(blue(' - {}:'.format(part_of_speech.capitalize())),
', '.join(words))

if word.alternative_spellings:
print(white(_('Alternative spellings:'), bold=True), word.alternative_spellings)
32 changes: 29 additions & 3 deletions duden/word.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
EXPORT_ATTRIBUTES = [
'name', 'urlname', 'title', 'article', 'part_of_speech', 'usage',
'frequency', 'word_separation', 'meaning_overview', 'origin', 'compounds',
'grammar_raw', 'synonyms', 'words_before', 'words_after', 'phonetic'
'grammar_raw', 'synonyms', 'words_before', 'words_after', 'phonetic', 'alternative_spellings'
]

gettext.install('duden', os.path.join(os.path.dirname(__file__), 'locale'))
Expand Down Expand Up @@ -53,12 +53,20 @@ def name(self):
"""
Word without article
"""

# Find span with class "lemma__main"
title_element = self.soup.find('span', {"class": "lemma__main"})
if title_element is not None:
# remove soft hyphens "\xad" and return
return clear_text(title_element.get_text())

# if the title_element does not exist, we fall back to the old method
if self.part_of_speech is not None and 'Substantiv' not in self.part_of_speech:
return self.title
if ', ' not in self.title:
return self.title

name, _ = self.title.split(', ')
name, _ = self.title.split(', ', 1)
return name

@property
Expand Down Expand Up @@ -88,12 +96,19 @@ def article(self):
"""
Word article
"""
# Find span with class "lemma__determiner"
article_element = self.soup.find('span', {"class": "lemma__determiner"})
if article_element is not None:
# remove soft hyphens "\xad" and return
return article_element.get_text().replace('\xad', '').strip()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`clear_text` can be used here as well (as in `name` above), instead of the manual `.replace('\xad', '').strip()`.


# if the article_element does not exist, we fall back to the old method
if self.part_of_speech is not None and 'Substantiv' not in self.part_of_speech:
return None
if ', ' not in self.title:
return None

_, article = self.title.split(', ')
_, article = self.title.split(', ', 1)
return article

def _find_tuple_dl(self, key, element=None):
Expand Down Expand Up @@ -357,3 +372,14 @@ def phonetic(self):
return ipa.get_text()

return None

@property
def alternative_spellings(self):
    """
    Alternative spellings of the word

    Collects the text of every ``<span class="lemma__alt-spelling">``
    element found in ``self.soup``.

    Returns:
        A list of strings, one per matching span, in the order
        ``find_all`` yields them. The list is empty (never ``None``)
        when the page contains no such span.
    """
    # find_all returns a (possibly empty) result set rather than None,
    # so a None guard would be dead code: zero matches give an empty list.
    spelling_spans = self.soup.find_all('span', {"class": "lemma__alt-spelling"})
    return [span.get_text() for span in spelling_spans]
1 change: 1 addition & 0 deletions tests/test_data/Barmherzigkeit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ words_after:
- Bar-Mizwa
- Bar-Mizwa
phonetic: null
alternative_spellings: []
1 change: 1 addition & 0 deletions tests/test_data/Feiertag.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,4 @@ words_after:
- Feiertagsruhe
- Feiertagsstille
phonetic: null
alternative_spellings: []
1 change: 1 addition & 0 deletions tests/test_data/Keyboard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,4 @@ words_after:
- Keyboardspielerin
- Keylogger
phonetic: '[ˈkiːbɔːɐ̯t]'
alternative_spellings: []
1 change: 1 addition & 0 deletions tests/test_data/Kragen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ words_after:
- kragenlos
- Kragennummer
phonetic: null
alternative_spellings: []
31 changes: 31 additions & 0 deletions tests/test_data/Meme.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
name: Meme
urlname: Meme
title: Meme, auch Mem, das
article: das
part_of_speech: Substantiv, Neutrum
usage: null
frequency: 1
word_separation:
- Meme, Mem
meaning_overview: (interessantes oder witziges) Bild, Video o. Ä., das in sozialen
Netzwerken schnell und weit verbreitet wird
origin: englisch meme, eigentlich = weit verbreitete Idee o. Ä., zu griechisch mnḗmē =
Gedächtnis
compounds: null
grammar_raw: []
synonyms: null
words_before:
- Membra
- Membran
- Membrane
- Membranofon
- Membrum
words_after:
- Memel
- Memel
- Memeler
- Memeler
- Memelerin
phonetic: '[miːm]'
alternative_spellings:
- Mem
1 change: 1 addition & 0 deletions tests/test_data/Petersilie.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ words_after:
- Petersilienwurzel
- Peterskirche
phonetic: '[petɐˈziːli̯ə]'
alternative_spellings: []
1 change: 1 addition & 0 deletions tests/test_data/Qat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ words_after:
- qdm
- q. e. d.
phonetic: '[kat]'
alternative_spellings: []
1 change: 1 addition & 0 deletions tests/test_data/einfach.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@ words_after:
- einfachgesetzlich
- Einfachheit
phonetic: '[ˈaɪ̯nfax]'
alternative_spellings: []
1 change: 1 addition & 0 deletions tests/test_data/laufen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,4 @@ words_after:
- Läufer
- Lauferei
phonetic: null
alternative_spellings: []