Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
1over137 committed Mar 23, 2024
1 parent c840623 commit 9fe1e3f
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 97 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = vocabsieve
version = 0.11.2
version = 0.12.0
author = FreeLanguageTools
author_email = pypi@5f37.8shield.net
description = A simple, effective sentence mining tool.
Expand Down
211 changes: 116 additions & 95 deletions tests/test_local_dictionary.py
Original file line number Diff line number Diff line change
@@ -1,124 +1,145 @@
from vocabsieve.local_dictionary import LocalDictionary

def test_local_dictionary():
    """Round-trip a one-entry dictionary through import, lookup and deletion."""
    store = LocalDictionary("testdir/basic")
    # The test expects to start from a store with no dictionaries in it.
    assert store.countDicts() == 0

    entries = {"test": "a test is a test"}
    store.importdict(entries, "de", "test-dict")
    assert store.countDicts() == 1
    assert store.define("test", "de", "test-dict") == "a test is a test"

    # Deleting the only dictionary must bring the count back to zero.
    store.deletedict("test-dict")
    assert store.countDicts() == 0

def test_import_stardict_normal():
    """Import the quick English-Russian StarDict fixture and look up three words."""
    ifo_path = "testdata/stardict/quick_eng-rus-2.4.2/quick_english-russian.ifo"
    dict_name = "quick_eng-rus-2.4.2"
    # Headword -> definition expected from the imported dictionary.
    expected = {
        "abdominous": "толстый",
        "loophole": "бойница",
        "luggage": "багаж",
    }

    store = LocalDictionary("testdir/stardict_normal")
    assert store.countDicts() == 0
    store.dictimport(ifo_path, dicttype="stardict", lang="en", name=dict_name)
    assert store.countDicts() == 1
    for headword, definition in expected.items():
        assert store.define(headword, "en", dict_name) == definition

def test_local_dictionary(tmp_path):
    """Round-trip a one-entry dictionary through import, lookup and deletion.

    Args:
        tmp_path: pytest's per-test temporary directory, passed to
            ``LocalDictionary`` as its storage location.
    """
    db = LocalDictionary(tmp_path)
    # The test expects to start from a store with no dictionaries in it.
    assert db.countDicts() == 0
    db.importdict({"test": "a test is a test"}, "de", "test-dict")
    assert db.countDicts() == 1
    assert db.define("test", "de", "test-dict") == "a test is a test"
    # Deleting the only dictionary must bring the count back to zero.
    db.deletedict("test-dict")
    assert db.countDicts() == 0

def test_import_stardict_xdxf():
dictdb = LocalDictionary("testdir/stardict_xdxf")
assert dictdb.countDicts() == 0
dictdb.dictimport("testdata/stardict/stardict-FR-LingvoUniversal-2.4.2/FR-Universal.ifo",
dicttype="stardict",
lang="fr",
name="fr-universal")
assert dictdb.countDicts() == 1
assert dictdb.define("accouchement", "fr", "fr-universal") == '''<i>m</i>

def test_import_stardict_normal(tmp_path):
    """Import the quick English-Russian StarDict fixture and look up three words.

    Args:
        tmp_path: pytest's per-test temporary directory, passed to
            ``LocalDictionary`` as its storage location.
    """
    ifo_path = "testdata/stardict/quick_eng-rus-2.4.2/quick_english-russian.ifo"
    dict_name = "quick_eng-rus-2.4.2"
    # Headword -> definition expected from the imported dictionary.
    expected = {
        "abdominous": "толстый",
        "loophole": "бойница",
        "luggage": "багаж",
    }

    dictionary = LocalDictionary(tmp_path)
    assert dictionary.countDicts() == 0
    dictionary.dictimport(ifo_path, dicttype="stardict", lang="en", name=dict_name)
    assert dictionary.countDicts() == 1
    for headword, definition in expected.items():
        assert dictionary.define(headword, "en", dict_name) == definition


def test_import_stardict_xdxf(tmp_path):
db = LocalDictionary(tmp_path)
assert db.countDicts() == 0
db.dictimport("testdata/stardict/stardict-FR-LingvoUniversal-2.4.2/FR-Universal.ifo",
dicttype="stardict",
lang="fr",
name="fr-universal")
assert db.countDicts() == 1
assert db.define("accouchement", "fr", "fr-universal") == '''<i>m</i>
1) р'оды
accouchement avant terme, accouchement prématuré — преждевр'еменные р'оды
accouchement après terme, accouchement tardif — запозд'алые р'оды
accouchement sans douleur — обезб'оливание р'одов
douleurs de l'accouchement — родов'ые б'оли
2) <i>перен.</i> дл'ительное созрев'ание, тр'удное осуществл'ение'''
assert dictdb.define("persévérant", "fr", "fr-universal") == '''<i>adj</i>, <i>subst</i> (<i>fém</i> - persévérante)
assert db.define("persévérant", "fr", "fr-universal") == '''<i>adj</i>, <i>subst</i> (<i>fém</i> - persévérante)
1) наст'ойчивый [наст'ойчивая], уп'орный [уп'орная]; твёрдый [твёрдая]
2) посто'янный [посто'янная]'''
assert dictdb.define("pièce-raccord", "fr", "fr-universal") == '''pièce-raccord
assert db.define("pièce-raccord", "fr", "fr-universal") == '''pièce-raccord
<i>m</i>
<i>(pl s + s</i> ) соедин'ительная часть, соедин'ительная дет'аль'''

def test_import_dsl():
    """Import three DSL fixtures (`.dsl` and `.dsl.dz`) and check their lookups."""
    store = LocalDictionary("testdir/dsl")
    assert store.countDicts() == 0

    # First two imports: the `.dsl` file and its `.dsl.dz` counterpart.
    ru_en_sources = (
        ("testdata/dsl/ru_en.dsl", "dsl_test"),
        ("testdata/dsl/ru_en.dsl.dz", "dsl_test2"),
    )
    for source_path, dict_name in ru_en_sources:
        store.dictimport(source_path, dicttype="dsl", lang="ru", name=dict_name)
    assert store.countDicts() == 2

    # Both imports are expected to return the same definitions.
    ru_en_expected = {
        "зубчатый": '''serrated, toothed''',
        "лиственный": '''broadleaf; deciduous; leafy''',
        "окорять": '''bark, peel''',
    }
    for _, dict_name in ru_en_sources:
        for headword, definition in ru_en_expected.items():
            assert store.define(headword, "ru", dict_name) == definition

    # Third import: a dictionary whose entries carry <br> markup.
    store.dictimport(
        "testdata/dsl/universal.dsl.dz",
        dicttype="dsl",
        lang="ru",
        name="dsl_test3",
    )
    assert store.countDicts() == 3

    universal_expected = {
        "ямчатость": '''ж. с.-х.<br> (патологическое свойство плодов) pit<br>''',
        "эмиграция": '''ж.<br> 1) (переселение из своего отечества) emigration<br> 2) (пребывание в другой стране) life in emigration<br> жить в эмиграции — live as an emigrant / émigré (фр.) /<'emɪgreɪ/><br> 3) собир. emigrants pl; émigrés (фр.) /<'emɪgreɪz/> pl<br>''',
        "щемящий": '''1) (ноющий, тупой) aching /<'eɪk-/>, nagging<br> щемящая боль — nagging ache /<eɪk/><br> 2) (мучительный, гнетущий) painful, melancholy, oppressive<br> щемящий душу напев — plaintive / melancholy /<-k-/> tune<br>''',
    }
    for headword, definition in universal_expected.items():
        assert store.define(headword, "ru", "dsl_test3") == definition

def test_import_cognates():
    """Import the cognates fixture and check the language lists it returns."""
    store = LocalDictionary("testdir/cognates")
    assert store.countDicts() == 0
    store.dictimport(
        "testdata/cognates/cognates.json.gz",
        dicttype="cognates",
        lang="<all>",
        name="cognates",
    )
    assert store.countDicts() == 1

    # (headword, language, expected definition string) checked in this order.
    cases = (
        ("chodník", "cs", '''["sk", "pl"]'''),
        ("beluga", "hr", '''["fi", "hu", "ru", "nl", "en", "de", "bg", "fr", "ro", "ca", "mhr", "kk", "pt", "eo", "uk", "cs", "es"]'''),
        ("apple", "en", '''["nl", "ksh", "xh", "nso", "da", "kn", "hsb", "pl", "dsb", "uk", "ltg", "hr", "af", "ru", "nb", "lb", "pap", "bg", "ml", "tn", "brx", "gd", "jam", "sah", "gv", "ve", "zu", "cs", "wym", "si", "cy", "fo", "sco", "bn", "sk", "ga", "sv", "zsm", "fy", "be", "mk", "as", "mi", "cu", "lt", "abe", "de", "nn", "br", "id", "ta", "st", "kok", "te", "ms", "sl", "is"]'''),
        ("tragisch", "de", '''["nl", "fi", "ro", "pt", "pl", "hr", "hu", "nb", "ast", "fr", "ms", "eu", "eo", "cs", "ca", "sk", "sv", "lij", "es", "en", "id", "oc", "gl", "sl"]'''),
    )
    for headword, language, definition in cases:
        assert store.define(headword, language, "cognates") == definition
def test_import_dsl(tmp_path):
    """Import three DSL fixtures (`.dsl` and `.dsl.dz`) and check their lookups.

    Args:
        tmp_path: pytest's per-test temporary directory, passed to
            ``LocalDictionary`` as its storage location.
    """
    dictionary = LocalDictionary(tmp_path)
    assert dictionary.countDicts() == 0

    # First two imports: the `.dsl` file and its `.dsl.dz` counterpart.
    ru_en_sources = (
        ("testdata/dsl/ru_en.dsl", "dsl_test"),
        ("testdata/dsl/ru_en.dsl.dz", "dsl_test2"),
    )
    for source_path, dict_name in ru_en_sources:
        dictionary.dictimport(source_path, dicttype="dsl", lang="ru", name=dict_name)
    assert dictionary.countDicts() == 2

    # Both imports are expected to return the same definitions.
    ru_en_expected = {
        "зубчатый": '''serrated, toothed''',
        "лиственный": '''broadleaf; deciduous; leafy''',
        "окорять": '''bark, peel''',
    }
    for _, dict_name in ru_en_sources:
        for headword, definition in ru_en_expected.items():
            assert dictionary.define(headword, "ru", dict_name) == definition

    # Third import: a dictionary whose entries carry <br> markup.
    dictionary.dictimport(
        "testdata/dsl/universal.dsl.dz",
        dicttype="dsl",
        lang="ru",
        name="dsl_test3",
    )
    assert dictionary.countDicts() == 3

    universal_expected = {
        "ямчатость": '''ж. с.-х.<br> (патологическое свойство плодов) pit<br>''',
        "эмиграция": '''ж.<br> 1) (переселение из своего отечества) emigration<br> 2) (пребывание в другой стране) life in emigration<br> жить в эмиграции — live as an emigrant / émigré (фр.) /<'emɪgreɪ/><br> 3) собир. emigrants pl; émigrés (фр.) /<'emɪgreɪz/> pl<br>''',
        "щемящий": '''1) (ноющий, тупой) aching /<'eɪk-/>, nagging<br> щемящая боль — nagging ache /<eɪk/><br> 2) (мучительный, гнетущий) painful, melancholy, oppressive<br> щемящий душу напев — plaintive / melancholy /<-k-/> tune<br>''',
    }
    for headword, definition in universal_expected.items():
        assert dictionary.define(headword, "ru", "dsl_test3") == definition


def test_kaikki():
dictdb = LocalDictionary("testdir/kaikki")
assert dictdb.countDicts() == 0
dictdb.dictimport("testdata/kaikki/kaikki.org-dictionary-Swedish.json.gz",
dicttype="wiktdump",
lang="sv",
name="kaikki-swedish"
)
assert dictdb.countDicts() == 1
assert dictdb.define("uppåkrakaka", "sv", "kaikki-swedish") == '''<i>Noun</i>
def test_import_cognates(tmp_path):
    """Import the cognates fixture and check the language lists it returns.

    Args:
        tmp_path: pytest's per-test temporary directory, passed to
            ``LocalDictionary`` as its storage location.
    """
    dictionary = LocalDictionary(tmp_path)
    assert dictionary.countDicts() == 0
    dictionary.dictimport(
        "testdata/cognates/cognates.json.gz",
        dicttype="cognates",
        lang="<all>",
        name="cognates",
    )

    # (headword, language, expected definition string) checked in this order.
    cases = (
        ("chodník", "cs", '''["sk", "pl"]'''),
        ("beluga", "hr", '''["fi", "hu", "ru", "nl", "en", "de", "bg", "fr", "ro", "ca", "mhr", "kk", "pt", "eo", "uk", "cs", "es"]'''),
        ("apple", "en", '''["nl", "ksh", "xh", "nso", "da", "kn", "hsb", "pl", "dsb", "uk", "ltg", "hr", "af", "ru", "nb", "lb", "pap", "bg", "ml", "tn", "brx", "gd", "jam", "sah", "gv", "ve", "zu", "cs", "wym", "si", "cy", "fo", "sco", "bn", "sk", "ga", "sv", "zsm", "fy", "be", "mk", "as", "mi", "cu", "lt", "abe", "de", "nn", "br", "id", "ta", "st", "kok", "te", "ms", "sl", "is"]'''),
        ("tragisch", "de", '''["nl", "fi", "ro", "pt", "pl", "hr", "hu", "nb", "ast", "fr", "ms", "eu", "eo", "cs", "ca", "sk", "sv", "lij", "es", "en", "id", "oc", "gl", "sl"]'''),
    )
    for headword, language, definition in cases:
        assert dictionary.define(headword, language, "cognates") == definition

    # The dictionary count is verified only after the lookups.
    assert dictionary.countDicts() == 1


def test_kaikki(tmp_path):
db = LocalDictionary(tmp_path)
assert db.countDicts() == 0
db.dictimport("testdata/kaikki/swedish_short.json",
dicttype="wiktdump",
lang="sv",
name="kaikki-swedish"
)
assert db.countDicts() == 1
assert db.define("uppåkrakaka", "sv", "kaikki-swedish") == '''<i>Noun</i>
uppåkrakaka c
1. a biscuit made of mördeg (without egg), in a circular shape folded almost in the middle, garnished with chopped pistachios and nib sugar'''
assert dictdb.define("affektionsvärde", "sv", "kaikki-swedish") == '''<i>Noun</i>
assert db.define("affektionsvärde", "sv", "kaikki-swedish") == '''<i>Noun</i>
affektionsvärde n
1. sentimental value'''
assert dictdb.define("rådigt", "sv", "kaikki-swedish") == '''<i>Adv</i>
assert db.define("rådigt", "sv", "kaikki-swedish") == '''<i>Adj</i>
rådigt
1. indefinite neuter singular of rådig
<i>Adv</i>
rådigt (comparative rådigare, superlative rådigast)
1. resourcefully, resolutely'''

dictdb.dictimport("testdata/kaikki/fr-extract.json.gz",
dicttype="wiktdump",
lang="fr",
name="kaikki-french"
)
assert dictdb.countDicts() == 2
db.dictimport("testdata/kaikki/fr_short.json",
dicttype="wiktdump",
lang="fr",
name="kaikki-french"
)
assert db.countDicts() == 2
# french tests
assert dictdb.define("évhémérisassent", "fr", "kaikki-french") == """<i>Verb</i>
assert db.define("évhémérisassent", "fr", "kaikki-french") == """<i>Verb</i>
1. Troisième personne du pluriel de l’imparfait du subjonctif de évhémériser."""
assert dictdb.define("fortitrer", "fr", "kaikki-french") == """<i>Verb</i>
assert db.define("fortitrer", "fr", "kaikki-french") == """<i>Verb</i>
1. Un cerf fortitre, quand il évite de passer près des chiens frais et des relais."""
assert dictdb.define("géminer", "fr", "kaikki-french") == """<i>Verb</i>
assert db.define("géminer", "fr", "kaikki-french") == """<i>Verb</i>
1. Se doubler.
2. Grouper deux à deux, doubler."""

1 change: 1 addition & 0 deletions vocabsieve/dictformats.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ def parseKaikki(path, lang) -> dict[str, str]:
(https://github.com/tatuylonen/wiktextract)
The format is lines of json objects, each containing a word and its definition
'''
print("Parsing Kaikki wiktionary dump at " + path)
items: list[tuple[str, str]] = []
with zopen(path) as f:
logger.debug("Parsing Kaikki wiktionary dump at " + path)
Expand Down
12 changes: 11 additions & 1 deletion vocabsieve/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,15 @@ def setupMenu(self) -> None:
self.setMenuBar(self.menu)

def markWords(self):
if settings.value("freq_source", "<disabled>") == "<disabled>":
self.warn("No frequency source is set. Please set a frequency source in the configuration dialog.")
return
if not settings.value("lemfreq", False, type=bool):
self.warn("Marking words requires a lemmatized frequency list to work properly.")
return
if self.known_data is None:
self.warnKnownDataNotReady()
return
words = self.freq_widget.getAllWords()
dialog = WordMarkingDialog(self, words)
dialog.exec()
Expand Down Expand Up @@ -423,6 +432,7 @@ def _refreshKnownData(self) -> None:
with lock:
self.known_data, self.known_metadata = self.rec.getKnownData()
self.known_data_timestamp = time.time()
self.status("Known data is ready")

def exportWordData(self):
path, _ = QFileDialog.getSaveFileName(
Expand Down Expand Up @@ -1058,7 +1068,7 @@ def time(self) -> str:
return QDateTime.currentDateTime().toString('[hh:mm:ss]')

def status(self, msg: str) -> None:
self.status_bar.showMessage(self.time() + " " + msg, 4000)
self.status_bar.showMessage(self.time() + " " + msg)

def warn(self, text: str) -> None:
msg = QMessageBox()
Expand Down

0 comments on commit 9fe1e3f

Please sign in to comment.