Skip to content

Commit

Permalink
Spacy tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ajdapretnar committed Jul 5, 2024
1 parent 4a3d360 commit 5bc8cbf
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 6 deletions.
29 changes: 29 additions & 0 deletions orangecontrib/text/tests/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,5 +721,34 @@ def test_can_pickle(self):
self.assertEqual(loaded._NGrams__range, self.pp._NGrams__range)


class TestPOSTagging(unittest.TestCase):
    """Exercise POS taggers on the "deerwester" corpus.

    Covers tagging a corpus that has no tokens yet, and tagging through an
    explicit tokenizer + spaCy tagger pipeline whose tags are then compared
    with the ones produced by the averaged perceptron tagger.
    """

    def setUp(self):
        # Fresh corpus and a two-step pipeline (tokenizer, then spaCy tagger)
        # for every test.
        self.corpus = Corpus.from_file("deerwester")
        tokenizer = preprocess.WordPunctTokenizer()
        spacy_tagger = tag.SpacyPOSTagger()
        self.pp = [tokenizer, spacy_tagger]

    def test_no_tokens(self):
        # Precondition: the freshly loaded corpus carries no tokens.
        self.assertFalse(self.corpus.has_tokens())

        # Apply the tagger directly, without an explicit tokenizer step.
        tagged = tag.SpacyPOSTagger()(self.corpus)

        # The test expects two preprocessors to be recorded on the result
        # even though only the tagger was called explicitly.
        recorded = tagged.used_preprocessor.preprocessors
        self.assertEqual(len(recorded), 2)
        self.assertTrue(tagged.has_tags())

    def test_pos_tagger(self):
        # First pass: run the corpus through every step of the pipeline.
        piped = self.corpus
        for step in self.pp:
            piped = step(piped)
        self.assertTrue(piped.has_tokens())
        self.assertTrue(piped.has_tags())
        self.assertEqual(len(piped.pos_tags), len(piped.tokens))
        spacy_tags = piped.pos_tags

        # Second pass: tag self.corpus directly (not the pipeline output
        # above) with the averaged perceptron tagger.
        perceptron_corpus = tag.AveragedPerceptronTagger()(self.corpus)
        self.assertEqual(
            len(perceptron_corpus.pos_tags), len(perceptron_corpus.tokens)
        )
        self.assertEqual(
            len(perceptron_corpus.used_preprocessor.preprocessors), 2
        )
        apt_tags = perceptron_corpus.pos_tags

        # The two taggers are expected to disagree on the resulting tags.
        self.assertFalse(bool(np.array_equal(spacy_tags, apt_tags)))


if __name__ == "__main__":
unittest.main()
23 changes: 17 additions & 6 deletions orangecontrib/text/widgets/tests/test_owpreprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from orangecontrib.text.preprocess import RegexpTokenizer, WhitespaceTokenizer, \
LowercaseTransformer, HtmlTransformer, PorterStemmer, SnowballStemmer, \
UDPipeLemmatizer, StopwordsFilter, MostFrequentTokensFilter, NGrams
from orangecontrib.text.tag import AveragedPerceptronTagger, MaxEntTagger
from orangecontrib.text.tag import (AveragedPerceptronTagger, MaxEntTagger,
SpacyPOSTagger)
from orangecontrib.text.tests.test_preprocess import SF_LIST, SERVER_FILES
from orangecontrib.text.widgets.owpreprocess import (
OWPreprocess,
Expand Down Expand Up @@ -884,20 +885,21 @@ def buttons(self):

def test_init(self):
# Checks the initial button state: the first button must be checked and
# the buttons visited by the loop must be unchecked.
self.assertTrue(self.buttons[0].isChecked())
# NOTE(review): this text is a rendered diff with the +/- markers stripped.
# The two loop headers below look like the removed and the added version
# of the same line (the upper bound grows from 2 to 3) — confirm against
# the commit before treating both as live code.
for i in range(1, 2):
for i in range(1, 3):
self.assertFalse(self.buttons[i].isChecked())

def test_parameters(self):
# Compares the dict returned by self.editor.parameters() with the
# expected default parameters.
# NOTE(review): rendered diff with the +/- markers stripped. The first
# assignment looks like the removed line and the second (which adds the
# "spacy_language" key) like its replacement — confirm against the commit.
params = {"method": POSTaggingModule.Averaged}
params = {"method": POSTaggingModule.Averaged, "spacy_language":
POSTaggingModule.DEFAULT_LANGUAGE}
self.assertDictEqual(self.editor.parameters(), params)

def test_set_parameters(self):
# Pushes a parameter dict into the editor, checks that parameters() returns
# the same dict, then checks which method button ended up checked.
# NOTE(review): rendered diff with the +/- markers stripped. The MaxEnt
# assignment looks like the removed line and the Spacy / "sl" assignment
# like its replacement — confirm against the commit.
params = {"method": POSTaggingModule.MaxEnt}
params = {"method": POSTaggingModule.Spacy, "spacy_language": "sl"}
self.editor.setParameters(params)
self.assertDictEqual(self.editor.parameters(), params)

# NOTE(review): the next four lines appear to be two removed lines
# (buttons[1] checked, loop over range(1)) followed by their two
# replacements (buttons[2] checked, loop over range(0, 2)); the final
# assertFalse is presumably the shared loop body — confirm against the commit.
self.assertTrue(self.buttons[1].isChecked())
for i in range(1):
self.assertTrue(self.buttons[2].isChecked())
for i in range(0, 2):
self.assertFalse(self.buttons[i].isChecked())

def test_createinstance(self):
Expand All @@ -907,9 +909,18 @@ def test_createinstance(self):
pp = self.editor.createinstance({"method": POSTaggingModule.MaxEnt})
self.assertIsInstance(pp, MaxEntTagger)

pp = self.editor.createinstance({"method": POSTaggingModule.Spacy})
self.assertIsInstance(pp, SpacyPOSTagger)

def test_repr(self):
    # Before any parameters are set, str(editor) is the averaged perceptron
    # label.
    self.assertEqual(str(self.editor), "Averaged Perceptron Tagger")

    # After switching to the spaCy method, str(editor) must also carry the
    # selected language in parentheses.
    params = {
        "method": POSTaggingModule.Spacy,
        "spacy_language": POSTaggingModule.DEFAULT_LANGUAGE,
    }
    self.editor.setParameters(params)
    expected = f"Spacy POS Tagger ({params['spacy_language']})"
    self.assertEqual(str(self.editor), expected)


class TestLanguageComboBox(WidgetTest):
def test_basic_setup(self):
Expand Down

0 comments on commit 5bc8cbf

Please sign in to comment.