diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 640a694..7e9d01a 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7"] + python-version: ["3.8"] steps: - uses: actions/checkout@v3 diff --git a/README.md b/README.md index e78dcf7..6b76cd9 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ To learn more about how these measures work, have a look at [Jannis' blog post]( ## Installation -- Requires Python >= 3.7 and PyTorch +- Requires Python >= 3.8 and PyTorch - `pip install nmtscore` - Extra requirements for the Prism model: `pip install nmtscore[prism]` @@ -25,7 +25,7 @@ from nmtscore import NMTScorer scorer = NMTScorer() scorer.score("This is a sentence.", "This is another sentence.") -# 0.5025776988808766 +# 0.4677300455046415 ``` #### Different similarity measures @@ -52,7 +52,7 @@ scorer.score( ["This is a sentence.", "This is a sentence.", "This is another sentence."], ["This is another sentence.", "This sentence is completely unrelated.", "This is another sentence."], ) -# [0.5025777998113548, 0.1640727324003354, 1.0000000000000049] +# [0.46772973967003206, 0.15306852595255185, 1.0] ``` The sentences in the first list are compared element-wise to the sentences in the second list. @@ -132,7 +132,7 @@ model.translate("de", ["This is a test."]) # ["Das ist ein Test."] model.score("de", ["This is a test."], ["Das ist ein Test."]) -# [0.7708902359008789] +# [0.8293135166168213] ``` ## Experiments diff --git a/setup.cfg b/setup.cfg index a1e7073..ee0a675 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,9 +18,9 @@ classifiers = package_dir = = src packages = find: -python_requires = >=3.7 +python_requires = >=3.8 install_requires = - transformers + transformers<4.34 # https://github.com/ZurichNLP/nmtscore/issues/7 sentencepiece tqdm sqlitedict diff --git a/src/nmtscore/models/m2m100.py b/src/nmtscore/models/m2m100.py index 69e9c03..2de1faa 100644 --- a/src/nmtscore/models/m2m100.py +++ b/src/nmtscore/models/m2m100.py @@ -94,8 +94,6 @@ def _score(self, batch(hypothesis_sentences, batch_size), ) for src_sentences, tgt_sentences in batch_iterator: - # Hack: Append a second EOS token to make sure that one EOS is still there after shift_tokens_right - tgt_sentences = [f"{sentence} {self.tokenizer.eos_token}" for sentence in tgt_sentences] inputs = self.tokenizer( src_sentences, text_target=tgt_sentences, diff --git a/tests/test_readme.py b/tests/test_readme.py index ddf095b..0583595 100644 --- a/tests/test_readme.py +++ b/tests/test_readme.py @@ -24,7 +24,7 @@ def tearDownClass(cls) -> None: def test_nmtscorer(self): scorer = NMTScorer() score = scorer.score("This is a sentence.", "This is another sentence.") - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) def test_batch_processing(self): scorer = NMTScorer() @@ -33,20 +33,20 @@ def test_batch_processing(self): ["This is another sentence.", "This sentence is completely unrelated.", "This is another sentence."], ) self.assertEqual(3, len(scores)) - self.assertAlmostEqual(0.5025777998113548, scores[0], places=4) - self.assertAlmostEqual(0.1640727324003354, scores[1], places=4) - self.assertAlmostEqual(1.0000000000000049, scores[2], places=4) + self.assertAlmostEqual(0.46772973967003206, scores[0], places=4) + self.assertAlmostEqual(0.15306852595255185, scores[1], places=4) + self.assertAlmostEqual(1.0, scores[2], places=4) def test_different_similarity_measures(self): scorer = NMTScorer() a = "This is a sentence." b = "This is another sentence." score = scorer.score_cross_likelihood(a, b, tgt_lang="en", normalize=True, both_directions=True) - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) score = scorer.score_direct(a, b, a_lang="en", b_lang="en", normalize=True, both_directions=True) - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) score = scorer.score_pivot(a, b, a_lang="en", b_lang="en", pivot_lang="en", normalize=True, both_directions=True) - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) @unittest.skipIf(os.getenv("SKIP_SLOW_TESTS", False), "Slow") def test_different_nmt_models(self): @@ -59,18 +59,18 @@ def test_batch_size(self): a = "This is a sentence." b = "This is another sentence." score = scorer.score_cross_likelihood(a, b, translate_kwargs={"batch_size": 16}, score_kwargs={"batch_size": 16}) - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) score = scorer.score_direct(a, b, a_lang="en", b_lang="en", score_kwargs={"batch_size": 16}) - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) def test_caching(self): scorer = NMTScorer() a = "This is a sentence." b = "This is another sentence." score = scorer.score_cross_likelihood(a, b, translate_kwargs={"use_cache": True}, score_kwargs={"use_cache": True}) - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) score = scorer.score_direct(a, b, a_lang="en", b_lang="en", score_kwargs={"use_cache": True}) - self.assertAlmostEqual(0.5025776988808766, score, places=4) + self.assertAlmostEqual(0.4677300455046415, score, places=4) @mock.patch('sys.stdout', new_callable=io.StringIO) def test_version_signature(self, mock_stdout): @@ -85,4 +85,4 @@ def test_nmt_models(self): translations = model.translate("de", ["This is a test."], src_lang="en") self.assertEqual(["Das ist ein Test."], translations) scores = model.score("de", ["This is a test."], ["Das ist ein Test."], src_lang="en") - self.assertAlmostEqual(0.7708902359008789, scores[0], places=4) + self.assertAlmostEqual(0.8293135166168213, scores[0], places=4)