-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* bm25 scoring function updated * Fixes #1828 * Fixes #1828 * Fixes #1828 * Fixes #1828 * Fixes #1828 * Fixes #1828 , Tests added * Fixes #1828 , Tests added * Fixes #1828 , Tests Added * Fixes #1828 , Tests Added * Fixes #1828 , Tests Added * Fixes #1828 , Tests Added * Fixes #1828
- Loading branch information
1 parent
4b8aebc
commit 4c2c638
Showing
2 changed files
with
56 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2010 Radim Rehurek <radimrehurek@seznam.cz> | ||
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html | ||
|
||
""" | ||
Automated tests for checking the BM25 weighting function (the gensim.summarization.bm25 module). | ||
""" | ||
|
||
import logging | ||
import unittest | ||
|
||
from gensim.summarization.bm25 import get_bm25_weights | ||
from gensim.test.utils import common_texts | ||
|
||
|
||
class TestBM25(unittest.TestCase):
    """Sanity checks for the weight matrix returned by ``get_bm25_weights``.

    Each test indexes the result as ``weights[i][j]``, i.e. the weight
    obtained when document ``i`` is matched against document ``j``.
    """

    def test_max_match_with_itself(self):
        """A document should show maximum matching with itself."""
        weights = get_bm25_weights(common_texts)
        for index, doc_weights in enumerate(weights):
            best_weight = max(doc_weights)
            self_weight = doc_weights[index]
            # The diagonal entry must be (one of) the largest in its row.
            self.assertAlmostEqual(best_weight, self_weight)

    def test_nonnegative_weights(self):
        """All the weights for a particular document should be non-negative."""
        weights = get_bm25_weights(common_texts)
        for doc_weights in weights:
            for weight in doc_weights:
                # assertGreaterEqual reports the offending value on failure,
                # unlike assertTrue(weight >= 0.).
                self.assertGreaterEqual(weight, 0.)

    def test_same_match_with_same_document(self):
        """Identical documents should get the same weight against a given document."""
        # Documents 1 and 2 are identical, so document 0 must score them equally.
        corpus = [['cat', 'dog', 'mouse'], ['cat', 'lion'], ['cat', 'lion']]
        weights = get_bm25_weights(corpus)
        self.assertAlmostEqual(weights[0][1], weights[0][2])

    def test_disjoint_docs_if_weight_zero(self):
        """Two disjoint documents should have zero matching."""
        # The two documents share no token, checked in both directions.
        corpus = [['cat', 'dog', 'lion'], ['goat', 'fish', 'tiger']]
        weights = get_bm25_weights(corpus)
        self.assertAlmostEqual(weights[0][1], 0)
        self.assertAlmostEqual(weights[1][0], 0)
|
||
|
||
if __name__ == '__main__':
    # When run as a script, configure the root logger at DEBUG level
    # before handing control to the unittest runner.
    logging.basicConfig(level=logging.DEBUG)
    unittest.main()