From dab90561d1d7bd784949c4bdd494b9165129a203 Mon Sep 17 00:00:00 2001
From: Mario Graff
Date: Tue, 8 Oct 2024 13:59:18 +0000
Subject: [PATCH] Empty str

---
 dialectid/__init__.py         |  2 +-
 dialectid/model.py            |  3 +++
 dialectid/tests/test_model.py | 14 +++++++++++++-
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/dialectid/__init__.py b/dialectid/__init__.py
index 5e07f29..977f124 100644
--- a/dialectid/__init__.py
+++ b/dialectid/__init__.py
@@ -20,7 +20,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-__version__ = '0.0.5'
+__version__ = '0.0.6'
 
 from dialectid.text_repr import BoW, SeqTM
 from dialectid.model import DialectId
\ No newline at end of file
diff --git a/dialectid/model.py b/dialectid/model.py
index ba5684c..17a0fa9 100644
--- a/dialectid/model.py
+++ b/dialectid/model.py
@@ -152,4 +152,7 @@ def encode(self, text):
             except KeyError:
                 continue
         W = self.weights
+        if len(seq) == 0:
+            dtype = getattr(np, f'float{self.precision}')
+            return np.ones((W.shape[0], 1), dtype=dtype)
         return np.vstack([W[:, x] for x in seq]).T
diff --git a/dialectid/tests/test_model.py b/dialectid/tests/test_model.py
index 5f8a4e8..31085ec 100644
--- a/dialectid/tests/test_model.py
+++ b/dialectid/tests/test_model.py
@@ -91,4 +91,16 @@ def test_DenseBoW_encode():
 
     from dialectid.model import DenseBoW
     dense = DenseBoW(precision=16)
-    assert dense.encode('buenos días').shape[1] == 2
\ No newline at end of file
+    assert dense.encode('buenos días').shape[1] == 2
+
+
+def test_DenseBoW_encode_empty():
+    """Test DenseBoW for empty"""
+
+    # 'ᗩᒪᒪ ᒪIᐯEᔕ ᗰᗩTTEᖇ!!!!!'
+
+    from dialectid.model import DenseBoW
+    dense = DenseBoW(precision=16)
+    X = dense.encode('')
+    assert X.shape[1] == 1
+    assert X.sum() == len(dense.names)