Skip to content

Commit

Permalink
Empty str
Browse files: browse the repository at this point in the history
  • Loading branch information
mgraffg committed Oct 8, 2024
1 parent c3a1f82 commit dab9056
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 2 deletions.
2 changes: 1 addition & 1 deletion dialectid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

__version__ = '0.0.5'
__version__ = '0.0.6'

from dialectid.text_repr import BoW, SeqTM
from dialectid.model import DialectId
3 changes: 3 additions & 0 deletions dialectid/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,7 @@ def encode(self, text):
except KeyError:
continue
W = self.weights
if len(seq) == 0:
dtype = getattr(np, f'float{self.precision}')
return np.ones((W.shape[0], 1), dtype=dtype)
return np.vstack([W[:, x] for x in seq]).T
14 changes: 13 additions & 1 deletion dialectid/tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,16 @@ def test_DenseBoW_encode():

from dialectid.model import DenseBoW
dense = DenseBoW(precision=16)
assert dense.encode('buenos días').shape[1] == 2
assert dense.encode('buenos días').shape[1] == 2


def test_DenseBoW_encode_empty():
    """Check that DenseBoW.encode handles an empty string.

    Encoding ``''`` must yield an array with exactly one column whose
    entries add up to the number of names held by the model.
    """

    # NOTE(review): presumably a sample input that also produces no known
    # tokens -- confirm before turning it into an additional case.
    # 'ᗩᒪᒪ ᒪIᐯEᔕ ᗰᗩTTEᖇ!!!!!'

    from dialectid.model import DenseBoW

    encoder = DenseBoW(precision=16)
    encoded = encoder.encode('')
    n_columns = encoded.shape[1]
    assert n_columns == 1
    assert encoded.sum() == len(encoder.names)

0 comments on commit dab9056

Please sign in to comment.