Skip to content

Commit

Permalink
Add vocab_size property to EmbeddingTokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
Timoeller committed Jun 18, 2020
1 parent b6e92a6 commit 06e45f9
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion farm/modeling/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,10 @@ def __init__(
self.unk_tok_idx = self.vocab[unk_token]
self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
self.do_lower_case = do_lower_case
self.vocab_size_farm = len(self.vocab)

@property
def vocab_size(self):
return len(self.vocab)

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
Expand Down

0 comments on commit 06e45f9

Please sign in to comment.