Skip to content

Commit

Permalink
refactor: remove redundant if-condition and improve type correctness …
Browse files Browse the repository at this point in the history
…for `convert_tokens_to_ids` (huggingface#34030)

* chore: remove redundant if-condition

* fix: import `Iterable`
  • Loading branch information
winstxnhdw authored and BernardZach committed Dec 5, 2024
1 parent 3dd5225 commit 746df7b
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions src/transformers/tokenization_utils_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import json
import os
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import tokenizers.pre_tokenizers as pre_tokenizers_fast
from tokenizers import Encoding as EncodingFast
Expand Down Expand Up @@ -326,20 +326,17 @@ def _convert_encoding(

return encoding_dict, encodings

def convert_tokens_to_ids(self, tokens: Union[str, List[str]]) -> Union[int, List[int]]:
def convert_tokens_to_ids(self, tokens: Union[str, Iterable[str]]) -> Union[int, List[int]]:
"""
Converts a token string (or a sequence of tokens) in a single integer id (or a sequence of ids), using the
Converts a token string (or a sequence of tokens) in a single integer id (or an Iterable of ids), using the
vocabulary.
Args:
tokens (`str` or `List[str]`): One or several token(s) to convert to token id(s).
tokens (`str` or `Iterable[str]`): One or several token(s) to convert to token id(s).
Returns:
`int` or `List[int]`: The token id or list of token ids.
"""
if tokens is None:
return None

if isinstance(tokens, str):
return self._convert_token_to_id_with_added_voc(tokens)

Expand Down

0 comments on commit 746df7b

Please sign in to comment.