From fe29045aeb70e2d6ac6991a4b75f52d65e130151 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 18 Jul 2024 11:15:23 +0200 Subject: [PATCH 1/3] Update main.py --- src/mistral_inference/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mistral_inference/main.py b/src/mistral_inference/main.py index 9cafec8..7b81fb0 100644 --- a/src/mistral_inference/main.py +++ b/src/mistral_inference/main.py @@ -24,7 +24,7 @@ def is_torchrun() -> bool: def load_tokenizer(model_path: Path) -> MistralTokenizer: - tokenizer = [f for f in os.listdir(Path(model_path)) if f.startswith("tokenizer.model")] + tokenizer = [f for f in os.listdir(Path(model_path)) if f.startswith("tokenizer.model") or f.startswith("tekken")] assert ( len(tokenizer) > 0 ), f"No tokenizer found in {model_path}, make sure to place a `tokenizer.model.[v1,v2,v3]` file in {model_path}." From 3557b912676cfd4672f77257d47a33577d974227 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 18 Jul 2024 12:31:29 +0000 Subject: [PATCH 2/3] v1.3.0 --- pyproject.toml | 4 ++-- src/mistral_inference/__init__.py | 2 +- src/mistral_inference/main.py | 6 ++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 863df99..7e3b1af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mistral_inference" -version = "1.2.0" +version = "1.3.0" description = "" authors = ["bam4d "] readme = "README.md" @@ -27,7 +27,7 @@ python = "^3.9.10" xformers = ">=0.0.24" simple-parsing = ">=0.1.5" fire = ">=0.6.0" -mistral_common = "^1.0.0" +mistral_common = "^1.3.0" safetensors = ">=0.4.0" [tool.poetry.group.dev.dependencies] diff --git a/src/mistral_inference/__init__.py b/src/mistral_inference/__init__.py index c68196d..67bc602 100644 --- a/src/mistral_inference/__init__.py +++ b/src/mistral_inference/__init__.py @@ -1 +1 @@ -__version__ = "1.2.0" +__version__ = "1.3.0" diff --git a/src/mistral_inference/main.py b/src/mistral_inference/main.py index 7b81fb0..e99ce11 100644 --- a/src/mistral_inference/main.py +++ b/src/mistral_inference/main.py @@ -12,6 +12,8 @@ from mistral_common.protocol.instruct.request import ChatCompletionRequest from mistral_common.tokens.tokenizers.base import Tokenizer from mistral_common.tokens.tokenizers.mistral import MistralTokenizer +from mistral_common.tokens.tokenizers.sentencepiece import is_sentencepiece +from mistral_common.tokens.tokenizers.tekken import is_tekken from mistral_inference.generate import generate, generate_mamba from mistral_inference.mamba import Mamba @@ -24,10 +26,10 @@ def is_torchrun() -> bool: def load_tokenizer(model_path: Path) -> MistralTokenizer: - tokenizer = [f for f in os.listdir(Path(model_path)) if f.startswith("tokenizer.model") or f.startswith("tekken")] + tokenizer = [f for f in os.listdir(Path(model_path)) if is_tekken(f) or is_sentencepiece(f)] assert ( len(tokenizer) > 0 - ), f"No tokenizer found in {model_path}, make sure to place a `tokenizer.model.[v1,v2,v3]` file in {model_path}." + ), f"No tokenizer in {model_path}, place a `tokenizer.model.[v1,v2,v3]` or `tekken.json` file in {model_path}." assert ( len(tokenizer) == 1 ), f"Multiple tokenizers {', '.join(tokenizer)} found in `model_path`, make sure to only have one tokenizer" From 21790d6b665948e50d3a677ef1825fb25cc225b8 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Thu, 18 Jul 2024 12:38:24 +0000 Subject: [PATCH 3/3] WIP --- src/mistral_inference/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mistral_inference/main.py b/src/mistral_inference/main.py index e99ce11..d4302fe 100644 --- a/src/mistral_inference/main.py +++ b/src/mistral_inference/main.py @@ -26,7 +26,7 @@ def is_torchrun() -> bool: def load_tokenizer(model_path: Path) -> MistralTokenizer: - tokenizer = [f for f in os.listdir(Path(model_path)) if is_tekken(f) or is_sentencepiece(f)] + tokenizer = [f for f in os.listdir(model_path) if is_tekken(model_path / f) or is_sentencepiece(model_path / f)] assert ( len(tokenizer) > 0 ), f"No tokenizer in {model_path}, place a `tokenizer.model.[v1,v2,v3]` or `tekken.json` file in {model_path}."