diff --git a/forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math.py b/forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math.py
new file mode 100644
index 000000000..af77ec04c
--- /dev/null
+++ b/forge/test/models/pytorch/multimodal/deepseek/test_deepseek_math.py
@@ -0,0 +1,91 @@
+# SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
+#
+# SPDX-License-Identifier: Apache-2.0
+import pytest
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+
+import forge
+
+from test.models.utils import Framework, Source, Task, build_module_name
+
+
+def generation(max_new_tokens, model, input_ids, tokenizer):
+    # Greedy decoding without a KV cache: re-run the full sequence each step,
+    # append the argmax token, and stop early on EOS (batch size 1 assumed).
+    for _ in range(max_new_tokens):
+        logits = model(input_ids)
+        next_token_logits = logits[:, -1, :]
+        next_token_id = torch.argmax(next_token_logits, dim=-1)
+
+        if next_token_id.item() == tokenizer.eos_token_id:
+            break
+
+        input_ids = torch.cat([input_ids, next_token_id.unsqueeze(0)], dim=-1)
+
+    generated_text = tokenizer.decode(input_ids[0], skip_special_tokens=True)
+    return generated_text
+
+
+def download_model_and_tokenizer(model_name):
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="cpu")
+    model.generation_config = GenerationConfig.from_pretrained(model_name)
+    model.generation_config.pad_token_id = model.generation_config.eos_token_id
+    # Disable the KV cache so the wrapped forward pass stays traceable.
+    model.generation_config.use_cache = False
+
+    # Prepare input sentence
+    messages = [
+        {
+            "role": "user",
+            "content": "what is the integral of x^2 from 0 to 2?\nPlease reason step by step, and put your final answer within \\boxed{}.",
+        }
+    ]
+    input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
+    return model, tokenizer, input_ids
+
+
+class Wrapper(torch.nn.Module):
+    # Expose a logits-only forward pass so the model presents a single tensor
+    # input and a single tensor output to forge.compile.
+    def __init__(self, model):
+        super().__init__()
+        self.model = model
+
+    def forward(self, input_tensor):
+        return self.model(input_tensor).logits
+
+
+@pytest.mark.parametrize("variant", ["deepseek-math-7b-instruct"])
+def test_deepseek_inference_no_cache_cpu(variant):
+    model_name = f"deepseek-ai/{variant}"
+    model, tokenizer, input_ids = download_model_and_tokenizer(model_name)
+
+    framework_model = Wrapper(model)
+
+    generated_text = generation(
+        max_new_tokens=100, model=framework_model, input_ids=input_ids, tokenizer=tokenizer
+    )
+    print(generated_text)
+
+
+@pytest.mark.parametrize("variant", ["deepseek-math-7b-instruct"])
+def test_deepseek_inference(record_forge_property, variant):
+    # Build Module Name
+    module_name = build_module_name(
+        framework=Framework.PYTORCH, model="deepseek", variant=variant, task=Task.QA, source=Source.HUGGINGFACE
+    )
+
+    # Record Forge Property
+    record_forge_property("model_name", module_name)
+
+    model_name = f"deepseek-ai/{variant}"
+    model, tokenizer, input_ids = download_model_and_tokenizer(model_name)
+    framework_model = Wrapper(model)
+
+    compiled_model = forge.compile(framework_model, sample_inputs=[input_ids], module_name=module_name)
+    generated_text = generation(
+        max_new_tokens=1, model=compiled_model, input_ids=input_ids, tokenizer=tokenizer
+    )
+    print(generated_text)