From 80334f5e7c5c2a8eb0f05a4c276749900931d37f Mon Sep 17 00:00:00 2001 From: Parinita Rahi <101819959+parinitarahi@users.noreply.github.com> Date: Fri, 14 Feb 2025 15:48:51 -0800 Subject: [PATCH] Clarify GPU chat inference instructions --- docs/genai/tutorials/deepseek-python.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/genai/tutorials/deepseek-python.md b/docs/genai/tutorials/deepseek-python.md index 84a34f16efe62..4efa682b50001 100644 --- a/docs/genai/tutorials/deepseek-python.md +++ b/docs/genai/tutorials/deepseek-python.md @@ -51,6 +51,6 @@ python model-chat.py -m deepseek-r1-distill-qwen-1.5B/model -e cpu --chat_templa ``` ```bash -# GPU Chat inference. If you pulled the model from huggingface, adjust the model directory (-m) accordingly +# On-device GPU chat inference. Works on devices with NVIDIA GPUs. If you pulled the model from Hugging Face, adjust the model directory (-m) accordingly curl -o https://raw.githubusercontent.com/microsoft/onnxruntime-genai/refs/heads/main/examples/python/model-chat.py python model-chat.py -m deepseek-r1-distill-qwen-1.5B/model -e cuda --chat_template "<|begin▁of▁sentence|><|User|>{input}<|Assistant|>" \ No newline at end of file