diff --git a/docs/source/en/quantization/torchao.md b/docs/source/en/quantization/torchao.md index d13b08eb776b44..cd1d0188c33eb5 100644 --- a/docs/source/en/quantization/torchao.md +++ b/docs/source/en/quantization/torchao.md @@ -81,7 +81,7 @@ quantized_model.save_pretrained(output_dir, safe_serialization=False) # quantized_model.push_to_hub(save_to, safe_serialization=False) # load quantized model -ckpt_id = "llama3-8b-int4wo-128" # or huggingface hub id +ckpt_id = "llama3-8b-int4wo-128" # or Hugging Face Hub model id loaded_quantized_model = AutoModelForCausalLM.from_pretrained(ckpt_id, device_map="cuda")