From 5c3df2e1d7997a677b78775de888b8afb636add8 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 10:53:57 +0100
Subject: [PATCH 1/5] fixing quantization

---
 recipes/eleuther_eval.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/recipes/eleuther_eval.py b/recipes/eleuther_eval.py
index ce07497899..d4d39e52cb 100644
--- a/recipes/eleuther_eval.py
+++ b/recipes/eleuther_eval.py
@@ -28,6 +28,7 @@
 from torchtune.modules.tokenizers import ModelTokenizer
 from torchtune.modules.transforms import Transform
 from torchtune.recipe_interfaces import EvalRecipeInterface
+from torchtune.training import FullModelTorchTuneCheckpointer
 
 try:
     import lm_eval
@@ -475,13 +476,6 @@ def setup(self, cfg: DictConfig) -> None:
 
         # Load checkpoint
         checkpointer = config.instantiate(cfg.checkpointer)
-        if quantization_mode is None:
-            ckpt_dict = checkpointer.load_checkpoint()
-        else:
-            # weights_only needs to be False when loading a quantized model
-            # currently loading a quantized model is only supported with the
-            # FullModelTorchTuneCheckpointer
-            ckpt_dict = checkpointer.load_checkpoint(weights_only=False)
 
         # Initialize model
         with training.set_default_dtype(self.dtype), self.device:
@@ -489,14 +483,27 @@ def setup(self, cfg: DictConfig) -> None:
 
         # Quantize model if requested
         if quantization_mode is not None:
+            if not isinstance(checkpointer, FullModelTorchTuneCheckpointer):
+                raise ValueError(
+                    "Quantization is only supported for models quantized and saved with the "
+                    "FullModelTorchTuneCheckpointer - please ensure you have quantized your "
+                    "model and are using the quantized weights!"
+                )
+            if "qat" in quantization_mode:
+                model = quantizer.prepare(model)
             model = quantizer.quantize(model)
             model = model.to(device=self.device, dtype=self.dtype)
-            for k, v in model_state_dict.items():
-                model_state_dict[k] = v.to(self._device)
-            model.load_state_dict(model_state_dict, assign=True)
+            ckpt_dict = checkpointer.load_checkpoint(weights_only=False)[
+                training.MODEL_KEY
+            ]
+            for k, v in ckpt_dict.items():
+                ckpt_dict[k] = v.to(self.device)
+            model.load_state_dict(ckpt_dict, assign=True)
+        else:
+            ckpt_dict = checkpointer.load_checkpoint()[training.MODEL_KEY]
+            model.load_state_dict(ckpt_dict)
 
         # Load model weights into initialized model
-        model.load_state_dict(ckpt_dict[training.MODEL_KEY])
         self.logger.info(f"Model is initialized with precision {self.dtype}.")
 
         # Put model in eval mode.

From f472c6d8be11b0d6408f4109a2643caeeec52268 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 11:08:22 +0100
Subject: [PATCH 2/5] fixing qat error

---
 recipes/eleuther_eval.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/recipes/eleuther_eval.py b/recipes/eleuther_eval.py
index d4d39e52cb..ad6ba41e74 100644
--- a/recipes/eleuther_eval.py
+++ b/recipes/eleuther_eval.py
@@ -490,7 +490,12 @@ def setup(self, cfg: DictConfig) -> None:
                     "model and are using the quantized weights!"
                 )
             if "qat" in quantization_mode:
-                model = quantizer.prepare(model)
+                raise ValueError(
+                    "You have specified a quantizer with 'QAT' - "
+                    "QAT quantizers should only be used during quantization aware training "
+                    "and when quantizing models. Please use the corresponding post-training "
+                    "quantizer e.g. Int8DynActInt4WeightQuantizer for Int8DynActInt4WeightQATQuantizer."
+                )
             model = quantizer.quantize(model)
             model = model.to(device=self.device, dtype=self.dtype)
             ckpt_dict = checkpointer.load_checkpoint(weights_only=False)[

From 6e98d7d8da42aed7e8bf5187ff77a2cc8431e7d6 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 11:40:41 +0100
Subject: [PATCH 3/5] adding tests

---
 tests/recipes/test_eleuther_eval.py | 75 ++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
index 32eaee4b1b..d7c3728a16 100644
--- a/tests/recipes/test_eleuther_eval.py
+++ b/tests/recipes/test_eleuther_eval.py
@@ -14,7 +14,7 @@
 import pytest
 
 from tests.common import TUNE_PATH
-from tests.recipes.utils import llama2_test_config
+from tests.recipes.utils import llama2_test_config, write_hf_ckpt_config
 from tests.test_utils import CKPT_MODEL_PATHS
 
 
@@ -126,6 +126,79 @@ def test_eval_recipe_errors_without_lm_eval(self, capsys, monkeypatch, tmpdir):
             in printed_err
         )
 
+    @pytest.mark.integration_test
+    def test_eval_recipe_errors_with_quantization_hf_checkpointer(
+        self, capsys, monkeypatch, tmpdir
+    ):
+        ckpt = "llama2_hf"
+        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
+        ckpt_dir = ckpt_path.parent
+
+        # Config file needed for model conversion.
+        write_hf_ckpt_config(ckpt_dir)
+
+        cmd = f"""
+        tune run eleuther_eval \
+            --config eleuther_evaluation \
+            output_dir={tmpdir} \
+            checkpointer=torchtune.training.FullModelHFCheckpointer \
+            checkpointer.checkpoint_dir='{ckpt_dir}' \
+            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.output_dir={tmpdir} \
+            checkpointer.model_type=LLAMA2 \
+            tokenizer.path=/tmp/test-artifacts/tokenizer.model \
+            tokenizer.prompt_template=null \
+            limit=1 \
+            dtype=fp32 \
+            device=cpu \
+            quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer \
+            quantizer.groupsize=32 \
+        """.split()
+
+        monkeypatch.setattr(sys, "argv", cmd)
+        with pytest.raises(SystemExit, match="1"):
+            runpy.run_path(TUNE_PATH, run_name="__main__")
+
+        printed_err = capsys.readouterr().out
+        assert (
+            "Quantization is only supported for models quantized and saved with the FullModelTorchTuneCheckpointer"
+            in printed_err
+        )
+
+    @pytest.mark.integration_test
+    def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir):
+        ckpt = "llama2_tune"
+        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
+        ckpt_dir = ckpt_path.parent
+
+        cmd = f"""
+        tune run eleuther_eval \
+            --config eleuther_evaluation \
+            output_dir={tmpdir} \
+            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
+            checkpointer.checkpoint_dir='{ckpt_dir}' \
+            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.output_dir={tmpdir} \
+            checkpointer.model_type=LLAMA2 \
+            tokenizer.path=/tmp/test-artifacts/tokenizer.model \
+            tokenizer.prompt_template=null \
+            limit=1 \
+            dtype=fp32 \
+            device=cpu \
+            quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer \
+            quantizer.groupsize=256 \
+        """.split()
+
+        monkeypatch.setattr(sys, "argv", cmd)
+        with pytest.raises(SystemExit, match="1"):
+            runpy.run_path(TUNE_PATH, run_name="__main__")
+
+        printed_err = capsys.readouterr().out
+        assert (
+            "QAT quantizers should only be used during quantization aware training"
+            in printed_err
+        )
+
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_generate_until_and_mc_tasks(
         self, caplog, capsys, monkeypatch, tmpdir

From deaf0749f090da8ac04fd67cf6b850854281d510 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 12:03:52 +0100
Subject: [PATCH 4/5] fixing test

---
 tests/recipes/test_eleuther_eval.py | 37 +++++++++++++++++------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
index d7c3728a16..2b40177ac8 100644
--- a/tests/recipes/test_eleuther_eval.py
+++ b/tests/recipes/test_eleuther_eval.py
@@ -152,19 +152,20 @@ def test_eval_recipe_errors_with_quantization_hf_checkpointer(
             dtype=fp32 \
             device=cpu \
             quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer \
-            quantizer.groupsize=32 \
+            quantizer.groupsize=256 \
         """.split()
 
+        model_config = llama2_test_config()
+        cmd = cmd + model_config
+
         monkeypatch.setattr(sys, "argv", cmd)
-        with pytest.raises(SystemExit, match="1"):
+        with pytest.raises(
+            ValueError,
+            match="Quantization is only supported for models quantized and saved with the "
+            "FullModelTorchTuneCheckpointer",
+        ):
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        printed_err = capsys.readouterr().out
-        assert (
-            "Quantization is only supported for models quantized and saved with the FullModelTorchTuneCheckpointer"
-            in printed_err
-        )
-
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir):
         ckpt = "llama2_tune"
@@ -186,18 +187,24 @@ def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir
             dtype=fp32 \
             device=cpu \
             quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer \
-            quantizer.groupsize=256 \
+            quantizer.groupsize=32\
         """.split()
 
+        model_config = llama2_test_config()
+        cmd = cmd + model_config
+
         monkeypatch.setattr(sys, "argv", cmd)
-        with pytest.raises(SystemExit, match="1"):
+        with pytest.raises(
+            ValueError,
+            match="QAT quantizers should only be used during quantization aware training",
+        ):
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        printed_err = capsys.readouterr().out
-        assert (
-            "QAT quantizers should only be used during quantization aware training"
-            in printed_err
-        )
+        # printed_err = capsys.readouterr().out
+        # assert (
+        #     "QAT quantizers should only be used during quantization aware training"
+        #     in printed_err
+        # )
 
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_generate_until_and_mc_tasks(
         self, caplog, capsys, monkeypatch, tmpdir

From 6242f2b76bdd279e23265e8635cc4052bed8c7f5 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 12:17:39 +0100
Subject: [PATCH 5/5] removing comments

---
 tests/recipes/test_eleuther_eval.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
index 2b40177ac8..f09daf2309 100644
--- a/tests/recipes/test_eleuther_eval.py
+++ b/tests/recipes/test_eleuther_eval.py
@@ -200,12 +200,6 @@ def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir
         ):
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        # printed_err = capsys.readouterr().out
-        # assert (
-        #     "QAT quantizers should only be used during quantization aware training"
-        #     in printed_err
-        # )
-
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_generate_until_and_mc_tasks(
         self, caplog, capsys, monkeypatch, tmpdir
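
Usage sketch (not part of the patch series above): after these changes, the eval
recipe only accepts quantized weights that were produced by a post-training
quantizer and saved in torchtune format, so that FullModelTorchTuneCheckpointer
can reload them via load_checkpoint(weights_only=False); QAT quantizers are
rejected in favor of their post-training counterparts. A minimal sketch of
producing such a checkpoint follows; the llama2_7b builder, groupsize, and
paths are illustrative assumptions, not values taken from this PR.

    # Sketch only: quantize a fine-tuned model post-training and save it in
    # torchtune format so the eval recipe can reload it. The model builder,
    # groupsize, and paths below are assumptions for illustration.
    import torch

    from torchtune.models.llama2 import llama2_7b
    from torchtune.training.quantization import Int8DynActInt4WeightQuantizer

    model = llama2_7b()
    # Load fine-tuned full-precision weights here first, e.g.:
    # model.load_state_dict(torch.load("/tmp/llama2/llama2_finetuned.pt"))

    # Use the post-training quantizer, not the QAT variant, for eval.
    quantizer = Int8DynActInt4WeightQuantizer(groupsize=256)
    model = quantizer.quantize(model)

    # FullModelTorchTuneCheckpointer reloads this inside the eval recipe
    # with load_checkpoint(weights_only=False).
    torch.save(model.state_dict(), "/tmp/llama2/llama2_quantized.pt")

The saved state dict can then be evaluated with the same overrides the new
tests use: checkpointer=torchtune.training.FullModelTorchTuneCheckpointer
together with quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer
and a quantizer.groupsize matching the one used at quantization time.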