From 5c3df2e1d7997a677b78775de888b8afb636add8 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 10:53:57 +0100
Subject: [PATCH 1/5] fixing quantization

---
 recipes/eleuther_eval.py | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/recipes/eleuther_eval.py b/recipes/eleuther_eval.py
index ce07497899..d4d39e52cb 100644
--- a/recipes/eleuther_eval.py
+++ b/recipes/eleuther_eval.py
@@ -28,6 +28,7 @@
 from torchtune.modules.tokenizers import ModelTokenizer
 from torchtune.modules.transforms import Transform
 from torchtune.recipe_interfaces import EvalRecipeInterface
+from torchtune.training import FullModelTorchTuneCheckpointer
 
 try:
     import lm_eval
@@ -475,13 +476,6 @@ def setup(self, cfg: DictConfig) -> None:
 
         # Load checkpoint
         checkpointer = config.instantiate(cfg.checkpointer)
-        if quantization_mode is None:
-            ckpt_dict = checkpointer.load_checkpoint()
-        else:
-            # weights_only needs to be False when loading a quantized model
-            # currently loading a quantized model is only supported with the
-            # FullModelTorchTuneCheckpointer
-            ckpt_dict = checkpointer.load_checkpoint(weights_only=False)
 
         # Initialize model
         with training.set_default_dtype(self.dtype), self.device:
@@ -489,14 +483,27 @@ def setup(self, cfg: DictConfig) -> None:
 
         # Quantize model if requested
         if quantization_mode is not None:
+            if not isinstance(checkpointer, FullModelTorchTuneCheckpointer):
+                raise ValueError(
+                    "Quantization is only supported for models quantized and saved with the "
+                    "FullModelTorchTuneCheckpointer - please ensure you have quantized your "
+                    "model and are using the quantized weights!"
+                )
+            if "qat" in quantization_mode:
+                model = quantizer.prepare(model)
             model = quantizer.quantize(model)
             model = model.to(device=self.device, dtype=self.dtype)
-            for k, v in model_state_dict.items():
-                model_state_dict[k] = v.to(self._device)
-            model.load_state_dict(model_state_dict, assign=True)
+            ckpt_dict = checkpointer.load_checkpoint(weights_only=False)[
+                training.MODEL_KEY
+            ]
+            for k, v in ckpt_dict.items():
+                ckpt_dict[k] = v.to(self.device)
+            model.load_state_dict(ckpt_dict, assign=True)
+        else:
+            ckpt_dict = checkpointer.load_checkpoint()[training.MODEL_KEY]
+            model.load_state_dict(ckpt_dict)
 
         # Load model weights into initialized model
-        model.load_state_dict(ckpt_dict[training.MODEL_KEY])
         self.logger.info(f"Model is initialized with precision {self.dtype}.")
 
         # Put model in eval mode.

From f472c6d8be11b0d6408f4109a2643caeeec52268 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 11:08:22 +0100
Subject: [PATCH 2/5] fixing qat error

---
 recipes/eleuther_eval.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/recipes/eleuther_eval.py b/recipes/eleuther_eval.py
index d4d39e52cb..ad6ba41e74 100644
--- a/recipes/eleuther_eval.py
+++ b/recipes/eleuther_eval.py
@@ -490,7 +490,12 @@ def setup(self, cfg: DictConfig) -> None:
                     "model and are using the quantized weights!"
                 )
             if "qat" in quantization_mode:
-                model = quantizer.prepare(model)
+                raise ValueError(
+                    "You have specified a quantizer with 'QAT' - "
+                    "QAT quantizers should only be used during quantization aware training "
+                    "and when quantizing models. Please use the corresponding post-training "
+                    "quantizer e.g. Int8DynActInt4WeightQuantizer for Int8DynActInt4WeightQATQuantizer."
+                )
             model = quantizer.quantize(model)
             model = model.to(device=self.device, dtype=self.dtype)
             ckpt_dict = checkpointer.load_checkpoint(weights_only=False)[

From 6e98d7d8da42aed7e8bf5187ff77a2cc8431e7d6 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 11:40:41 +0100
Subject: [PATCH 3/5] adding tests

---
 tests/recipes/test_eleuther_eval.py | 75 ++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
index 32eaee4b1b..d7c3728a16 100644
--- a/tests/recipes/test_eleuther_eval.py
+++ b/tests/recipes/test_eleuther_eval.py
@@ -14,7 +14,7 @@
 import pytest
 
 from tests.common import TUNE_PATH
-from tests.recipes.utils import llama2_test_config
+from tests.recipes.utils import llama2_test_config, write_hf_ckpt_config
 from tests.test_utils import CKPT_MODEL_PATHS
 
 
@@ -126,6 +126,79 @@ def test_eval_recipe_errors_without_lm_eval(self, capsys, monkeypatch, tmpdir):
             in printed_err
         )
 
+    @pytest.mark.integration_test
+    def test_eval_recipe_errors_with_quantization_hf_checkpointer(
+        self, capsys, monkeypatch, tmpdir
+    ):
+        ckpt = "llama2_hf"
+        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
+        ckpt_dir = ckpt_path.parent
+
+        # Config file needed for model conversion.
+        write_hf_ckpt_config(ckpt_dir)
+
+        cmd = f"""
+        tune run eleuther_eval \
+            --config eleuther_evaluation \
+            output_dir={tmpdir} \
+            checkpointer=torchtune.training.FullModelHFCheckpointer \
+            checkpointer.checkpoint_dir='{ckpt_dir}' \
+            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.output_dir={tmpdir} \
+            checkpointer.model_type=LLAMA2 \
+            tokenizer.path=/tmp/test-artifacts/tokenizer.model \
+            tokenizer.prompt_template=null \
+            limit=1 \
+            dtype=fp32 \
+            device=cpu \
+            quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer \
+            quantizer.groupsize=32 \
+        """.split()
+
+        monkeypatch.setattr(sys, "argv", cmd)
+        with pytest.raises(SystemExit, match="1"):
+            runpy.run_path(TUNE_PATH, run_name="__main__")
+
+        printed_err = capsys.readouterr().out
+        assert (
+            "Quantization is only supported for models quantized and saved with the FullModelTorchTuneCheckpointer"
+            in printed_err
+        )
+
+    @pytest.mark.integration_test
+    def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir):
+        ckpt = "llama2_tune"
+        ckpt_path = Path(CKPT_MODEL_PATHS[ckpt])
+        ckpt_dir = ckpt_path.parent
+
+        cmd = f"""
+        tune run eleuther_eval \
+            --config eleuther_evaluation \
+            output_dir={tmpdir} \
+            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
+            checkpointer.checkpoint_dir='{ckpt_dir}' \
+            checkpointer.checkpoint_files=[{ckpt_path}]\
+            checkpointer.output_dir={tmpdir} \
+            checkpointer.model_type=LLAMA2 \
+            tokenizer.path=/tmp/test-artifacts/tokenizer.model \
+            tokenizer.prompt_template=null \
+            limit=1 \
+            dtype=fp32 \
+            device=cpu \
+            quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer \
+            quantizer.groupsize=256 \
+        """.split()
+
+        monkeypatch.setattr(sys, "argv", cmd)
+        with pytest.raises(SystemExit, match="1"):
+            runpy.run_path(TUNE_PATH, run_name="__main__")
+
+        printed_err = capsys.readouterr().out
+        assert (
+            "QAT quantizers should only be used during quantization aware training"
+            in printed_err
+        )
+
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_generate_until_and_mc_tasks(
         self, caplog, capsys, monkeypatch, tmpdir

From deaf0749f090da8ac04fd67cf6b850854281d510 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 12:03:52 +0100
Subject: [PATCH 4/5] fixing test

---
 tests/recipes/test_eleuther_eval.py | 37 +++++++++++++++++------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
index d7c3728a16..2b40177ac8 100644
--- a/tests/recipes/test_eleuther_eval.py
+++ b/tests/recipes/test_eleuther_eval.py
@@ -152,19 +152,20 @@ def test_eval_recipe_errors_with_quantization_hf_checkpointer(
             dtype=fp32 \
             device=cpu \
             quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer \
-            quantizer.groupsize=32 \
+            quantizer.groupsize=256 \
         """.split()
 
+        model_config = llama2_test_config()
+        cmd = cmd + model_config
+
         monkeypatch.setattr(sys, "argv", cmd)
-        with pytest.raises(SystemExit, match="1"):
+        with pytest.raises(
+            ValueError,
+            match="Quantization is only supported for models quantized and saved with the "
+            "FullModelTorchTuneCheckpointer",
+        ):
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        printed_err = capsys.readouterr().out
-        assert (
-            "Quantization is only supported for models quantized and saved with the FullModelTorchTuneCheckpointer"
-            in printed_err
-        )
-
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir):
         ckpt = "llama2_tune"
@@ -186,18 +187,24 @@ def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir
             dtype=fp32 \
             device=cpu \
             quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQATQuantizer \
-            quantizer.groupsize=256 \
+            quantizer.groupsize=32\
         """.split()
 
+        model_config = llama2_test_config()
+        cmd = cmd + model_config
+
         monkeypatch.setattr(sys, "argv", cmd)
-        with pytest.raises(SystemExit, match="1"):
+        with pytest.raises(
+            ValueError,
+            match="QAT quantizers should only be used during quantization aware training",
+        ):
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        printed_err = capsys.readouterr().out
-        assert (
-            "QAT quantizers should only be used during quantization aware training"
-            in printed_err
-        )
+        # printed_err = capsys.readouterr().out
+        # assert (
+        #     "QAT quantizers should only be used during quantization aware training"
+        #     in printed_err
+        # )
 
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_generate_until_and_mc_tasks(
         self, caplog, capsys, monkeypatch, tmpdir

From 6242f2b76bdd279e23265e8635cc4052bed8c7f5 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi
Date: Wed, 9 Oct 2024 12:17:39 +0100
Subject: [PATCH 5/5] removing comments

---
 tests/recipes/test_eleuther_eval.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tests/recipes/test_eleuther_eval.py b/tests/recipes/test_eleuther_eval.py
index 2b40177ac8..f09daf2309 100644
--- a/tests/recipes/test_eleuther_eval.py
+++ b/tests/recipes/test_eleuther_eval.py
@@ -200,12 +200,6 @@ def test_eval_recipe_errors_with_qat_quantizer(self, capsys, monkeypatch, tmpdir
         ):
             runpy.run_path(TUNE_PATH, run_name="__main__")
 
-        # printed_err = capsys.readouterr().out
-        # assert (
-        #     "QAT quantizers should only be used during quantization aware training"
-        #     in printed_err
-        # )
-
     @pytest.mark.integration_test
     def test_eval_recipe_errors_with_generate_until_and_mc_tasks(
         self, caplog, capsys, monkeypatch, tmpdir
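
Usage sketch (not part of the patch series above): after these changes, the eval
recipe only accepts quantized weights that were produced by a post-training
quantizer and saved in torchtune format, so that FullModelTorchTuneCheckpointer
can reload them via load_checkpoint(weights_only=False); QAT quantizers are
rejected in favor of their post-training counterparts. A minimal sketch of
producing such a checkpoint follows; the llama2_7b builder, groupsize, and
paths are illustrative assumptions, not values taken from this PR.

    # Sketch only: quantize a fine-tuned model post-training and save it in
    # torchtune format so the eval recipe can reload it. The model builder,
    # groupsize, and paths below are assumptions for illustration.
    import torch

    from torchtune.models.llama2 import llama2_7b
    from torchtune.training.quantization import Int8DynActInt4WeightQuantizer

    model = llama2_7b()
    # Load fine-tuned full-precision weights here first, e.g.:
    # model.load_state_dict(torch.load("/tmp/llama2/llama2_finetuned.pt"))

    # Use the post-training quantizer, not the QAT variant, for eval.
    quantizer = Int8DynActInt4WeightQuantizer(groupsize=256)
    model = quantizer.quantize(model)

    # FullModelTorchTuneCheckpointer reloads this inside the eval recipe
    # with load_checkpoint(weights_only=False).
    torch.save(model.state_dict(), "/tmp/llama2/llama2_quantized.pt")

The saved state dict can then be evaluated with the same overrides the new
tests use: checkpointer=torchtune.training.FullModelTorchTuneCheckpointer
together with quantizer._component_=torchtune.training.quantization.Int8DynActInt4WeightQuantizer
and a quantizer.groupsize matching the one used at quantization time.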