Observer Restructure: Remove Observers, calibration, and applying `frozen` steps from lifecycle (#189)

* temporary workaround

* separate out calibration from forward pass

* fix missing import

* fix tests

* update all other tests

* clean

* update

* clean-up

* fix test case

* remove calibration and init observer steps

* update

* update

* clean-up/fix

* cleanup

* cleanup

* remove cache

* clean-up

* remove frozen

* more clean-up

* remove observer, cache, and frozen state

* update more test cases

* fix bit_depth test

* fix more tests

* clean-up remaining tests

* clean-up

* dont skip

* more clean-up

* fix
dsikka authored Oct 31, 2024
1 parent 13b5c0b commit 2b79056
Showing 33 changed files with 420 additions and 1,763 deletions.
1 change: 0 additions & 1 deletion src/compressed_tensors/quantization/__init__.py
@@ -19,4 +19,3 @@
 from .quant_config import *
 from .quant_scheme import *
 from .lifecycle import *
-from .cache import QuantizedKVParameterCache
200 changes: 0 additions & 200 deletions src/compressed_tensors/quantization/cache.py

This file was deleted.

2 changes: 0 additions & 2 deletions src/compressed_tensors/quantization/lifecycle/__init__.py
@@ -15,9 +15,7 @@
 # flake8: noqa
 # isort: skip_file

-from .calibration import *
 from .forward import *
-from .frozen import *
 from .initialize import *
 from .compressed import *
 from .apply import *
17 changes: 1 addition & 16 deletions src/compressed_tensors/quantization/lifecycle/apply.py
@@ -22,13 +22,9 @@

 import torch
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization.lifecycle.calibration import (
-    set_module_for_calibration,
-)
 from compressed_tensors.quantization.lifecycle.compressed import (
     compress_quantized_weights,
 )
-from compressed_tensors.quantization.lifecycle.frozen import freeze_module_quantization
 from compressed_tensors.quantization.lifecycle.initialize import (
     initialize_module_for_quantization,
 )
@@ -233,6 +229,7 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
     :param model: model to apply quantization to
     :param status: status to update the module to
     """
+
     current_status = infer_quantization_status(model)

     if status >= QuantizationStatus.INITIALIZED > current_status:
@@ -243,18 +240,6 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
             )
         )

-    if current_status < status >= QuantizationStatus.CALIBRATION > current_status:
-        # only quantize weights up front when our end goal state is calibration,
-        # weight quantization parameters are already loaded for frozen/compressed
-        quantize_weights_upfront = status == QuantizationStatus.CALIBRATION
-        model.apply(
-            lambda module: set_module_for_calibration(
-                module, quantize_weights_upfront=quantize_weights_upfront
-            )
-        )
-    if current_status < status >= QuantizationStatus.FROZEN > current_status:
-        model.apply(freeze_module_quantization)
-
     if current_status < status >= QuantizationStatus.COMPRESSED > current_status:
         model.apply(compress_quantized_weights)

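After this restructure, apply_quantization_status only drives the INITIALIZED and COMPRESSED transitions; calibration, observer management, and freezing now live outside compressed_tensors (e.g. in the calling compression framework). Below is a minimal sketch of the reduced flow, assuming the exported QuantizationConfig / QuantizationScheme / QuantizationArgs / apply_quantization_config APIs; the toy model and config values are illustrative only, not taken from this commit:

import torch

from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationConfig,
    QuantizationScheme,
    QuantizationStatus,
    apply_quantization_config,
    apply_quantization_status,
)

# Toy model; any module whose class name matches a scheme target is quantized.
model = torch.nn.Sequential(torch.nn.Linear(64, 64))

# Hypothetical config: 8-bit symmetric weight quantization for Linear layers.
config = QuantizationConfig(
    config_groups={
        "group_0": QuantizationScheme(
            targets=["Linear"],
            weights=QuantizationArgs(num_bits=8, symmetric=True),
        )
    }
)

# Attaches schemes and initializes scale/zero-point parameters, leaving the
# matched modules in the INITIALIZED status.
apply_quantization_config(model, config)

# Calibration and freezing are no longer lifecycle steps here: scales and
# zero-points are expected to be filled in by an external observer flow before
# the final transition, which is now the only remaining status hook.
apply_quantization_status(model, QuantizationStatus.COMPRESSED)

With set_module_for_calibration and freeze_module_quantization removed, there is no CALIBRATION or FROZEN hook left in this loop; the library only initializes quantization parameters and compresses weights once the caller has produced calibrated values.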
80 changes: 0 additions & 80 deletions src/compressed_tensors/quantization/lifecycle/calibration.py

This file was deleted.

