eval script fixes #414
```diff
@@ -35,8 +35,8 @@ def device_sync(device):
 wd = Path(__file__).parent.parent.resolve()
 sys.path.append(str(wd))

-from model import Transformer, prepare_inputs_for_model
-from tokenizer import get_tokenizer
+from torchao._models.llama.model import Transformer, prepare_inputs_for_model
+from torchao._models.llama.tokenizer import get_tokenizer

 def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
     q = torch.empty_like(probs_sort).exponential_(1)
```
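The first hunk cuts off mid-function. For context, this helper comes from gpt-fast (which this eval script is adapted from) and relies on the exponential-race form of the Gumbel-max trick: with q_i ~ Exp(1), argmax_i(p_i / q_i) selects index i with probability proportional to p_i, so sampling stays entirely on-device and avoids the CPU/GPU synchronization that `torch.multinomial` incurs. A sketch of the complete function, assuming the standard gpt-fast implementation:

```python
import torch

def multinomial_sample_one_no_sync(probs_sort):
    # Exponential race: argmax(p_i / q_i) with q_i ~ Exp(1) draws an
    # index distributed according to p, with no device synchronization.
    q = torch.empty_like(probs_sort).exponential_(1)
    return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
```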
```diff
@@ -189,21 +189,21 @@ def main(
     if quantization:
         from torchao.quantization.quant_api import (
             quantize,
-            int8wo,
-            int8da_int8w,
-            int4wo,
+            int8_weight_only,
+            int8_dynamic_activation_int8_weight,
+            int4_weight_only,
             autoquant,
             unwrap_tensor_subclass
         )

         if "int8wo" in quantization:
-            quantize(model, int8wo())
+            quantize(model, int8_weight_only())
         if "int8dq" in quantization:
-            quantize(model, int8da_int8w())
+            quantize(model, int8_dynamic_activation_int8_weight())
         if "int4wo" in quantization:
             groupsize=int(quantization.split("-")[-1])
             assert groupsize in [32,64,128,256], f"int4wo groupsize needs to be one of [32,64,128,256] but got {groupsize}"
-            quantize(model, int4wo(groupsize=groupsize))
+            quantize(model, int4_weight_only(groupsize=groupsize))
         if "autoquant" == quantization:
             model = autoquant(model, manual=True)
```

Review thread on the `quant_api` import block:

> can we dedup the quant code in eval and generate.py?

> only a bit, it's probably more trouble than it's worth given the differences and needing to handle autoquant vs gptq, etc.

Review thread on the `int4wo` branch:

> this is

> cc @HDCharles

> i'll fix it in another PR
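For context on the renamed entry points, here is a minimal sketch of how the flags in this hunk map onto the new torchao API. The quantization string encodes the int4 groupsize as a suffix (e.g. `int4wo-64`), which the script parses with `split("-")[-1]`; the model below is a hypothetical stand-in, not the Llama `Transformer` the script actually loads:

```python
import torch
from torchao.quantization.quant_api import quantize, int4_weight_only

# Hypothetical stand-in; eval.py quantizes the Llama Transformer instead.
model = torch.nn.Sequential(torch.nn.Linear(256, 256)).to(torch.bfloat16).cuda()

# Flag format assumed from the parsing above: "int4wo-<groupsize>".
quantization = "int4wo-64"
groupsize = int(quantization.split("-")[-1])
assert groupsize in [32, 64, 128, 256]

# In-place weight-only int4 quantization under the new name,
# using the groupsize= keyword exactly as the diff does.
quantize(model, int4_weight_only(groupsize=groupsize))
```

Note that the `autoquant` branch reassigns `model` rather than quantizing in place: `autoquant` wraps the model so it can profile candidate quantizations, and with `manual=True` the caller is expected to run sample inputs and then finalize the choice afterwards (`model.finalize_autoquant()` in torchao's manual flow).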
Review thread on the quantization block:

> does this need to be compatible with torch 2.3 and below? if so we could define similar helpers: ao/test/integration/test_integration.py, lines 99 to 118 at bc8599f

> i think it's mostly for our own testing, not sure if that's needed
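The helpers being referenced (test_integration.py, lines 99 to 118) are not reproduced on this page. Purely as an illustration of the suggested pattern, here is a hypothetical version-gated wrapper that dispatches between the new `quantize()` API and an older code path depending on the installed torch version; the helper name and the fallback call are assumptions modeled on earlier torchao releases, not code from this PR:

```python
import torch
from packaging.version import parse

def _int8wo_api(model):
    # Hypothetical helper: keep torch <= 2.3 working by dispatching
    # on the runtime torch version (strip any "+cu121"-style suffix).
    if parse(torch.__version__.split("+")[0]) >= parse("2.4"):
        # quantize() with the names this PR switches to.
        from torchao.quantization.quant_api import quantize, int8_weight_only
        quantize(model, int8_weight_only())
    else:
        # Older module-swap style API from earlier torchao releases
        # (an assumption about the appropriate fallback).
        from torchao.quantization.quant_api import (
            change_linear_weights_to_int8_woqtensors,
        )
        change_linear_weights_to_int8_woqtensors(model)
```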