-
Notifications
You must be signed in to change notification settings - Fork 0
/
quantize.py
31 lines (23 loc) · 1.07 KB
/
quantize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Quantizes rwkv.cpp model file from FP32 or FP16.
# Available format names are in rwkv_cpp_shared_library.QUANTIZED_FORMAT_NAMES
# Usage: python quantize.py C:\rwkv.cpp-169M-FP32.bin C:\rwkv.cpp-169M-Q5_1.bin Q5_1
import argparse
from rwkv_cpp import rwkv_cpp_shared_library
def parse_args():
    """Parse the command-line arguments of the quantization script.

    Positional arguments:
        src_path: path to the FP32/FP16 checkpoint file.
        dest_path: path to the resulting checkpoint file (overwritten).
        format_name: target format, restricted to the names listed in
            rwkv_cpp_shared_library.QUANTIZED_FORMAT_NAMES. Optional;
            falls back to 'Q5_1' when omitted.

    Returns:
        The argparse.Namespace produced by ArgumentParser.parse_args(),
        with attributes src_path, dest_path and format_name.
    """
    format_names = rwkv_cpp_shared_library.QUANTIZED_FORMAT_NAMES

    parser = argparse.ArgumentParser(description='Quantize rwkv.cpp model file from FP32 or FP16')
    parser.add_argument('src_path', help='Path to FP32/FP16 checkpoint file')
    parser.add_argument('dest_path', help='Path to resulting checkpoint file, will be overwritten')
    # argparse ignores `default` on a required positional argument, so the
    # declared 'Q5_1' default never applied. nargs='?' makes the argument
    # optional and lets the default take effect; explicit values behave as
    # before. NOTE(review): argparse validates the default against `choices`,
    # so this assumes 'Q5_1' is in QUANTIZED_FORMAT_NAMES - confirm.
    parser.add_argument(
        'format_name',
        nargs='?',
        help='Format name, one of ' + ', '.join(format_names),
        type=str,
        choices=format_names,
        default='Q5_1',
    )

    return parser.parse_args()
def main() -> None:
    """Script entry point: quantize the model file named on the command line.

    Reads src_path, dest_path and format_name from parse_args(), loads the
    rwkv shared library and hands the three values to its
    rwkv_quantize_model_file() method, then prints 'Done'.
    """
    cli = parse_args()

    # Load the library first, then forward the CLI values positionally.
    rwkv = rwkv_cpp_shared_library.load_rwkv_shared_library()
    rwkv.rwkv_quantize_model_file(cli.src_path, cli.dest_path, cli.format_name)

    print('Done')
# Run the quantization only when executed as a script, not when imported.
if __name__ == '__main__':
    main()