Replies: 2 comments 3 replies
-
Hi, yeah this is something important that we wanted to add, but it got lost in the backlog. Technically it's very simple to do: you need to override these functions. For the moment, you can do it via monkey patching like this:

##########################################################################
# Do this at program start
import math
import os

import torch

from hqq.models.base import BaseHQQModel

# Get the number of available weight chunks in save_dir
def get_num_weight_chunks(cls, save_dir: str) -> int:
    # Chunk files are named <name>_<chunk_id><ext>, e.g. qmodel_0.pt, qmodel_1.pt, ...
    name, ext = os.path.splitext(os.path.basename(cls.get_weight_file(save_dir)))
    num_chunks = 0
    for file in os.listdir(save_dir):
        f_name, f_ext = os.path.splitext(file)
        if f_ext == ext and f_name.startswith(name + "_"):
            num_chunks += 1
    return num_chunks

# Get the full path of chunk file <chunk_id>
def get_weight_file_chunk(cls, save_dir: str, chunk_id: int) -> str:
    name, ext = os.path.splitext(cls.get_weight_file(save_dir))
    return name + "_" + str(chunk_id) + ext

# Split the weights dictionary into a list of smaller dictionaries.
# Simple example logic: split by number of keys; for strict file-size limits,
# splitting by tensor byte size would be more precise.
def split_weights_into_chunks(cls, weights: dict, num_chunks: int = 4) -> list:
    keys = list(weights.keys())
    chunk_size = max(1, math.ceil(len(keys) / num_chunks))
    weights_chunks = [
        {k: weights[k] for k in keys[i : i + chunk_size]}
        for i in range(0, len(keys), chunk_size)
    ]
    return weights_chunks

# Save weights to disk as multiple chunk files
def save_weights_chunked(cls, weights: dict, save_dir: str) -> None:
    # weights is just a dictionary; split it into weights_chunks: list
    weights_chunks = cls.split_weights_into_chunks(weights)
    for i, chunk in enumerate(weights_chunks):
        torch.save(chunk, cls.get_weight_file_chunk(save_dir, i))

# Load weights from disk by merging all chunk files
def load_weights_chunked(cls, save_dir: str, map_location=None) -> dict:
    weights = {}
    for i in range(cls.get_num_weight_chunks(save_dir)):
        weights.update(torch.load(cls.get_weight_file_chunk(save_dir, i), map_location=map_location))
    return weights

# Patch BaseHQQModel; wrap with classmethod() so the replacements behave like
# the classmethods they override
BaseHQQModel.get_num_weight_chunks = classmethod(get_num_weight_chunks)
BaseHQQModel.get_weight_file_chunk = classmethod(get_weight_file_chunk)
BaseHQQModel.split_weights_into_chunks = classmethod(split_weights_into_chunks)
BaseHQQModel.save_weights = classmethod(save_weights_chunked)
BaseHQQModel.load_weights = classmethod(load_weights_chunked)
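
# Hypothetical usage sketch (the model object and save path are placeholders):
# with the patch applied, the usual HQQ save/load entry points write and read
# qmodel_0.pt, qmodel_1.pt, ... instead of a single qmodel.pt, e.g.:
#   from hqq.models.hf.base import AutoHQQHFModel
#   AutoHQQHFModel.save_quantized(model, "mixtral-8x22b-hqq")
#   model = AutoHQQHFModel.from_quantized("mixtral-8x22b-hqq")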
##########################################################################

Later, we will do a refactoring to fully support safetensors as well. Let me know if the solution above works!
-
Serialization will be fully supported directly in HF after this PR: huggingface/transformers#33141
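
For context, a rough sketch of what that workflow should look like once the PR lands, assuming the existing HqqConfig API in transformers; the model id, repo name, and quantization settings below are placeholders:

from transformers import AutoModelForCausalLM, HqqConfig

# Quantize on load with HQQ (4-bit, group size 64 are example settings)
quant_config = HqqConfig(nbits=4, group_size=64)
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x22B-v0.1",  # placeholder model id
    quantization_config=quant_config,
    device_map="auto",
)

# save_pretrained() shards the checkpoint into safetensors files no larger than
# max_shard_size, keeping each file under the Hub's per-file limit
model.save_pretrained("mixtral-8x22b-hqq", max_shard_size="10GB")
model.push_to_hub("your-username/mixtral-8x22b-hqq")  # placeholder repo id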
-
I quantized Mixtral 8x22B. It produces a .pt model file of approximately 60 GB. I can't push the model to Hugging Face because of the per-file size limit (50 GB max). Is there a way to shard it like we can with safetensors?