From 59e25244a2f0528d6b0f88033ea89af15e614eca Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Thu, 25 May 2023 20:37:56 +0800
Subject: [PATCH] feat: Add low cpu by default (#40)

Signed-off-by: Ce Gao
---
 src/modelz_llm/falcon_service.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/modelz_llm/falcon_service.py b/src/modelz_llm/falcon_service.py
index f3a3415..10f5a2f 100644
--- a/src/modelz_llm/falcon_service.py
+++ b/src/modelz_llm/falcon_service.py
@@ -39,10 +39,12 @@ class LLM:
     def __init__(self, model_name: str, device: str) -> None:
         self.model_name = model_name
         self.tokenizer = transformers.AutoTokenizer.from_pretrained(
-            model_name, trust_remote_code=True
+            model_name, trust_remote_code=True, low_cpu_mem_usage=True
         )
         model_cls = getattr(transformers, LanguageModels.transformer_cls(model_name))
-        self.model = model_cls.from_pretrained(model_name, trust_remote_code=True)
+        self.model = model_cls.from_pretrained(
+            model_name, trust_remote_code=True, low_cpu_mem_usage=True
+        )
         if device == "auto":
             self.device = (
                 torch.cuda.current_device() if torch.cuda.is_available() else "cpu"
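
A minimal sketch of what the added flag does, assuming the transformers and accelerate packages are installed. The checkpoint name and the AutoModelForCausalLM class below are illustrative only; in the patch the concrete model class is resolved at runtime via LanguageModels.transformer_cls(model_name).

import transformers

model_name = "tiiuae/falcon-7b"  # illustrative checkpoint, not taken from the patch

# With low_cpu_mem_usage=True, transformers first builds the model with empty
# (meta) weights and then loads the checkpoint tensors directly into place,
# keeping peak CPU RAM close to one copy of the weights instead of roughly two.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)

# Tokenizers do not allocate model weights, so passing the same kwarg to
# AutoTokenizer.from_pretrained (as the patch does) is accepted but, as far
# as I can tell, does not change memory behavior.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True
)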