From f47fa223d6cb5fd2e4132b566243cbb513581e96 Mon Sep 17 00:00:00 2001
From: Lyaaaaaaaaaaaaaaa
Date: Mon, 15 Aug 2022 11:44:20 +0200
Subject: [PATCH 1/3] Updated the requirements

conda_config:
- Removed the pip packages (dependencies now come from conda).
- Replaced torch 1.10.2 with pytorch 1.12.
- Replaced transformers 4.16.2 with 4.21.
- Added accelerate 0.12.0.

conda_config_cuda:
- Replaced pytorch 1.10.2 with 1.12.
- Replaced transformers 4.15 with 4.21.
- Removed the version pin on cudatoolkit.
- Added sentencepiece and accelerate 0.12.0.
---
 conda_config.yml      | 12 +++++++-----
 conda_config_cuda.yml | 10 ++++++----
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/conda_config.yml b/conda_config.yml
index d4d0171..d1dd779 100644
--- a/conda_config.yml
+++ b/conda_config.yml
@@ -1,11 +1,13 @@
 name: aidventure
 channels:
   - defaults
+  - conda-forge
+  - pytorch
 dependencies:
   - pip
   - python=3.9.7
-  - pip:
-    - websockets==10.0
-    - torch==1.10.2
-    - transformers==4.16.2
-    - sentencepiece
+  - pytorch=1.12
+  - websockets=10.0
+  - transformers=4.21
+  - sentencepiece
+  - accelerate=0.12.0
\ No newline at end of file
diff --git a/conda_config_cuda.yml b/conda_config_cuda.yml
index 6c99082..5a0e2cf 100644
--- a/conda_config_cuda.yml
+++ b/conda_config_cuda.yml
@@ -6,8 +6,10 @@
 dependencies:
   - pip
   - python=3.9.7
-  - pytorch=1.10.1
-  - torchvision=0.11.2
-  - cudatoolkit=11.3.1
+  - pytorch=1.12
   - websockets=10.0
-  - transformers=4.15
+  - transformers=4.21
+  - sentencepiece
+  - accelerate=0.12.0
+  - torchvision=0.12
+  - cudatoolkit
\ No newline at end of file

From 8bd33ddafe13c7129816129186884d5adf713720 Mon Sep 17 00:00:00 2001
From: Lyaaaaaaaaaaaaaaa
Date: Mon, 15 Aug 2022 11:44:37 +0200
Subject: [PATCH 2/3] Delete the obsolete file "requirements.txt"

---
 requirements.txt | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 14743ba..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-pytorch==1.10.1
-torchvision==0.11.2
-cudatoolkit==11.3.1
-websockets==10.0
-transformers==4.16
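Note on the CUDA config above: with cudatoolkit unpinned, conda resolves whichever CUDA runtime matches the pytorch=1.12 build it selects. A minimal sanity check after creating the environment, assuming it was built from conda_config_cuda.yml and is active:

    # Verifies the resolved versions against the pins above (pytorch=1.12,
    # transformers=4.21, accelerate=0.12.0) and that PyTorch sees a GPU.
    import torch
    import transformers
    import accelerate

    print("torch:", torch.__version__)
    print("transformers:", transformers.__version__)
    print("accelerate:", accelerate.__version__)

    print("CUDA available:", torch.cuda.is_available())
    if torch.cuda.is_available():
        print("CUDA runtime:", torch.version.cuda)   # whichever cudatoolkit conda picked
        print("device:", torch.cuda.get_device_name(0))
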
From a1526763fa89c5a56dbbc75361e37e9acaf229d6 Mon Sep 17 00:00:00 2001
From: Lyaaaaaaaaaaaaaaa
Date: Mon, 15 Aug 2022 11:55:49 +0200
Subject: [PATCH 3/3] Implemented the "low_memory_mode" option

model:
- Updated __init__ to receive the p_low_memory_mode parameter.
- Updated _load to enable the low_cpu_mem_usage option while loading the
  generator model.
- Fixed the broken except clause in _load.
- Moved a log message from _enable_gpu to _disable_gpu.
- Updated _empty_gpu_cache to use torch.no_grad(), otherwise the memory stays
  in use. Even with this, a few hundred MB remain in use...

server:
- Added a final except clause in handler; on an unexpected error, the server
  now exits.
- Updated handle_request to receive the low_memory_mode value from the client.
- Updated the Generator constructor call to pass it low_memory_mode.
---
 server/model.py  | 32 ++++++++++++++++++++++++--------
 server/server.py | 28 ++++++++++++++++++++++++----
 2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/server/model.py b/server/model.py
index 2062617..9ab4786 100755
--- a/server/model.py
+++ b/server/model.py
@@ -105,6 +105,15 @@
 #-- - 24/02/2022 Lyaaaaa
 #--   - Replaced the init of logging by the import of the new script logger.
 #--   - Replaced self._logger by logger.log.
+#--
+#-- - 15/08/2022 Lyaaaaa
+#--   - Updated __init__ to receive the p_low_memory_mode parameter.
+#--   - Updated _load to enable the low_cpu_mem_usage option while loading
+#--     the generator model.
+#--   - Fixed the broken except clause in _load.
+#--   - Moved a log message from _enable_gpu to _disable_gpu.
+#--   - Updated _empty_gpu_cache to use torch.no_grad(), otherwise the memory
+#--     stays in use. Even with this, a few hundred MB remain in use...
 #------------------------------------------------------------------------------
 
 from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
@@ -125,15 +134,17 @@ class Model():
 #-- __init__
 #------------------------------------------------------------------------------
   def __init__(self,
-               p_model_name = "EleutherAI/gpt-neo-125M",
-               p_model_type = Model_Type.GENERATION.value,
-               p_use_gpu    = True,):
+               p_model_name      = "EleutherAI/gpt-neo-125M",
+               p_model_type      = Model_Type.GENERATION.value,
+               p_use_gpu         = True,
+               p_low_memory_mode = True):
     self._tokenizer_path   = "tokenizers/" + p_model_name
     self._model_path       = "models/" + p_model_name
     self._model_name       = p_model_name
     self.is_cuda_available = torch.cuda.is_available()
     self.is_gpu_enabled    = False
     self._model_type       = p_model_type
+    self._low_memory_mode  = p_low_memory_mode
 
     if self._load() == False:
       self._download()
@@ -155,12 +166,15 @@ def _load(self):
 
     try:
       if self._model_type == Model_Type.GENERATION.value:
-        self._Model = AutoModelForCausalLM.from_pretrained(self._model_path)
+        args        = {"low_cpu_mem_usage": self._low_memory_mode}
+        self._Model = AutoModelForCausalLM.from_pretrained(self._model_path,
+                                                           **args)
+
       elif self._model_type == Model_Type.TRANSLATION.value:
         self._Model = AutoModelForSeq2SeqLM.from_pretrained(self._model_path)
 
-    except error:
-      logger.log.error(error)
+    except Exception as error:
+      logger.log.error("Loading the model failed: " + format(error))
       return False
     return True
 
@@ -210,7 +224,6 @@ def _enable_gpu(self):
 
     except:
       logger.log.error("An error happened while using the GPU!")
-      logger.log.info("Falling back to CPU.")
       self._disable_gpu()
 
 
@@ -229,8 +242,10 @@ def _disable_gpu(self):
 #------------------------------------------------------------------------------
   def _empty_gpu_cache(self):
     logger.log.debug("Clearing GPU cache")
-    torch.cuda.empty_cache()
+    with torch.no_grad():
+      torch.cuda.empty_cache()
+
+    self._get_gpu_info()
 
 #------------------------------------------------------------------------------
 #-- _get_gpu_info
@@ -245,3 +260,4 @@ def _get_gpu_info(self):
     logger.log.debug("---------------Max memory reserved---------------")
     logger.log.debug(torch.cuda.max_memory_reserved())
 
+
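For context, low_cpu_mem_usage is a standard from_pretrained option in transformers: instead of building the model with initialised weights and then loading a second full copy of the state dict, it creates the model empty and fills the weights in place, keeping peak RAM close to one model size. In transformers 4.21 this path relies on accelerate, which is presumably why patch 1 adds accelerate=0.12.0. A standalone sketch of the same load path, using the default model name from __init__ above:

    from transformers import AutoModelForCausalLM

    low_memory_mode = True

    # Equivalent to Model._load with low_memory_mode enabled: weights are
    # streamed directly into an empty model rather than duplicated in RAM.
    model = AutoModelForCausalLM.from_pretrained(
        "EleutherAI/gpt-neo-125M",
        low_cpu_mem_usage=low_memory_mode)
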
diff --git a/server/server.py b/server/server.py
index bc56fbc..3ed4996 100755
--- a/server/server.py
+++ b/server/server.py
@@ -67,6 +67,12 @@
 #-- - 21/05/2022 Lyaaaaa
 #--   - Updated handle_request to add more debug messages and to use the
 #--     use_gpu value for both the generator and translator.
+#--
+#-- - 15/08/2022 Lyaaaaa
+#--   - Added a final except clause in handler; on an unexpected error, the
+#--     server now exits.
+#--   - Updated handle_request to receive low_memory_mode from the client.
+#--   - Updated the Generator constructor call to pass it low_memory_mode.
 #------------------------------------------------------------------------------
 
 import asyncio
@@ -123,6 +129,10 @@ async def handler(p_websocket, path):
       print("Closing the server")
       shutdown_server()
 
+  except Exception as error:
+    print("Unexpected error: " + format(error) + ". Shutting down the server.")
+    shutdown_server()
+
 
 #------------------------------------------------------------------------------
 # handle_request
@@ -154,23 +164,32 @@ def handle_request(p_websocket, p_data : dict):
     shutdown_server()
 
   elif request == Request.LOAD_MODEL.value:
-    use_gpu = p_data['use_gpu']
+    use_gpu         = p_data['use_gpu']
+    low_memory_mode = p_data['low_memory_mode']
+
     if p_data["model_type"] == Model_Type.GENERATION.value:
       logger.log.debug("loading generator")
       model_name = p_data['model_name']
-      generator  = Generator(model_name, Model_Type.GENERATION.value, use_gpu)
+      generator  = Generator(model_name,
+                             Model_Type.GENERATION.value,
+                             use_gpu,
+                             low_memory_mode)
       logger.log.info("Is CUDA available: " + format(generator.is_cuda_available))
       logger.log.debug("Is GPU enabled for the generator: "
                        + format(generator.is_gpu_enabled))
     elif p_data["model_type"] == Model_Type.TRANSLATION.value:
       logger.log.debug("loading translator")
       model_name = p_data["to_eng_model"]
-      to_eng_translator = Translator(model_name, Model_Type.TRANSLATION.value, use_gpu)
+      to_eng_translator = Translator(model_name,
+                                     Model_Type.TRANSLATION.value,
+                                     use_gpu)
       logger.log.debug("Is GPU enabled for the to_eng translator: "
                        + format(to_eng_translator.is_gpu_enabled))
       model_name = p_data["from_eng_model"]
-      from_eng_translator = Translator(model_name, Model_Type.TRANSLATION.value, use_gpu)
+      from_eng_translator = Translator(model_name,
+                                       Model_Type.TRANSLATION.value,
+                                       use_gpu)
       logger.log.debug("Is GPU enabled for the from_eng translator: "
                        + format(from_eng_translator.is_gpu_enabled))
 
     p_data['request'] = Request.LOADED_MODEL.value
@@ -242,3 +261,4 @@ async def main():
 
 
 
+
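With this patch, LOAD_MODEL payloads are expected to carry low_memory_mode alongside use_gpu. A hypothetical client-side call, assuming the protocol is JSON-encoded dicts over the websocket; the host, port, and numeric enum values below are placeholders for whatever the client and the Request/Model_Type enums actually define:

    import asyncio
    import json
    import websockets

    async def load_generator():
        # "ws://localhost:8765" is a placeholder endpoint.
        async with websockets.connect("ws://localhost:8765") as ws:
            await ws.send(json.dumps({
                "request":         1,     # stands in for Request.LOAD_MODEL.value
                "model_type":      0,     # stands in for Model_Type.GENERATION.value
                "model_name":      "EleutherAI/gpt-neo-125M",
                "use_gpu":         True,
                "low_memory_mode": True,  # the new field read by handle_request
            }))
            # handle_request echoes the dict back with request set to LOADED_MODEL.
            reply = json.loads(await ws.recv())
            print(reply)

    asyncio.run(load_generator())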