Commit

Merge pull request #15 from LyaaaaaGames/experimental

Release 1.2.1

Lyaaaaaaaaaaaaaaa authored Oct 1, 2022
2 parents b21c634 + 2e4d459 commit b840a5d
Showing 5 changed files with 61 additions and 26 deletions.
12 changes: 7 additions & 5 deletions conda_config.yml

@@ -1,11 +1,13 @@
 name: aidventure
 channels:
 - defaults
 - conda-forge
+- pytorch
 dependencies:
+- pip
 - python=3.9.7
-- pip:
-  - websockets==10.0
-  - torch==1.10.2
-  - transformers==4.16.2
-  - sentencepiece
+- pytorch=1.12
+- websockets=10.0
+- transformers=4.21
+- sentencepiece
+- accelerate=0.12.0
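The pip-installed torch/websockets/transformers stack moves to conda packages with looser major.minor pins, and accelerate 0.12.0 joins the list. A quick way to confirm the environment resolved to the expected versions — a minimal sketch, assuming it runs inside the activated aidventure env:

    import torch
    import transformers
    import accelerate

    # The pins in conda_config.yml are major.minor, so compare by prefix.
    assert torch.__version__.startswith("1.12")
    assert transformers.__version__.startswith("4.21")
    print("torch", torch.__version__,
          "| transformers", transformers.__version__,
          "| accelerate", accelerate.__version__)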
10 changes: 6 additions & 4 deletions conda_config_cuda.yml

@@ -6,8 +6,10 @@ channels:
 dependencies:
 - pip
 - python=3.9.7
-- pytorch=1.10.1
-- torchvision=0.11.2
-- cudatoolkit=11.3.1
+- pytorch=1.12
+- websockets=10.0
-- transformers=4.15
+- transformers=4.21
 - sentencepiece
+- accelerate=0.12.0
+- torchvision=0.12
+- cudatoolkit
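Note that cudatoolkit is now left unpinned, letting conda pick a build compatible with pytorch 1.12. To verify the CUDA build actually sees a GPU — a minimal sketch, assuming the env from conda_config_cuda.yml is active:

    import torch

    print("CUDA available:", torch.cuda.is_available())
    if torch.cuda.is_available():
        print("device:", torch.cuda.get_device_name(0))
        print("CUDA runtime:", torch.version.cuda)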
5 changes: 0 additions & 5 deletions requirements.txt

This file was deleted.

32 changes: 24 additions & 8 deletions server/model.py

@@ -105,6 +105,15 @@
 #-- - 24/02/2022 Lyaaaaa
 #-- - Replaced the init of logging by the import of the new script logger.
 #-- - Replaced self._logger by logger.log.
+#--
+#-- - 15/08/2022 Lyaaaaa
+#-- - Updated __init__ to receive the p_low_memory_mode parameter.
+#-- - Updated _load to enable the low_cpu_mem_usage option while loading the
+#--   generator model.
+#-- - Updated _load to fix the except clause being wrong.
+#-- - Extracted a log print from _enable_gpu to _disable_gpu.
+#-- - Updated _empty_gpu_cache to use torch.no_grad(), otherwise the memory
+#--   stays in use. Even with this solution a few hundred MB stay in use...
 #------------------------------------------------------------------------------

from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer

@@ -125,15 +134,17 @@ class Model():
 #-- __init__
 #------------------------------------------------------------------------------
   def __init__(self,
-               p_model_name = "EleutherAI/gpt-neo-125M",
-               p_model_type = Model_Type.GENERATION.value,
-               p_use_gpu    = True,):
+               p_model_name      = "EleutherAI/gpt-neo-125M",
+               p_model_type      = Model_Type.GENERATION.value,
+               p_use_gpu         = True,
+               p_low_memory_mode = True):
     self._tokenizer_path   = "tokenizers/" + p_model_name
     self._model_path       = "models/" + p_model_name
     self._model_name       = p_model_name
     self.is_cuda_available = torch.cuda.is_available()
     self.is_gpu_enabled    = False
     self._model_type       = p_model_type
+    self._low_memory_mode  = p_low_memory_mode

     if self._load() == False:
       self._download()
Expand All @@ -155,12 +166,15 @@ def _load(self):

try:
if self._model_type == Model_Type.GENERATION.value:
self._Model = AutoModelForCausalLM.from_pretrained(self._model_path)
args = {"low_cpu_mem_usage": self._low_memory_mode}
self._Model = AutoModelForCausalLM.from_pretrained(self._model_path,
**args)

elif self._model_type == Model_Type.TRANSLATION.value:
self._Model = AutoModelForSeq2SeqLM.from_pretrained(self._model_path)

except error:
logger.log.error(error)
except:
logger.log.error("An unexpected error happened while loading the model")
return False

return True
@@ -210,7 +224,6 @@ def _enable_gpu(self):

     except:
       logger.log.error("An error happened while using the GPU!")
-      logger.log.info("Falling back to CPU.")
       self._disable_gpu()

@@ -229,8 +242,10 @@ def _disable_gpu(self):
 #------------------------------------------------------------------------------
   def _empty_gpu_cache(self):
     logger.log.debug("Clearing GPU cache")
-    torch.cuda.empty_cache()

+    with torch.no_grad():
+      torch.cuda.empty_cache()
+    self._get_gpu_info()

 #------------------------------------------------------------------------------
 #-- _get_gpu_info

@@ -245,3 +260,4 @@ def _get_gpu_info(self):
     logger.log.debug("---------------Max memory reserved---------------")
     logger.log.debug(torch.cuda.max_memory_reserved())
+
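Two notes on these changes. low_cpu_mem_usage is a standard transformers option: instead of building the model with freshly initialized weights and then overwriting them from the checkpoint (briefly holding two copies in host RAM), it creates an empty shell and streams the weights in; in transformers 4.21 that path depends on accelerate, which is presumably why accelerate=0.12.0 was added to both conda configs. As for the "few hundred MB" the changelog says remain after _empty_gpu_cache: torch.cuda.empty_cache() only releases cached blocks that no live tensor references, and the CUDA context itself cannot be freed this way. A minimal standalone sketch of the new loading path, using the default model name from __init__:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_path = "EleutherAI/gpt-neo-125M"  # default in Model.__init__

    # Builds the module structure without allocating real weight tensors, then
    # loads the checkpoint directly into it, avoiding the double allocation.
    model     = AutoModelForCausalLM.from_pretrained(model_path,
                                                     low_cpu_mem_usage=True)
    tokenizer = AutoTokenizer.from_pretrained(model_path)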


28 changes: 24 additions & 4 deletions server/server.py
@@ -67,6 +67,12 @@
 #-- - 21/05/2022 Lyaaaaa
 #-- - Updated handle_request to add more debug messages and to use the
 #--   use_gpu value for both the generator and translator.
+#--
+#-- - 15/08/2022 Lyaaaaa
+#-- - Added a final except in handler. On unexpected error, the server will
+#--   exit.
+#-- - Updated handle_request to receive the low_memory_mode value from the client.
+#-- - Updated the call of the Generator constructor to send it low_memory_mode.
 #------------------------------------------------------------------------------

import asyncio

@@ -123,6 +129,10 @@ async def handler(p_websocket, path):
       print("Closing the server")
       shutdown_server()

+    except:
+      print("Unexpected error, shutting down the server")
+      shutdown_server()
+

#------------------------------------------------------------------------------
# handle_request

@@ -154,23 +164,32 @@ def handle_request(p_websocket, p_data : dict):
       shutdown_server()

     elif request == Request.LOAD_MODEL.value:
-      use_gpu = p_data['use_gpu']
+      use_gpu         = p_data['use_gpu']
+      low_memory_mode = p_data['low_memory_mode']

       if p_data["model_type"] == Model_Type.GENERATION.value:
         logger.log.debug("loading generator")
         model_name = p_data['model_name']

-        generator = Generator(model_name, Model_Type.GENERATION.value, use_gpu)
+        generator = Generator(model_name,
+                              Model_Type.GENERATION.value,
+                              use_gpu,
+                              low_memory_mode)
         logger.log.info("Is CUDA available: " + format(generator.is_cuda_available))
         logger.log.debug("Is GPU enabled for the generator: " + format(generator.is_gpu_enabled))

       elif p_data["model_type"] == Model_Type.TRANSLATION.value:
         logger.log.debug("loading translator")
         model_name = p_data["to_eng_model"]
-        to_eng_translator = Translator(model_name, Model_Type.TRANSLATION.value, use_gpu)
+        to_eng_translator = Translator(model_name,
+                                       Model_Type.TRANSLATION.value,
+                                       use_gpu)
         logger.log.debug("Is GPU enabled for the to_eng translator: " + format(to_eng_translator.is_gpu_enabled))

         model_name = p_data["from_eng_model"]
-        from_eng_translator = Translator(model_name, Model_Type.TRANSLATION.value, use_gpu)
+        from_eng_translator = Translator(model_name,
+                                         Model_Type.TRANSLATION.value,
+                                         use_gpu)
         logger.log.debug("Is GPU enabled for the from_eng translator: " + format(from_eng_translator.is_gpu_enabled))

       p_data['request'] = Request.LOADED_MODEL.value

@@ -242,3 +261,4 @@ async def main():
+
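For context, the client's LOAD_MODEL message now has to carry the extra flag. A hypothetical payload follows — the field names come from handle_request above, but the address, port, and numeric enum values are placeholders rather than the project's actual ones:

    import asyncio
    import json

    import websockets  # websockets 10.0, per the conda configs

    LOAD_MODEL = 0  # placeholder for Request.LOAD_MODEL.value
    GENERATION = 0  # placeholder for Model_Type.GENERATION.value

    async def request_generator_load():
        # Address and port are assumptions for illustration.
        async with websockets.connect("ws://localhost:9999") as socket:
            await socket.send(json.dumps({
                "request":         LOAD_MODEL,
                "model_type":      GENERATION,
                "model_name":      "EleutherAI/gpt-neo-125M",
                "use_gpu":         True,
                "low_memory_mode": True,
            }))
            print(json.loads(await socket.recv()))  # expect a LOADED_MODEL reply

    asyncio.run(request_generator_load())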



