Merge pull request #23 from LyaaaaaGames/Split-GPU-CPU

Split gpu cpu
Lyaaaaaaaaaaaaaaa authored Sep 25, 2023
2 parents 7f82d65 + 980effc commit 1d7c1e0
Showing 9 changed files with 439 additions and 182 deletions.
38 changes: 37 additions & 1 deletion server/config.py
@@ -9,6 +9,8 @@
#--
#-- Implementation Notes (Leave empty if nothing to say):
#-- - This is the config file used by the server.
#-- - The settings here have priority over the client's settings.
#-- Setting them to None gives priority back to the client.
#--
#-- Anticipated changes (Leave empty if nothing to say):
#-- -
@@ -29,15 +31,49 @@
#--
#-- 09/11/2022 Lyaaaaa
#-- - Set LOG_LEVEL default value back to INFO
#--
#-- 04/05/2022 Lyaaaaa
#-- - Added a new section "Models". This section contains settings for the
#-- Model class.
#--
#-- 05/05/2022 Lyaaaaa
#-- - Import torch_dtype to support the usage of an enum for the dtypes.
#-- - Added OFFLOAD_DICT to the settings. When True, it avoids a RAM peak when
#-- loading a model.
#--
#-- 18/09/2023 Lyaaaaa
#-- - LOG_FILEMODE default value is now "a" again. The log file is now
#-- deleted manually at startup instead, to avoid losing logs.
#---------------------------------------------------------------------------

import logging
from torch_dtype import Torch_Dtypes

# Network
HOST = "0.0.0.0"
PORT = 9999

# Logs
LOG_FILENAME = "server_logs.text"
LOG_FILEMODE = "w"
LOG_FILEMODE = "a"
LOG_LEVEL = logging.INFO

# Models.
# See possible values here: https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel.from_pretrained

TOKENIZERS_PATH = "models/"
MODELS_PATH = "models/"
DEFAULT_MODEL = "EleutherAI/gpt-neo-125M"
ALLOW_DOWNLOAD = None # True/False/None. If True, the server will download the AI model's files.
ALLOW_OFFLOAD = None # True/False/None
OFFLOAD_FOLDER = "offload-" # Prefix for the temporary offload folder.
LOW_CPU_MEM_USAGE = None # True/False/None
LIMIT_MEMORY = None # True/False/None
OFFLOAD_DICT = None # True/False/None

# https://huggingface.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map
# MAX_MEMORY must be a dict, e.g. {0: "30GB", 1: "46GB", x: "yMB"/"yGB", "cpu": "20000MB"}, where x is a GPU index.
MAX_MEMORY = None # None/dict/See documentation
DEVICE_MAP = None # None/see documentation
TORCH_DTYPE = None # "Auto"/None/torch.dtype/See torch_dtype.py for more info.
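
Note (outside the diff): when these settings are not None, they take priority
over the client and would typically be forwarded to Hugging Face's
from_pretrained. A minimal sketch of that forwarding follows; load_model is a
hypothetical helper, and mapping OFFLOAD_DICT to the offload_state_dict
argument is an assumption, not something this commit shows.

# Sketch only -- load_model is hypothetical, not part of this commit.
from transformers import AutoModelForCausalLM
import config

def load_model(p_model_name = config.DEFAULT_MODEL):
    kwargs = {}
    if config.LOW_CPU_MEM_USAGE is not None:
        kwargs["low_cpu_mem_usage"] = config.LOW_CPU_MEM_USAGE
    if config.DEVICE_MAP is not None:
        kwargs["device_map"] = config.DEVICE_MAP    # e.g. "auto"
    if config.MAX_MEMORY is not None:
        kwargs["max_memory"] = config.MAX_MEMORY    # e.g. {0: "30GB", "cpu": "20000MB"}
    if config.TORCH_DTYPE is not None:
        kwargs["torch_dtype"] = config.TORCH_DTYPE
    if config.ALLOW_OFFLOAD:
        kwargs["offload_folder"]     = config.OFFLOAD_FOLDER + p_model_name
        kwargs["offload_state_dict"] = bool(config.OFFLOAD_DICT)  # assumed mapping
    return AutoModelForCausalLM.from_pretrained(config.MODELS_PATH + p_model_name,
                                                **kwargs)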

57 changes: 0 additions & 57 deletions server/downloader.py

This file was deleted.

11 changes: 10 additions & 1 deletion server/generator.py
@@ -25,10 +25,17 @@
#-- - p_parameters aren't sent into generate() anymore. They are now given
#-- to a GenerationConfig object which is an attribute (generation_config)
#-- of the Model. generate() automatically uses this config.
#--
#-- 05/05/2023 Lyaaaaa
#-- - The condition for moving the inputs to the GPU is now "is_cuda_available"
#-- instead of the is_gpu_enabled attribute.
#-- - Import logger to display a log when loading the inputs into the GPU.
#-- - Called _empty_gpu_cache after the generation. This releases some memory.
#------------------------------------------------------------------------------

from model import Model
from transformers import GenerationConfig
import logger

class Generator(Model):

@@ -44,12 +51,14 @@ def generate_text(self,
        model_input = p_memory + p_context + p_prompt
        model_input = self._Tokenizer(model_input, return_tensors = "pt")

-       if self.is_gpu_enabled:
+       if self.is_cuda_available:
+           logger.log.info("Loading inputs to GPU")
            model_input.to("cuda")

        self._Model.generation_config = GenerationConfig(**p_parameters)

        model_output = self._Model.generate(**model_input)
        generated_text = self._Tokenizer.decode(model_output[0], skip_special_tokens = True)

+       self._empty_gpu_cache()
        return generated_text
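
Note (outside the diff): the parameters are now applied through a
GenerationConfig attached to the model rather than passed to generate()
directly. A hypothetical call, assuming a default-constructed Generator;
the values are illustrative, but every field is a standard GenerationConfig
option:

# Sketch only -- construction and parameter values are assumptions.
parameters = {"max_new_tokens": 40,
              "do_sample":      True,
              "temperature":    0.8,
              "top_p":          0.9}

generator = Generator()  # assumed default construction
text = generator.generate_text(p_memory     = "You are in a dark forest. ",
                               p_context    = "A wolf howls nearby. ",
                               p_prompt     = "You draw your sword.",
                               p_parameters = parameters)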
14 changes: 14 additions & 0 deletions server/logger.py
@@ -16,18 +16,32 @@
#-- Changelog:
#-- 24/02/2022 Lyaaaaa
#-- - Created the file.
#--
#-- 18/09/2023 Lyaaaaa
#-- - Added delete_log_file function.
#-- - Updated init_logger to call delete_log_file.
#---------------------------------------------------------------------------
import logging
import config
import os

log = None

def init_logger():
    global log
    delete_log_file()
    logging.basicConfig(filename = config.LOG_FILENAME,
                        filemode = config.LOG_FILEMODE,
                        format   = '%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                        datefmt  = '%H:%M:%S')
    log = logging.getLogger("AIdventure_Server")
    log.setLevel(config.LOG_LEVEL)
    log.addHandler(logging.StreamHandler())


#------------------------------------------------------------------------------
#
#------------------------------------------------------------------------------
def delete_log_file():
    if os.path.exists(config.LOG_FILENAME):
        os.remove(config.LOG_FILENAME)
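
Note (outside the diff): usage of the updated logger. init_logger runs once
at startup; delete_log_file then gives each run a fresh file even though
LOG_FILEMODE is back to "a" (append), so nothing written during the run is
truncated. A short sketch with illustrative messages:

# Sketch only -- the log message is an example.
import logger

logger.init_logger()               # deletes any old file, then configures logging
logger.log.info("Server started")  # written to server_logs.text and to stderr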