Fix most of the API #411

Merged: 9 commits, Aug 13, 2023
46 changes: 33 additions & 13 deletions aiserver.py
@@ -904,7 +904,7 @@ def delete(self, rule: str, **kwargs):
 api_version = None # This gets set automatically so don't change this value
 
 api_v1 = KoboldAPISpec(
-    version="1.2.2",
+    version="1.2.3",
     prefixes=["/api/v1", "/api/latest"],
     tags=tags,
 )
@@ -1691,9 +1691,6 @@ def load_model(model_backend, initial_load=False):
     koboldai_vars.aibusy = True
     koboldai_vars.horde_share = False
 
-    if initial_load:
-        use_breakmodel_args = True
-
     koboldai_vars.reset_model()
 
     koboldai_vars.noai = False
@@ -3223,7 +3220,7 @@ def actionsubmit(
     gen_mode=GenerationMode.STANDARD
 ):
     # Ignore new submissions if the AI is currently busy
-    if(koboldai_vars.aibusy):
+    if koboldai_vars.aibusy and not ignore_aibusy:
         return
 
     while(True):
@@ -5101,9 +5098,13 @@ def load_story_v1(js, from_file=None):
 def load_story_v2(js, from_file=None):
     logger.debug("Loading V2 Story")
     logger.debug("Called from {}".format(inspect.stack()[1].function))
-    leave_room(session['story'])
-    session['story'] = js['story_name']
-    join_room(session['story'])
+
+    new_story = js["story_name"]
+    # In socket context
+    if hasattr(request, "sid"):
+        leave_room(session['story'])
+        join_room(new_story)
+    session['story'] = new_story
 
     koboldai_vars.load_story(session['story'], js)
 
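The hasattr(request, "sid") check is the key idea behind the room fixes in this PR: Flask-SocketIO only attaches a sid to the request object while a Socket.IO event is being handled, so plain HTTP calls (such as API requests) can skip the room bookkeeping entirely. A minimal standalone sketch of the pattern follows; the event name, route, and story names are invented for illustration.

# Illustrative sketch only: the event name, route, and story names are invented.
from flask import Flask, request, session
from flask_socketio import SocketIO, join_room, leave_room

app = Flask(__name__)
app.secret_key = "example"
socketio = SocketIO(app)

def switch_story_room(new_story):
    old_story = session.get("story")
    # request.sid only exists while a Socket.IO event is being handled, so this
    # cheaply distinguishes "socket context" from a plain HTTP API request.
    if hasattr(request, "sid"):
        if old_story:
            leave_room(old_story)
        join_room(new_story)
    session["story"] = new_story

@socketio.on("load_story")
def on_load_story(data):
    switch_story_room(data["story_name"])  # socket context: rooms are updated

@app.route("/api/load_example")
def load_example():
    switch_story_room("New Story")         # HTTP context: rooms are skipped
    return "ok"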
@@ -8227,6 +8228,7 @@ class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema):
 
 class ModelSelectionSchema(KoboldSchema):
     model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'})
+    backend: Optional[str] = fields.String(required=False, validate=validate.OneOf(model_backends.keys()))
 
 def _generate_text(body: GenerationInputSchema):
     if koboldai_vars.aibusy or koboldai_vars.genseqs:
@@ -8484,6 +8486,7 @@ def put_model(body: ModelSelectionSchema):
       summary: Load a model
       description: |-2
         Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model.
+        Optionally, a backend parameter can be passed in to dictate which backend loads the model.
       tags:
         - model
       requestBody:
@@ -8493,6 +8496,7 @@ def put_model(body: ModelSelectionSchema):
             schema: ModelSelectionSchema
             example:
               model: ReadOnly
+              backend: Read Only
       responses:
         200:
           description: Successful request
@@ -8510,8 +8514,18 @@ def put_model(body: ModelSelectionSchema):
     set_aibusy(1)
     old_model = koboldai_vars.model
     koboldai_vars.model = body.model.strip()
+
+    backend = getattr(body, "backend", None)
+    if not backend:
+        # Backend is optional for backwards compatibility; it should probably be
+        # required on the next major API version.
+        if body.model == "ReadOnly":
+            backend = "Read Only"
+        else:
+            backend = "Huggingface"
+
     try:
-        load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True)
+        load_model(backend)
     except Exception as e:
         koboldai_vars.model = old_model
         raise e
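With the optional backend field wired through, loading a model over the API looks roughly like the call below. It mirrors the docstring example above; the base URL is an assumption for whatever host and port your KoboldAI server listens on.

# Hypothetical client call; adjust the base URL to your KoboldAI instance.
import requests

resp = requests.put(
    "http://localhost:5000/api/v1/model",
    json={"model": "ReadOnly", "backend": "Read Only"},  # "backend" may be omitted
)
print(resp.status_code, resp.json())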
@@ -8799,8 +8813,14 @@ def get_story():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append({"num": 0, "text": koboldai_vars.prompt})
-    for num, action in koboldai_vars.actions.items():
-        chunks.append({"num": num + 1, "text": action})
+
+    last_action_num = list(koboldai_vars.actions.actions.keys())[-1]
+    for num, action in koboldai_vars.actions.actions.items():
+        text = action["Selected Text"]
+        # The last action seems to always be empty
+        if not text and num == last_action_num:
+            continue
+        chunks.append({"num": num + 1, "text": text})
     return {"results": chunks}
 
 
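The response shape is unchanged by this fix; only the source of each chunk's text moved to the richer actions.actions structure, with the trailing empty action filtered out. For a story with a prompt and two actions the endpoint would return something like the following (text values invented):

# Example response body (illustrative values)
{
    "results": [
        {"num": 0, "text": "You are standing in a dark cave."},  # the prompt
        {"num": 1, "text": "You light a torch."},
        {"num": 2, "text": "The walls glitter with quartz."},
    ]
}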
@@ -8824,7 +8844,7 @@ def get_story_nums():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append(0)
-    for num in koboldai_vars.actions.keys():
+    for num in koboldai_vars.actions.actions.keys():
         chunks.append(num + 1)
     return {"results": chunks}
 
@@ -9185,7 +9205,7 @@ def get_world_info():
         if wi["folder"] != last_folder:
             folder = []
             if wi["folder"] is not None:
-                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder})
+                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder})
             last_folder = wi["folder"]
         (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")})
     return {"folders": folders, "entries": entries}
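The str() conversion above is the whole fix: wifolders_d appears to be keyed by folder UIDs stored as strings, while each world info entry carries its folder UID as an integer. A toy illustration under that assumption:

wifolders_d = {"1": {"name": "Characters"}}  # keys assumed to be strings
folder_uid = 1                               # entries carry the UID as an int

wifolders_d[str(folder_uid)]["name"]         # "Characters"
# wifolders_d[folder_uid]["name"]            # would raise KeyError: 1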
15 changes: 9 additions & 6 deletions koboldai_settings.py
@@ -6,7 +6,7 @@
 import shutil
 from typing import List, Union
 from io import BytesIO
-from flask import has_request_context, session
+from flask import has_request_context, session, request
 from flask_socketio import join_room, leave_room
 from collections import OrderedDict
 import multiprocessing
@@ -130,11 +130,14 @@ def load_story(self, story_name, json_data):
         original_story_name = story_name
         if not multi_story:
             story_name = 'default'
-        #Leave the old room and join the new one
-        logger.debug("Leaving room {}".format(session['story']))
-        leave_room(session['story'])
-        logger.debug("Joining room {}".format(story_name))
-        join_room(story_name)
+
+        # Leave the old room and join the new one if in socket context
+        if hasattr(request, "sid"):
+            logger.debug("Leaving room {}".format(session['story']))
+            leave_room(session['story'])
+            logger.debug("Joining room {}".format(story_name))
+            join_room(story_name)
+
         session['story'] = story_name
         logger.debug("Sending story reset")
         self._story_settings[story_name]._socketio.emit("reset_story", {}, broadcast=True, room=story_name)
4 changes: 4 additions & 0 deletions modeling/inference_models/generic_hf_torch/class.py
@@ -27,6 +27,10 @@
 model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
 
 class model_backend(HFTorchInferenceModel):
+    def __init__(self) -> None:
+        super().__init__()
+        self.use_4_bit = False
+
     def is_valid(self, model_name, model_path, menu_path):
         base_is_valid = super().is_valid(model_name, model_path, menu_path)
         path = False
11 changes: 10 additions & 1 deletion modeling/inference_models/hf.py
@@ -19,8 +19,12 @@ class HFInferenceModel(InferenceModel):
     def __init__(self) -> None:
         super().__init__()
         self.model_config = None
-        #self.model_name = model_name
+
+        # TODO: model_name should probably be an instantiation parameter all the
+        # way down the inheritance chain.
+        self.model_name = None
+
         self.path = None
         self.hf_torch = False
         self.model = None
         self.tokenizer = None
@@ -217,6 +221,11 @@ def unload(self):
             torch.cuda.empty_cache()
         except:
             pass
+
+    def _pre_load(self) -> None:
+        # HACK: Make model instantiation work without UI parameters
+        self.model_name = self.model_name or utils.koboldai_vars.model
+        return super()._pre_load()
 
     def _post_load(self) -> None:
         self.badwordsids = koboldai_settings.badwordsids_default
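The _pre_load hook runs before the actual model load, which is why patching model_name there is enough for API-initiated loads that never passed through the UI. A schematic of the assumed lifecycle; only _pre_load and _post_load are taken from this diff, and the other names are placeholders:

class InferenceModel:
    # Schematic only: _load_weights and load are placeholder names here.
    def _pre_load(self) -> None: ...      # HFInferenceModel resolves model_name here
    def _load_weights(self) -> None: ...  # stand-in for the real loading step
    def _post_load(self) -> None: ...     # e.g. badwordsids get set up here

    def load(self) -> None:
        self._pre_load()
        self._load_weights()
        self._post_load()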
4 changes: 4 additions & 0 deletions modeling/inference_models/hf_torch.py
@@ -93,7 +93,11 @@ def __init__(self) -> None:
         self.hf_torch = True
         self.lazy_load = True
         self.low_mem = False
+
+        # `nobreakmodel` indicates that breakmodel cannot be used, while `breakmodel`
+        # indicates whether breakmodel is currently being used
         self.nobreakmodel = False
+        self.breakmodel = False
 
         self.post_token_hooks = [
             PostTokenHooks.stream_tokens,