Fix most of the API #411

Merged: 9 commits, Aug 13, 2023
46 changes: 33 additions & 13 deletions aiserver.py
@@ -904,7 +904,7 @@ def delete(self, rule: str, **kwargs):
 api_version = None # This gets set automatically so don't change this value
 
 api_v1 = KoboldAPISpec(
-    version="1.2.2",
+    version="1.2.3",
     prefixes=["/api/v1", "/api/latest"],
     tags=tags,
 )
@@ -1691,9 +1691,6 @@ def load_model(model_backend, initial_load=False):
     koboldai_vars.aibusy = True
     koboldai_vars.horde_share = False
 
-    if initial_load:
-        use_breakmodel_args = True
-
     koboldai_vars.reset_model()
 
     koboldai_vars.noai = False
@@ -3223,7 +3220,7 @@ def actionsubmit(
     gen_mode=GenerationMode.STANDARD
 ):
     # Ignore new submissions if the AI is currently busy
-    if(koboldai_vars.aibusy):
+    if koboldai_vars.aibusy and not ignore_aibusy:
         return
 
     while(True):
@@ -5101,9 +5098,13 @@ def load_story_v1(js, from_file=None):
 def load_story_v2(js, from_file=None):
     logger.debug("Loading V2 Story")
     logger.debug("Called from {}".format(inspect.stack()[1].function))
-    leave_room(session['story'])
-    session['story'] = js['story_name']
-    join_room(session['story'])
+
+    new_story = js["story_name"]
+    # In socket context
+    if hasattr(request, "sid"):
+        leave_room(session['story'])
+        join_room(new_story)
+    session['story'] = new_story
 
     koboldai_vars.load_story(session['story'], js)
 
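The hasattr(request, "sid") check is the key idea behind the room fixes in this PR: Flask-SocketIO only attaches a sid to the request object while a Socket.IO event is being handled, so plain HTTP calls (such as API requests) can skip the room bookkeeping entirely. A minimal standalone sketch of the pattern follows; the event name, route, and story names are invented for illustration.

# Illustrative sketch only: the event name, route, and story names are invented.
from flask import Flask, request, session
from flask_socketio import SocketIO, join_room, leave_room

app = Flask(__name__)
app.secret_key = "example"
socketio = SocketIO(app)

def switch_story_room(new_story):
    old_story = session.get("story")
    # request.sid only exists while a Socket.IO event is being handled, so this
    # cheaply distinguishes "socket context" from a plain HTTP API request.
    if hasattr(request, "sid"):
        if old_story:
            leave_room(old_story)
        join_room(new_story)
    session["story"] = new_story

@socketio.on("load_story")
def on_load_story(data):
    switch_story_room(data["story_name"])  # socket context: rooms are updated

@app.route("/api/load_example")
def load_example():
    switch_story_room("New Story")         # HTTP context: rooms are skipped
    return "ok"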
@@ -8227,6 +8228,7 @@ class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema):
 
 class ModelSelectionSchema(KoboldSchema):
     model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'})
+    backend: Optional[str] = fields.String(required=False, validate=validate.OneOf(model_backends.keys()))
 
 def _generate_text(body: GenerationInputSchema):
     if koboldai_vars.aibusy or koboldai_vars.genseqs:
@@ -8484,6 +8486,7 @@ def put_model(body: ModelSelectionSchema):
       summary: Load a model
       description: |-2
         Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model.
+        Optionally, a backend parameter can be passed in to dictate which backend loads the model.
       tags:
         - model
       requestBody:
@@ -8493,6 +8496,7 @@ def put_model(body: ModelSelectionSchema):
             schema: ModelSelectionSchema
             example:
               model: ReadOnly
+              backend: Read Only
       responses:
         200:
           description: Successful request
@@ -8510,8 +8514,18 @@ def put_model(body: ModelSelectionSchema):
     set_aibusy(1)
     old_model = koboldai_vars.model
     koboldai_vars.model = body.model.strip()
+
+    backend = getattr(body, "backend", None)
+    if not backend:
+        # Backend is optional for backwards compatibility; it should probably be
+        # required on the next major API version.
+        if body.model == "ReadOnly":
+            backend = "Read Only"
+        else:
+            backend = "Huggingface"
+
     try:
-        load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True)
+        load_model(backend)
     except Exception as e:
         koboldai_vars.model = old_model
         raise e
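With the optional backend field wired through, loading a model over the API looks roughly like the call below. It mirrors the docstring example above; the base URL is an assumption for whatever host and port your KoboldAI server listens on.

# Hypothetical client call; adjust the base URL to your KoboldAI instance.
import requests

resp = requests.put(
    "http://localhost:5000/api/v1/model",
    json={"model": "ReadOnly", "backend": "Read Only"},  # "backend" may be omitted
)
print(resp.status_code, resp.json())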
@@ -8799,8 +8813,14 @@ def get_story():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append({"num": 0, "text": koboldai_vars.prompt})
-    for num, action in koboldai_vars.actions.items():
-        chunks.append({"num": num + 1, "text": action})
+
+    last_action_num = list(koboldai_vars.actions.actions.keys())[-1]
+    for num, action in koboldai_vars.actions.actions.items():
+        text = action["Selected Text"]
+        # The last action seems to always be empty
+        if not text and num == last_action_num:
+            continue
+        chunks.append({"num": num + 1, "text": text})
     return {"results": chunks}
 
 
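The response shape is unchanged by this fix; only the source of each chunk's text moved to the richer actions.actions structure, with the trailing empty action filtered out. For a story with a prompt and two actions the endpoint would return something like the following (text values invented):

# Example response body (illustrative values)
{
    "results": [
        {"num": 0, "text": "You are standing in a dark cave."},  # the prompt
        {"num": 1, "text": "You light a torch."},
        {"num": 2, "text": "The walls glitter with quartz."},
    ]
}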
@@ -8824,7 +8844,7 @@ def get_story_nums():
     chunks = []
     if koboldai_vars.gamestarted:
         chunks.append(0)
-    for num in koboldai_vars.actions.keys():
+    for num in koboldai_vars.actions.actions.keys():
         chunks.append(num + 1)
     return {"results": chunks}
 
@@ -9185,7 +9205,7 @@ def get_world_info():
         if wi["folder"] != last_folder:
             folder = []
             if wi["folder"] is not None:
-                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder})
+                folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder})
             last_folder = wi["folder"]
         (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")})
     return {"folders": folders, "entries": entries}
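The str() conversion above is the whole fix: wifolders_d appears to be keyed by folder UIDs stored as strings, while each world info entry carries its folder UID as an integer. A toy illustration under that assumption:

wifolders_d = {"1": {"name": "Characters"}}  # keys assumed to be strings
folder_uid = 1                               # entries carry the UID as an int

wifolders_d[str(folder_uid)]["name"]         # "Characters"
# wifolders_d[folder_uid]["name"]            # would raise KeyError: 1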
15 changes: 9 additions & 6 deletions koboldai_settings.py
@@ -6,7 +6,7 @@
 import shutil
 from typing import List, Union
 from io import BytesIO
-from flask import has_request_context, session
+from flask import has_request_context, session, request
 from flask_socketio import join_room, leave_room
 from collections import OrderedDict
 import multiprocessing
@@ -130,11 +130,14 @@ def load_story(self, story_name, json_data):
         original_story_name = story_name
         if not multi_story:
             story_name = 'default'
-        #Leave the old room and join the new one
-        logger.debug("Leaving room {}".format(session['story']))
-        leave_room(session['story'])
-        logger.debug("Joining room {}".format(story_name))
-        join_room(story_name)
+
+        # Leave the old room and join the new one if in socket context
+        if hasattr(request, "sid"):
+            logger.debug("Leaving room {}".format(session['story']))
+            leave_room(session['story'])
+            logger.debug("Joining room {}".format(story_name))
+            join_room(story_name)
+
         session['story'] = story_name
         logger.debug("Sending story reset")
         self._story_settings[story_name]._socketio.emit("reset_story", {}, broadcast=True, room=story_name)
4 changes: 4 additions & 0 deletions modeling/inference_models/generic_hf_torch/class.py
@@ -27,6 +27,10 @@
 model_backend_type = "Huggingface" #This should be a generic name in case multiple model backends are compatible (think Hugging Face Custom and Basic Hugging Face)
 
 class model_backend(HFTorchInferenceModel):
+    def __init__(self) -> None:
+        super().__init__()
+        self.use_4_bit = False
+
     def is_valid(self, model_name, model_path, menu_path):
         base_is_valid = super().is_valid(model_name, model_path, menu_path)
         path = False
11 changes: 10 additions & 1 deletion modeling/inference_models/hf.py
@@ -19,8 +19,12 @@ class HFInferenceModel(InferenceModel):
     def __init__(self) -> None:
         super().__init__()
         self.model_config = None
-        #self.model_name = model_name
+
+        # TODO: model_name should probably be an instantiation parameter all the
+        # way down the inheritance chain.
+        self.model_name = None
+
         self.path = None
         self.hf_torch = False
         self.model = None
         self.tokenizer = None
@@ -217,6 +221,11 @@ def unload(self):
             torch.cuda.empty_cache()
         except:
             pass
+
+    def _pre_load(self) -> None:
+        # HACK: Make model instantiation work without UI parameters
+        self.model_name = self.model_name or utils.koboldai_vars.model
+        return super()._pre_load()
 
     def _post_load(self) -> None:
         self.badwordsids = koboldai_settings.badwordsids_default
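The _pre_load hook runs before the actual model load, which is why patching model_name there is enough for API-initiated loads that never passed through the UI. A schematic of the assumed lifecycle; only _pre_load and _post_load are taken from this diff, and the other names are placeholders:

class InferenceModel:
    # Schematic only: _load_weights and load are placeholder names here.
    def _pre_load(self) -> None: ...      # HFInferenceModel resolves model_name here
    def _load_weights(self) -> None: ...  # stand-in for the real loading step
    def _post_load(self) -> None: ...     # e.g. badwordsids get set up here

    def load(self) -> None:
        self._pre_load()
        self._load_weights()
        self._post_load()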
4 changes: 4 additions & 0 deletions modeling/inference_models/hf_torch.py
@@ -93,7 +93,11 @@ def __init__(self) -> None:
         self.hf_torch = True
         self.lazy_load = True
         self.low_mem = False
+
+        # `nobreakmodel` indicates that breakmodel cannot be used, while `breakmodel`
+        # indicates whether breakmodel is currently being used
         self.nobreakmodel = False
+        self.breakmodel = False
 
         self.post_token_hooks = [
             PostTokenHooks.stream_tokens,