From b6ad4e0851839cef14dc7b09cae2022d6cd51bd4 Mon Sep 17 00:00:00 2001
From: "Dilyara Zharikova (Baymurzina)" <dilyara.rimovna@gmail.com>
Date: Fri, 12 May 2023 15:08:11 +0500
Subject: [PATCH 1/4] fix: getting url not name (#449)

* fix: getting url not name

* fix: return

* fix: correct checks

* fix: utilizing envvars

* fix: envvars

* fix: use attributes instead of uttr

* fix: kwargs items

* fix: kwargs items

* fix: _list to s

* feat: wrap kwargs

* fix: configs from given

* fix: configs are dicts not path

* fix: use not long config as default

* fix: lower env vars

* fix: lower env vars

* fix: codestyle

* fix: add env vars to universal

* fix: add logs to universal

* fix: synch with template prompted

* fix: params in dockerfile
---
 .../docker-compose.override.yml               |  2 +
 .../llm_based_response_selector/server.py     |  2 +-
 .../generative_configs/openai-chatgpt.json    |  7 ++
 .../openai-text-davinci-003-long.json         |  7 ++
 .../openai-text-davinci-003.json              |  7 ++
 services/openai_api_lm/server.py              | 10 ++-
 services/openai_api_lm/test.py                |  4 +-
 .../default_generative_config.json            |  8 ++
 services/transformers_lm/server.py            |  6 ++
 .../scenario/response.py                      | 31 ++++++--
 .../dff_universal_prompted_skill/Dockerfile   |  4 +
 .../default_generative_config.json            |  2 +-
 .../scenario/response.py                      | 74 +++++++++----------
 13 files changed, 114 insertions(+), 50 deletions(-)
 create mode 100644 services/openai_api_lm/generative_configs/openai-chatgpt.json
 create mode 100644 services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json
 create mode 100644 services/openai_api_lm/generative_configs/openai-text-davinci-003.json
 create mode 100644 services/transformers_lm/generative_configs/default_generative_config.json

diff --git a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
index 72e650887b..caa5a70fd1 100644
--- a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
+++ b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
@@ -172,6 +172,8 @@ services:
         SERVICE_NAME: dff_universal_prompted_skill
         GENERATIVE_TIMEOUT: 20
         N_UTTERANCES_CONTEXT: 7
+        DEFAULT_LM_SERVICE_URL: http://transformers-lm-oasst12b:8158/respond
+        DEFAULT_LM_SERVICE_CONFIG: default_generative_config.json
       context: .
       dockerfile: ./skills/dff_universal_prompted_skill/Dockerfile
     command: gunicorn --workers=1 server:app -b 0.0.0.0:8147 --reload
diff --git a/response_selectors/llm_based_response_selector/server.py b/response_selectors/llm_based_response_selector/server.py
index 045a0adb75..572a2d753c 100644
--- a/response_selectors/llm_based_response_selector/server.py
+++ b/response_selectors/llm_based_response_selector/server.py
@@ -35,7 +35,7 @@
 )
 ENVVARS_TO_SEND = getenv("ENVVARS_TO_SEND", None)
 ENVVARS_TO_SEND = [] if ENVVARS_TO_SEND is None else ENVVARS_TO_SEND.split(",")
-sending_variables = {f"{var}_list": [getenv(var, None)] for var in ENVVARS_TO_SEND}
+sending_variables = {f"{var}s": [getenv(var, None)] for var in ENVVARS_TO_SEND}
 # check if at least one of the env variables is not None
 if len(sending_variables.keys()) > 0 and all([var_value is None for var_value in sending_variables.values()]):
     raise NotImplementedError(
diff --git a/services/openai_api_lm/generative_configs/openai-chatgpt.json b/services/openai_api_lm/generative_configs/openai-chatgpt.json
new file mode 100644
index 0000000000..107e944b80
--- /dev/null
+++ b/services/openai_api_lm/generative_configs/openai-chatgpt.json
@@ -0,0 +1,7 @@
+{
+  "max_tokens": 64,
+  "temperature": 0.4,
+  "top_p": 1.0,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
\ No newline at end of file
diff --git a/services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json b/services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json
new file mode 100644
index 0000000000..7e23841d5f
--- /dev/null
+++ b/services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json
@@ -0,0 +1,7 @@
+{
+  "max_tokens": 256,
+  "temperature": 0.4,
+  "top_p": 1.0,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
\ No newline at end of file
diff --git a/services/openai_api_lm/generative_configs/openai-text-davinci-003.json b/services/openai_api_lm/generative_configs/openai-text-davinci-003.json
new file mode 100644
index 0000000000..107e944b80
--- /dev/null
+++ b/services/openai_api_lm/generative_configs/openai-text-davinci-003.json
@@ -0,0 +1,7 @@
+{
+  "max_tokens": 64,
+  "temperature": 0.4,
+  "top_p": 1.0,
+  "frequency_penalty": 0,
+  "presence_penalty": 0
+}
\ No newline at end of file
diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py
index d1b6da6c0c..ce8b4b7827 100644
--- a/services/openai_api_lm/server.py
+++ b/services/openai_api_lm/server.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import time
@@ -22,6 +23,10 @@
 
 app = Flask(__name__)
 logging.getLogger("werkzeug").setLevel("WARNING")
+DEFAULT_CONFIGS = {
+    "text-davinci-003": json.load(open("generative_configs/openai-text-davinci-003.json", "r")),
+    "gpt-3.5-turbo": json.load(open("generative_configs/openai-chatgpt.json", "r")),
+}
 
 
 def generate_responses(context, openai_api_key, openai_org, prompt, generation_params, continue_last_uttr=False):
@@ -88,10 +93,11 @@ def respond():
     contexts = request.json.get("dialog_contexts", [])
     prompts = request.json.get("prompts", [])
     configs = request.json.get("configs", [])
+    configs = [DEFAULT_CONFIGS[PRETRAINED_MODEL_NAME_OR_PATH] if el is None else el for el in configs]
     if len(contexts) > 0 and len(prompts) == 0:
         prompts = [""] * len(contexts)
-    openai_api_keys = request.json.get("OPENAI_API_KEY_list", [])
-    openai_orgs = request.json.get("OPENAI_ORGANIZATION_list", None)
+    openai_api_keys = request.json.get("openai_api_keys", [])
+    openai_orgs = request.json.get("openai_api_organizations", None)
     openai_orgs = [None] * len(contexts) if openai_orgs is None else openai_orgs
 
     try:
diff --git a/services/openai_api_lm/test.py b/services/openai_api_lm/test.py
index 845a41259a..e13ed54bb2 100644
--- a/services/openai_api_lm/test.py
+++ b/services/openai_api_lm/test.py
@@ -30,8 +30,8 @@ def test_respond():
             "dialog_contexts": contexts,
             "prompts": prompts,
             "configs": [DEFAULT_CONFIG] * len(contexts),
-            "OPENAI_API_KEY_list": [OPENAI_API_KEY] * len(contexts),
-            "OPENAI_ORGANIZATION_list": [OPENAI_ORGANIZATION] * len(contexts),
+            "openai_api_keys": [OPENAI_API_KEY] * len(contexts),
+            "openai_api_organizations": [OPENAI_ORGANIZATION] * len(contexts),
         },
     ).json()
     print(result)
diff --git a/services/transformers_lm/generative_configs/default_generative_config.json b/services/transformers_lm/generative_configs/default_generative_config.json
new file mode 100644
index 0000000000..cecd6ab7cb
--- /dev/null
+++ b/services/transformers_lm/generative_configs/default_generative_config.json
@@ -0,0 +1,8 @@
+{
+  "max_length": 120,
+  "min_length": 8,
+  "top_p": 0.9,
+  "temperature": 0.9,
+  "do_sample": true,
+  "num_return_sequences": 2
+}
\ No newline at end of file
diff --git a/services/transformers_lm/server.py b/services/transformers_lm/server.py
index c11d585c36..93418068e9 100644
--- a/services/transformers_lm/server.py
+++ b/services/transformers_lm/server.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import time
@@ -27,6 +28,10 @@
 
 app = Flask(__name__)
 logging.getLogger("werkzeug").setLevel("WARNING")
+DEFAULT_CONFIGS = {
+    "EleutherAI/gpt-j-6B": json.load(open("generative_configs/default_generative_config.json", "r")),
+    "OpenAssistant/oasst-sft-1-pythia-12b": json.load(open("generative_configs/default_generative_config.json", "r")),
+}
 
 
 def generate_responses(context, model, tokenizer, prompt, generation_params, continue_last_uttr=False):
@@ -110,6 +115,7 @@ def respond():
     contexts = request.json.get("dialog_contexts", [])
     prompts = request.json.get("prompts", [])
     configs = request.json.get("configs", [])
+    configs = [DEFAULT_CONFIGS[PRETRAINED_MODEL_NAME_OR_PATH] if el is None else el for el in configs]
     if len(contexts) > 0 and len(prompts) == 0:
         prompts = [""] * len(contexts)
 
diff --git a/skills/dff_template_prompted_skill/scenario/response.py b/skills/dff_template_prompted_skill/scenario/response.py
index e9b7dfdd34..ac5bed3f4d 100644
--- a/skills/dff_template_prompted_skill/scenario/response.py
+++ b/skills/dff_template_prompted_skill/scenario/response.py
@@ -3,6 +3,7 @@
 import re
 import requests
 import sentry_sdk
+from copy import deepcopy
 from os import getenv
 from typing import Any
 
@@ -92,15 +93,33 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr):
     # get variables which names are in `ENVVARS_TO_SEND` (splitted by comma if many)
     # from user_utterance attributes or from environment
     human_uttr_attributes = int_ctx.get_last_human_utterance(ctx, actor).get("attributes", {})
-    sending_variables = {f"{var}_list": [human_uttr_attributes.get(var.lower(), None)] for var in ENVVARS_TO_SEND}
-    if if_none_var_values(sending_variables):
-        sending_variables = {f"{var}_list": [getenv(var, None)] for var in ENVVARS_TO_SEND}
+    envvars_to_send = ENVVARS_TO_SEND if len(ENVVARS_TO_SEND) else human_uttr_attributes.get("envvars_to_send", [])
+
+    if len(envvars_to_send):
+        # get variables whose names are in `envvars_to_send` (split by comma if many)
+        # from the last human utterance's attributes
+        sending_variables = {
+            f"{var.lower()}s": [human_uttr_attributes.get(var.lower(), None)] for var in envvars_to_send
+        }
         if if_none_var_values(sending_variables):
-            logger.info(f"Did not get {ENVVARS_TO_SEND}'s values. Sending without them.")
+            # get variables whose names are in `envvars_to_send` (split by comma if many)
+            # from env variables
+            sending_variables = {f"{var.lower()}s": [getenv(var, None)] for var in envvars_to_send}
+            if if_none_var_values(sending_variables):
+                logger.info(f"Did not get {envvars_to_send}'s values. Sending without them.")
+            else:
+                logger.info(f"Got {envvars_to_send}'s values from environment.")
         else:
-            logger.info(f"Got {ENVVARS_TO_SEND}'s values from environment.")
+            logger.info(f"Got {envvars_to_send}'s values from attributes.")
     else:
-        logger.info(f"Got {ENVVARS_TO_SEND}'s values from attributes.")
+        sending_variables = {}
+
+    # adding kwargs to request from the last human utterance's attributes
+    lm_service_kwargs = human_uttr_attributes.get("lm_service_kwargs", None)
+    lm_service_kwargs = {} if lm_service_kwargs is None else lm_service_kwargs
+    for _key, _value in lm_service_kwargs.items():
+        logger.info(f"Got/Re-writing {_key}s values from kwargs.")
+        sending_variables[f"{_key}s"] = [deepcopy(_value)]
 
     shared_memory = int_ctx.get_shared_memory(ctx, actor)
     prompt = shared_memory.get("prompt", "")
diff --git a/skills/dff_universal_prompted_skill/Dockerfile b/skills/dff_universal_prompted_skill/Dockerfile
index adfee01c12..1c5e561ad4 100644
--- a/skills/dff_universal_prompted_skill/Dockerfile
+++ b/skills/dff_universal_prompted_skill/Dockerfile
@@ -17,6 +17,10 @@ ARG GENERATIVE_TIMEOUT
 ENV GENERATIVE_TIMEOUT ${GENERATIVE_TIMEOUT}
 ARG N_UTTERANCES_CONTEXT
 ENV N_UTTERANCES_CONTEXT ${N_UTTERANCES_CONTEXT}
+ARG DEFAULT_LM_SERVICE_URL
+ENV DEFAULT_LM_SERVICE_URL ${DEFAULT_LM_SERVICE_URL}
+ARG DEFAULT_LM_SERVICE_CONFIG
+ENV DEFAULT_LM_SERVICE_CONFIG ${DEFAULT_LM_SERVICE_CONFIG}
 
 COPY skills/dff_universal_prompted_skill/requirements.txt .
 RUN pip install -r requirements.txt
diff --git a/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json b/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json
index 1edf84f186..cecd6ab7cb 100644
--- a/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json
+++ b/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json
@@ -1,5 +1,5 @@
 {
-  "max_length": 60,
+  "max_length": 120,
   "min_length": 8,
   "top_p": 0.9,
   "temperature": 0.9,
diff --git a/skills/dff_universal_prompted_skill/scenario/response.py b/skills/dff_universal_prompted_skill/scenario/response.py
index da69b6e170..ee7e20d18f 100644
--- a/skills/dff_universal_prompted_skill/scenario/response.py
+++ b/skills/dff_universal_prompted_skill/scenario/response.py
@@ -3,6 +3,7 @@
 import re
 import requests
 import sentry_sdk
+from copy import deepcopy
 from os import getenv
 from typing import Any
 
@@ -22,30 +23,15 @@
 DEFAULT_CONFIDENCE = 0.9
 LOW_CONFIDENCE = 0.7
 DEFAULT_PROMPT = "Respond like a friendly chatbot."
-
-CONSIDERED_LM_SERVICES = {
-    "GPT-J 6B": {
-        "url": "http://transformers-lm-gptj:8130/respond",
-        "config": json.load(open("generative_configs/default_generative_config.json", "r")),
-    },
-    "BLOOMZ 7B": {
-        "url": "http://transformers-lm-bloomz7b:8146/respond",
-        "config": json.load(open("generative_configs/default_generative_config.json", "r")),
-    },
-    "ChatGPT": {
-        "url": "http://openai-api-chatgpt:8145/respond",
-        "config": json.load(open("generative_configs/openai-chatgpt.json", "r")),
-        "envvars_to_send": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
-    },
-    "GPT-3.5": {
-        "url": "http://openai-api-davinci3:8131/respond",
-        "config": json.load(open("generative_configs/openai-text-davinci-003.json", "r")),
-        "envvars_to_send": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
-    },
-    "Open-Assistant SFT-1 12B": {
-        "url": "http://transformers-lm-oasst12b:8158/respond",
-        "config": json.load(open("generative_configs/default_generative_config.json", "r")),
-    },
+DEFAULT_LM_SERVICE_URL = getenv("DEFAULT_LM_SERVICE_URL", "http://transformers-lm-oasst12b:8158/respond")
+DEFAULT_LM_SERVICE_CONFIG = getenv("DEFAULT_LM_SERVICE_CONFIG", "default_generative_config.json")
+DEFAULT_LM_SERVICE_CONFIG = json.load(open(f"generative_configs/{DEFAULT_LM_SERVICE_CONFIG}", "r"))
+ENVVARS_TO_SEND = {
+    "http://transformers-lm-gptj:8130/respond": [],
+    "http://transformers-lm-bloomz7b:8146/respond": [],
+    "http://transformers-lm-oasst12b:8158/respond": [],
+    "http://openai-api-chatgpt:8145/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
+    "http://openai-api-davinci3:8131/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
 }
 
 
@@ -102,20 +88,27 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr):
 
     dialog_context = compose_data_for_model(ctx, actor)
     logger.info(f"dialog_context: {dialog_context}")
-    last_uttr = int_ctx.get_last_human_utterance(ctx, actor)
-    prompt = last_uttr.get("attributes", {}).get("prompt", DEFAULT_PROMPT)
+    human_uttr_attributes = int_ctx.get_last_human_utterance(ctx, actor).get("attributes", {})
+    prompt = human_uttr_attributes.get("prompt", DEFAULT_PROMPT)
     logger.info(f"prompt: {prompt}")
-    lm_service = last_uttr.get("attributes", {}).get("lm_service", "GPT-J 6B")
-    logger.info(f"lm_service: {lm_service}")
-
-    if "envvars_to_send" in CONSIDERED_LM_SERVICES[lm_service]:
-        # get variables which names are in `ENVVARS_TO_SEND` (splitted by comma if many)
-        # from user_utterance attributes or from environment
-        envvars_to_send = CONSIDERED_LM_SERVICES[lm_service]["envvars_to_send"]
-        human_uttr_attributes = int_ctx.get_last_human_utterance(ctx, actor).get("attributes", {})
-        sending_variables = {f"{var}_list": [human_uttr_attributes.get(var.lower(), None)] for var in envvars_to_send}
+    lm_service_url = human_uttr_attributes.get("lm_service_url", DEFAULT_LM_SERVICE_URL)
+    logger.info(f"lm_service_url: {lm_service_url}")
+    # NOTE: `lm_service_config` is the config dictionary itself, not a path to a config file
+    lm_service_config = human_uttr_attributes.get("lm_service_config", None)
+    lm_service_kwargs = human_uttr_attributes.get("lm_service_kwargs", None)
+    lm_service_kwargs = {} if lm_service_kwargs is None else lm_service_kwargs
+    envvars_to_send = ENVVARS_TO_SEND.get(lm_service_url, [])
+
+    if len(envvars_to_send):
+        # get variables whose names are in `envvars_to_send` (split by comma if many)
+        # from the last human utterance's attributes
+        sending_variables = {
+            f"{var.lower()}s": [human_uttr_attributes.get(var.lower(), None)] for var in envvars_to_send
+        }
         if if_none_var_values(sending_variables):
-            sending_variables = {f"{var}_list": [getenv(var, None)] for var in envvars_to_send}
+            # get variables whose names are in `envvars_to_send` (split by comma if many)
+            # from env variables
+            sending_variables = {f"{var.lower()}s": [getenv(var, None)] for var in envvars_to_send}
             if if_none_var_values(sending_variables):
                 logger.info(f"Did not get {envvars_to_send}'s values. Sending without them.")
             else:
@@ -125,14 +118,19 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr):
     else:
         sending_variables = {}
 
+    # adding kwargs to request from the last human utterance's attributes
+    for _key, _value in lm_service_kwargs.items():
+        logger.info(f"Got/Re-writing {_key}s values from kwargs.")
+        sending_variables[f"{_key}s"] = [deepcopy(_value)]
+
     if len(dialog_context) > 0:
         try:
             response = requests.post(
-                CONSIDERED_LM_SERVICES[lm_service]["url"],
+                lm_service_url,
                 json={
                     "dialog_contexts": [dialog_context],
                     "prompts": [prompt],
-                    "configs": [CONSIDERED_LM_SERVICES[lm_service]["config"]],
+                    "configs": [lm_service_config],
                     **sending_variables,
                 },
                 timeout=GENERATIVE_TIMEOUT,

From 1f3ff3864217f061f3c3a8a9d6f408e8fa1059c1 Mon Sep 17 00:00:00 2001
From: "Dilyara Zharikova (Baymurzina)" <dilyara.rimovna@gmail.com>
Date: Fri, 12 May 2023 15:17:54 +0500
Subject: [PATCH 2/4] feat: gpt-4 and gpt-4 32k services (#456)

* feat: gpt-4 and gpt-4 32k services

* fix: add to universal

* fix: add params
---
 MODELS.md                                     | 16 ++++----
 .../universal_prompted_assistant/dev.yml      | 12 ++++++
 .../docker-compose.override.yml               | 41 +++++++++++++++++++
 components/jkdhfgkhgodfiugpojwrnkjnlg.yml     | 28 +++++++++++++
 components/oinfjkrbnfmhkfsjdhfsd.yml          | 27 ++++++++++++
 services/openai_api_lm/server.py              |  2 +
 .../openai-api-gpt4-32k/environment.yml       |  5 +++
 .../openai-api-gpt4-32k/service.yml           | 31 ++++++++++++++
 .../openai-api-gpt4/environment.yml           |  5 +++
 .../openai-api-gpt4/service.yml               | 31 ++++++++++++++
 .../scenario/response.py                      |  2 +
 11 files changed, 193 insertions(+), 7 deletions(-)
 create mode 100644 components/jkdhfgkhgodfiugpojwrnkjnlg.yml
 create mode 100644 components/oinfjkrbnfmhkfsjdhfsd.yml
 create mode 100644 services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml
 create mode 100644 services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml
 create mode 100644 services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml
 create mode 100644 services/openai_api_lm/service_configs/openai-api-gpt4/service.yml

diff --git a/MODELS.md b/MODELS.md
index cff49d98ac..6e69e8ad9a 100644
--- a/MODELS.md
+++ b/MODELS.md
@@ -2,10 +2,12 @@
 
 Here you may find a list of models that currently available for use in Generative Assistants.
 
-| model name               | container name           | model link                                                          | open-source?             | size (billion parameters) | GPU usage                 | max tokens (prompt + response) | description                                                                                                                                                                                                                                                                   |
-|--------------------------|--------------------------|---------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| BLOOMZ 7B                | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1)                | yes                      | 7.1B                      | 33GB                      | 2,048 tokens                   | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                  |
-| GPT-J 6B                 | transformers-lm-gptj     | [link](https://huggingface.co/EleutherAI/gpt-j-6b)                  | yes                      | 6B                        | 25GB                      | 2,048 tokens                   | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free.                                              |
-| GPT-3.5                  | openai-api-davinci3      | [link](https://platform.openai.com/docs/models/gpt-3-5)             | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,097 tokens                   | A multulingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.              |
-| ChatGPT                  | openai-api-chatgpt       | [link](https://platform.openai.com/docs/models/gpt-3-5)             | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,096 tokens                   | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
-| Open-Assistant SFT-1 12B | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b) | yes                      | 12B                       | 26GB (half-precision)     | 5,120 tokens                   | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free.                                                        |
+| model name                | container name           | model link                                                           | open-source?             | size (billion parameters) | GPU usage                 | max tokens (prompt + response) | description                                                                                                                                                                                                                                                                                                                                 |
+|---------------------------|--------------------------|----------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| BLOOMZ 7B                 | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1)                 | yes                      | 7.1B                      | 33GB                      | 2,048 tokens                   | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                                                                                |
+| GPT-J 6B                  | transformers-lm-gptj     | [link](https://huggingface.co/EleutherAI/gpt-j-6b)                   | yes                      | 6B                        | 25GB                      | 2,048 tokens                   | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                            |
+| GPT-3.5                   | openai-api-davinci3      | [link](https://platform.openai.com/docs/models/gpt-3-5)              | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,097 tokens                   | A multilingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                                                                            |
+| ChatGPT                   | openai-api-chatgpt       | [link](https://platform.openai.com/docs/models/gpt-3-5)              | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,096 tokens                   | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                                                               |
+| Open-Assistant SFT-1 12B  | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b)  | yes                      | 12B                       | 26GB (half-precision)     | 5,120 tokens                   | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                                      |
+| GPT-4                     | openai-api-gpt4          | [link](https://platform.openai.com/docs/models/gpt-4)                | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 8,192 tokens                   | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
+| GPT-4 32K                 | openai-api-gpt4-32k      | [link](https://platform.openai.com/docs/models/gpt-4)                | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 32,768 tokens                  | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. Same capabilities as the base gpt-4 model but with 4x the context length. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                  |
diff --git a/assistant_dists/universal_prompted_assistant/dev.yml b/assistant_dists/universal_prompted_assistant/dev.yml
index 6698e3d31d..57314e6a1a 100644
--- a/assistant_dists/universal_prompted_assistant/dev.yml
+++ b/assistant_dists/universal_prompted_assistant/dev.yml
@@ -54,6 +54,18 @@ services:
       - "./common:/src/common"
     ports:
       - 8131:8131
+  openai-api-gpt4:
+    volumes:
+      - "./services/openai_api_lm:/src"
+      - "./common:/src/common"
+    ports:
+      - 8159:8159
+  openai-api-gpt4-32k:
+    volumes:
+      - "./services/openai_api_lm:/src"
+      - "./common:/src/common"
+    ports:
+      - 8160:8160
   dff-universal-prompted-skill:
     volumes:
       - "./skills/dff_universal_prompted_skill:/src"
diff --git a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
index caa5a70fd1..686ec711db 100644
--- a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
+++ b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
@@ -5,6 +5,7 @@ services:
       WAIT_HOSTS: "sentseg:8011, ranking-based-response-selector:8002, combined-classification:8087, 
         sentence-ranker:8128, 
         transformers-lm-gptj:8130, transformers-lm-oasst12b:8158, openai-api-chatgpt:8145, openai-api-davinci3:8131,
+        openai-api-gpt4:8159, openai-api-gpt4-32k:8160,
         dff-universal-prompted-skill:8147"
       WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000}
 
@@ -164,6 +165,46 @@ services:
         reservations:
           memory: 100M
 
+  openai-api-gpt4:
+    env_file: [ .env ]
+    build:
+      args:
+        SERVICE_PORT: 8159
+        SERVICE_NAME: openai_api_gpt4
+        PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
+      context: .
+      dockerfile: ./services/openai_api_lm/Dockerfile
+    command: flask run -h 0.0.0.0 -p 8159
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      - FLASK_APP=server
+    deploy:
+      resources:
+        limits:
+          memory: 100M
+        reservations:
+          memory: 100M
+
+  openai-api-gpt4-32k:
+    env_file: [ .env ]
+    build:
+      args:
+        SERVICE_PORT: 8160
+        SERVICE_NAME: openai_api_gpt4_32k
+        PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-32k
+      context: .
+      dockerfile: ./services/openai_api_lm/Dockerfile
+    command: flask run -h 0.0.0.0 -p 8160
+    environment:
+      - CUDA_VISIBLE_DEVICES=0
+      - FLASK_APP=server
+    deploy:
+      resources:
+        limits:
+          memory: 100M
+        reservations:
+          memory: 100M
+
   dff-universal-prompted-skill:
     env_file: [ .env ]
     build:
diff --git a/components/jkdhfgkhgodfiugpojwrnkjnlg.yml b/components/jkdhfgkhgodfiugpojwrnkjnlg.yml
new file mode 100644
index 0000000000..ce20bde833
--- /dev/null
+++ b/components/jkdhfgkhgodfiugpojwrnkjnlg.yml
@@ -0,0 +1,28 @@
+name: openai_api_gpt4
+display_name: GPT-4
+component_type: Generative
+model_type: NN-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: A multilingual instruction-based large language model 
+  which is capable of code generation and other complex tasks. 
+  More capable than any GPT-3.5 model, able to do more complex tasks, 
+  and optimized for chat. Paid. 
+  You must provide your OpenAI API key to use the model. 
+  Your OpenAI account will be charged according to your usage.
+ram_usage: 100M
+gpu_usage: null
+group: services
+connector:
+  protocol: http
+  timeout: 20.0
+  url: http://openai-api-gpt4:8159/respond
+dialog_formatter: null
+response_formatter: null
+previous_services: null
+required_previous_services: null
+state_manager_method: null
+tags: null
+endpoint: respond
+service: services/openai_api_lm/service_configs/openai-api-gpt4
+date_created: '2023-04-16T09:45:32'
diff --git a/components/oinfjkrbnfmhkfsjdhfsd.yml b/components/oinfjkrbnfmhkfsjdhfsd.yml
new file mode 100644
index 0000000000..0d5c44200e
--- /dev/null
+++ b/components/oinfjkrbnfmhkfsjdhfsd.yml
@@ -0,0 +1,27 @@
+name: openai_api_gpt4_32k
+display_name: GPT-4 32k
+component_type: Generative
+model_type: NN-based
+is_customizable: false
+author: publisher@deeppavlov.ai
+description: A multilingual instruction-based large language model 
+  which is capable of code generation and other complex tasks. 
+  Same capabilities as the base gpt-4 model but with 4x the context length. 
+  Paid. You must provide your OpenAI API key to use the model. 
+  Your OpenAI account will be charged according to your usage.
+ram_usage: 100M
+gpu_usage: null
+group: services
+connector:
+  protocol: http
+  timeout: 20.0
+  url: http://openai-api-gpt4-32k:8160/respond
+dialog_formatter: null
+response_formatter: null
+previous_services: null
+required_previous_services: null
+state_manager_method: null
+tags: null
+endpoint: respond
+service: services/openai_api_lm/service_configs/openai-api-gpt4-32k
+date_created: '2023-04-16T09:45:32'
diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py
index ce8b4b7827..cc30279d1b 100644
--- a/services/openai_api_lm/server.py
+++ b/services/openai_api_lm/server.py
@@ -26,6 +26,8 @@
 DEFAULT_CONFIGS = {
     "text-davinci-003": json.load(open("generative_configs/openai-text-davinci-003.json", "r")),
     "gpt-3.5-turbo": json.load(open("generative_configs/openai-chatgpt.json", "r")),
+    "gpt-4": json.load(open("generative_configs/openai-chatgpt.json", "r")),
+    "gpt-4-32k": json.load(open("generative_configs/openai-chatgpt.json", "r")),
 }
 
 
diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml
new file mode 100644
index 0000000000..ed4954db75
--- /dev/null
+++ b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml
@@ -0,0 +1,5 @@
+SERVICE_PORT: 8160
+SERVICE_NAME: openai_api_gpt4_32k
+PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-32k
+CUDA_VISIBLE_DEVICES: '0'
+FLASK_APP: server
diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml
new file mode 100644
index 0000000000..69a3b398c8
--- /dev/null
+++ b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml
@@ -0,0 +1,31 @@
+name: openai-api-gpt4-32k
+endpoints:
+- respond
+compose:
+  env_file:
+  - .env
+  build:
+    args:
+      SERVICE_PORT: 8160
+      SERVICE_NAME: openai_api_gpt4_32k
+      PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-32k
+      CUDA_VISIBLE_DEVICES: '0'
+      FLASK_APP: server
+    context: .
+    dockerfile: ./services/openai_api_lm/Dockerfile
+  command: flask run -h 0.0.0.0 -p 8160
+  environment:
+  - CUDA_VISIBLE_DEVICES=0
+  - FLASK_APP=server
+  deploy:
+    resources:
+      limits:
+        memory: 100M
+      reservations:
+        memory: 100M
+  volumes:
+  - ./services/openai_api_lm:/src
+  - ./common:/src/common
+  ports:
+  - 8160:8160
+proxy: null
diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml b/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml
new file mode 100644
index 0000000000..f3cf8147a8
--- /dev/null
+++ b/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml
@@ -0,0 +1,5 @@
+SERVICE_PORT: 8159
+SERVICE_NAME: openai_api_gpt4
+PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
+CUDA_VISIBLE_DEVICES: '0'
+FLASK_APP: server
diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml b/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml
new file mode 100644
index 0000000000..898c870fbc
--- /dev/null
+++ b/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml
@@ -0,0 +1,31 @@
+name: openai-api-gpt4
+endpoints:
+- respond
+compose:
+  env_file:
+  - .env
+  build:
+    args:
+      SERVICE_PORT: 8159
+      SERVICE_NAME: openai_api_gpt4
+      PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
+      CUDA_VISIBLE_DEVICES: '0'
+      FLASK_APP: server
+    context: .
+    dockerfile: ./services/openai_api_lm/Dockerfile
+  command: flask run -h 0.0.0.0 -p 8159
+  environment:
+  - CUDA_VISIBLE_DEVICES=0
+  - FLASK_APP=server
+  deploy:
+    resources:
+      limits:
+        memory: 100M
+      reservations:
+        memory: 100M
+  volumes:
+  - ./services/openai_api_lm:/src
+  - ./common:/src/common
+  ports:
+  - 8159:8159
+proxy: null
diff --git a/skills/dff_universal_prompted_skill/scenario/response.py b/skills/dff_universal_prompted_skill/scenario/response.py
index ee7e20d18f..cddb3d82f8 100644
--- a/skills/dff_universal_prompted_skill/scenario/response.py
+++ b/skills/dff_universal_prompted_skill/scenario/response.py
@@ -32,6 +32,8 @@
     "http://transformers-lm-oasst12b:8158/respond": [],
     "http://openai-api-chatgpt:8145/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
     "http://openai-api-davinci3:8131/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
+    "http://openai-api-gpt4:8159/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
+    "http://openai-api-gpt4-32k:8160/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"],
 }
 
 

From 77029f93c5bf1efe5206285cde0ec9ee364d1494 Mon Sep 17 00:00:00 2001
From: "Dilyara Zharikova (Baymurzina)" <dilyara.rimovna@gmail.com>
Date: Fri, 12 May 2023 15:56:02 +0500
Subject: [PATCH 3/4] feat: update oasst model (#411)

* feat: update oasst model

* fix: model descriptions (#410)

* fix: sync dream_persona with demos (#413)

* Feat/llm-based and ranking-based dm (#405)

* feat: create files for llm based response selection

* feat: tests

* feat: algorithm

* fix: filter bad or toxic uttrs

* feat: ranking based response selector

* feat: model cards

* feat: ranking and refactoring

* feat: component files

* feat: component description

* feat: component description

* feat: use gpt-j resp selector

* feat: context

* feat: utilize hypotheses correctly

* fix: configs

* fix: configs

* fix: mapping

* fix: utilize ranker based selector

* fix: decay dummy

* fix: decay dummy

* fix: response

* fix: remove old selector

* fix: logs

* fix: getting annotations

* fix: code style

* fix: deepy uses resp selector

* fix: no language

* fix: ru usage

* fix: usage of formatter

* fix: codestyle

* fix: logs

* fix: after review

* fix: sync

* fix: add proxy usage (#416)

* feat: new proxy address (#417)

* prompt asks chatbot to be concise (otherwise problems with too long answers) (#418)

* added gptj to proxy file (#424)

* added gptj to proxy file

* return newline

* fix:dummy score decrease (#423)

* Feat/api keys from attributes (#415)

* fix: add attributes to formatter

* fix: try to get env vars from user utterance

* fix: none var values

* fix: logs correct

* fix: send lowercased keys

* prompt to ask fewer questions (#429)

* feat: universal dist requires api keys in request (#425)

* feat: template_template (#414)

* feat: template_template

* feat: readme

* fix: add chatgpt

* fix: template does not use secret env

* fixed cutoff for AI utterance (#426)

* fixed cutoff for AI utterance

* fix for cases with extra ROBOT: etc

* style

* fix for newline

* feat: list of public dists (#433)

* fix: remove badlists from prompted dists (#431)

* added info about payments (#432)

* added info about payments

* better descs + reworked fashion prompt

* table update

* typo fix

* typo fix

* typo

* also increase fashion timeout in yml files

* forgot one file

* fix: no beauty in table

---------

Co-authored-by: dilyararimovna <dilyara.rimovna@gmail.com>

* timeout 30 for fairytales (#427)

* longer generation and timeout to reduce cutoffs (#420)

* longer generation and timeout to reduce cutoffs

* also updated in comp and pipe files

* change prompts for ai faq and nutrition (#430)

* reworked prompts to perform well with oasst

* solved conflict

* fix: template dist and description (#435)

* Feat/multiskill assistant (#434)

* feat: multiskill assistant

* fix: prompt for meeting notes

* fix: waiting for

* fix: formatters

* fix: dot

* fix: secrets

* fix: add dream persona skill

* fix: add dream persona skill

* fix: add to public dists

* fix: folder name

* fix: description

* fix: component cards

* fix: component cards

* feat: thresholds for toxic_cls (#437)

* feat: thresholds for toxic_cls

* fix: codestyle

* fix: update pip (#439)

* fix: urllib neuralcoref

* fix: update pip

* fix: update pip in spacy annotator

* feat: smaller context for prompt selector (#438)

* feat: smaller context for prompt selector

* fix: index error

* Add compose definitions to component cards (#384)

* Fix requirements.txt (#84)

* fix itsdangerous requirements

* pin itsdangerous requirements for all flask==1.1.1 servers

* add compose definitions to component cards

* add host, port, endpoint to pipeline cards

* fix authors, add missing pipelines, add template

* add prompt-selector template, fix templates

* fix template keys

* remove unused comet-atomic component

* rework service, component cards

* move build args and env vars to environment.yml file

* fix empty proxy values

* fix pipeline confs

* fix component connector.annotations

* fix wait hosts in response_annotator_selectors service

* fix wait hosts in other services, environments

* fix response selectors, pipelines

* fix ports in response selector

* fix generative components

* fix: remove spelling and badlists from dream_persona_prompted

* fix: ignore all env_secret

* fix: new formatters for dff

* fix: universal dist does not utilize env_secret

* fix: multiskill components and timeouts

* fix: remove template_template distribution

* fix: deeppavlov assistant

* fix: formatters in components

* fix: volumes for resp selectors

* fix: correct filenames for multiskill

* fix: rullama7bru distribution

* fix pipelines after merge

* fix sentseg annotator source in pipelines

* fix agent services, components

* fix: sentseg and prompt selector do not wait for spelling

* fix: response_annotator_selectors card

* fix: timeouts

* fix: build args

* create services, components for prompt selector

* fix prompt selectors environment

* fix: flask server

* fix: path to prompt selector

* fix: required groups

* fix: required group skill_selectors

* fix: required elements

* fix: previous services

* fix: correct link to sentseg components card

* fix: correct link to sentseg components card

* remove unused prompt selector component

* remove old configs

* fix: rename files without dashes

---------

Co-authored-by: Andrii.Hura <54397922+AndriiHura@users.noreply.github.com>
Co-authored-by: Dilyara Baymurzina <dilyara.rimovna@gmail.com>

* Fix/remove duplicates (#443)

* fix: remove duplicating files

* fix: remove duplicating files

* Multiskill update (#440)

* long gen, long timeout, more prompts

* prompt update

* timeouts

* added longer context to persona

* longer max_tokens

* fix: increase to 20 sec

* fix: used config

* fix: dream persona openai

* fix:multiskill components

---------

Co-authored-by: dilyararimovna <dilyara.rimovna@gmail.com>

* Fix/increase all timeouts (#444)

* fix: increase all timeouts

* fix: increase all timeouts in pipelines

* fix: 7sec to 20 also

* fix: 5sec to 20 also for dream persona

* fix: rebase and change to new model

* fix: upd model

* fix: display name

---------

Co-authored-by: Nika Smilga <42929200+smilni@users.noreply.github.com>
Co-authored-by: Maxim Talimanchuk <mtalimanchuk@gmail.com>
Co-authored-by: Andrii.Hura <54397922+AndriiHura@users.noreply.github.com>
---
 MODELS.md                                      | 18 +++++++++---------
 .../docker-compose.override.yml                |  2 +-
 .../docker-compose.override.yml                |  2 +-
 .../docker-compose.override.yml                |  2 +-
 .../docker-compose.override.yml                |  2 +-
 components/sdkajfhsidhf8wfjh2ornfkle.yml       |  2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/MODELS.md b/MODELS.md
index 6e69e8ad9a..c544e2358e 100644
--- a/MODELS.md
+++ b/MODELS.md
@@ -2,12 +2,12 @@
 
 Here you may find a list of models that currently available for use in Generative Assistants.
 
-| model name                | container name           | model link                                                           | open-source?             | size (billion parameters) | GPU usage                 | max tokens (prompt + response) | description                                                                                                                                                                                                                                                                                                                                 |
-|---------------------------|--------------------------|----------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| BLOOMZ 7B                 | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1)                 | yes                      | 7.1B                      | 33GB                      | 2,048 tokens                   | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                                                                                |
-| GPT-J 6B                  | transformers-lm-gptj     | [link](https://huggingface.co/EleutherAI/gpt-j-6b)                   | yes                      | 6B                        | 25GB                      | 2,048 tokens                   | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                            |
-| GPT-3.5                   | openai-api-davinci3      | [link](https://platform.openai.com/docs/models/gpt-3-5)              | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,097 tokens                   | A multulingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                                                                            |
-| ChatGPT                   | openai-api-chatgpt       | [link](https://platform.openai.com/docs/models/gpt-3-5)              | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,096 tokens                   | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                                                               |
-| Open-Assistant SFT-1 12B  | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b)  | yes                      | 12B                       | 26GB (half-precision)     | 5,120 tokens                   | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                                      |
-| GPT-4                     | openai-api-gpt4          | [link](https://platform.openai.com/docs/models/gpt-4)                | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 8,192 tokens                   | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
-| GPT-4 32K                 | openai-api-gpt4-32k      | [link](https://platform.openai.com/docs/models/gpt-4)                | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 32,768 tokens                  | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. 	Same capabilities as the base gpt-4 mode but with 4x the context length. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                  |
+| model name                | container name           | model link                                                              | open-source?             | size (billion parameters) | GPU usage                 | max tokens (prompt + response) | description                                                                                                                                                                                                                                                                                                                                  |
+|---------------------------|--------------------------|-------------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| BLOOMZ 7B                 | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1)                    | yes                      | 7.1B                      | 33GB                      | 2,048 tokens                   | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                                                                                 |
+| GPT-J 6B                  | transformers-lm-gptj     | [link](https://huggingface.co/EleutherAI/gpt-j-6b)                      | yes                      | 6B                        | 25GB                      | 2,048 tokens                   | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                             |
+| GPT-3.5                   | openai-api-davinci3      | [link](https://platform.openai.com/docs/models/gpt-3-5)                 | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,097 tokens                   | A multilingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                                                                             |
+| ChatGPT                   | openai-api-chatgpt       | [link](https://platform.openai.com/docs/models/gpt-3-5)                 | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 4,096 tokens                   | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                                                                |
+| Open-Assistant Pythia 12B | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/pythia-12b-sft-v8-7k-steps) | yes                      | 12B                       | 26GB (half-precision)     | 5,120 tokens                   | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free.                                                                                                                       |
+| GPT-4                     | openai-api-gpt4          | [link](https://platform.openai.com/docs/models/gpt-4)                   | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 8,192 tokens                   | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
+| GPT-4 32K                 | openai-api-gpt4-32k      | [link](https://platform.openai.com/docs/models/gpt-4)                   | no (paid access via API) | supposedly, 175B          | - (cannot be run locally) | 32,768 tokens                  | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. Same capabilities as the base gpt-4 model but with 4x the context length. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage.                   |
diff --git a/assistant_dists/ai_faq_assistant/docker-compose.override.yml b/assistant_dists/ai_faq_assistant/docker-compose.override.yml
index 9679d152f2..17a0914d0b 100644
--- a/assistant_dists/ai_faq_assistant/docker-compose.override.yml
+++ b/assistant_dists/ai_faq_assistant/docker-compose.override.yml
@@ -107,7 +107,7 @@ services:
       args:
         SERVICE_PORT: 8158
         SERVICE_NAME: transformers_lm_oasst12b
-        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b
+        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps
         HALF_PRECISION: 1
       context: .
       dockerfile: ./services/transformers_lm/Dockerfile
diff --git a/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml b/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml
index 6dd4fbead9..10f242d77e 100644
--- a/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml
+++ b/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml
@@ -107,7 +107,7 @@ services:
       args:
         SERVICE_PORT: 8158
         SERVICE_NAME: transformers_lm_oasst12b
-        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b
+        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps
         HALF_PRECISION: 1
       context: .
       dockerfile: ./services/transformers_lm/Dockerfile
diff --git a/assistant_dists/nutrition_assistant/docker-compose.override.yml b/assistant_dists/nutrition_assistant/docker-compose.override.yml
index b783cc76fe..da8670188f 100644
--- a/assistant_dists/nutrition_assistant/docker-compose.override.yml
+++ b/assistant_dists/nutrition_assistant/docker-compose.override.yml
@@ -107,7 +107,7 @@ services:
       args:
         SERVICE_PORT: 8158
         SERVICE_NAME: transformers_lm_oasst12b
-        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b
+        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps
         HALF_PRECISION: 1
       context: .
       dockerfile: ./services/transformers_lm/Dockerfile
diff --git a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
index 686ec711db..d1c00672f7 100644
--- a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
+++ b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml
@@ -110,7 +110,7 @@ services:
       args:
         SERVICE_PORT: 8158
         SERVICE_NAME: transformers_lm_oasst12b
-        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b
+        PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps
         HALF_PRECISION: 1
       context: .
       dockerfile: ./services/transformers_lm/Dockerfile
diff --git a/components/sdkajfhsidhf8wfjh2ornfkle.yml b/components/sdkajfhsidhf8wfjh2ornfkle.yml
index 0e0bc8e5e1..afe277b4eb 100644
--- a/components/sdkajfhsidhf8wfjh2ornfkle.yml
+++ b/components/sdkajfhsidhf8wfjh2ornfkle.yml
@@ -1,5 +1,5 @@
 name: transformers_lm_oasst12b
-display_name: Open-Assistant SFT-1 12B
+display_name: Open-Assistant Pythia 12B
 component_type: Generative
 model_type: NN-based
 is_customizable: false

From 27d91740374868b7a5f3f4df0de191bb473b8559 Mon Sep 17 00:00:00 2001
From: "Dilyara Zharikova (Baymurzina)" <dilyara.rimovna@gmail.com>
Date: Mon, 15 May 2023 17:33:27 +0300
Subject: [PATCH 4/4] feat: update openai for gpt-4 (#458)

---
 services/openai_api_lm/requirements.txt | 2 +-
 services/openai_api_lm/server.py        | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/services/openai_api_lm/requirements.txt b/services/openai_api_lm/requirements.txt
index 44554938a9..928f3e3d58 100644
--- a/services/openai_api_lm/requirements.txt
+++ b/services/openai_api_lm/requirements.txt
@@ -6,4 +6,4 @@ sentry-sdk[flask]==0.14.1
 healthcheck==1.3.3
 jinja2<=3.0.3
 Werkzeug<=2.0.3
-openai==0.27.0
\ No newline at end of file
+openai==0.27.6
\ No newline at end of file
diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py
index cc30279d1b..f1e5191a79 100644
--- a/services/openai_api_lm/server.py
+++ b/services/openai_api_lm/server.py
@@ -29,6 +29,7 @@
     "gpt-4": json.load(open("generative_configs/openai-chatgpt.json", "r")),
     "gpt-4-32k": json.load(open("generative_configs/openai-chatgpt.json", "r")),
 }
+CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"]
 
 
 def generate_responses(context, openai_api_key, openai_org, prompt, generation_params, continue_last_uttr=False):
@@ -38,8 +39,8 @@ def generate_responses(context, openai_api_key, openai_org, prompt, generation_p
     openai.api_key = openai_api_key
     openai.organization = openai_org if openai_org else None
 
-    if PRETRAINED_MODEL_NAME_OR_PATH == "gpt-3.5-turbo":
-        logger.info("model=gpt-3.5-turbo, use special chat completion endpoint")
+    if PRETRAINED_MODEL_NAME_OR_PATH in CHAT_COMPLETION_MODELS:
+        logger.info("Use special chat completion endpoint")
         s = len(context) % 2
         messages = [
             {"role": "system", "content": prompt},
@@ -78,7 +79,7 @@ def generate_responses(context, openai_api_key, openai_org, prompt, generation_p
     elif isinstance(response, str):
         outputs = [response.strip()]
 
-    if PRETRAINED_MODEL_NAME_OR_PATH != "gpt-3.5-turbo":
+    if PRETRAINED_MODEL_NAME_OR_PATH not in CHAT_COMPLETION_MODELS:
         # post-processing of the responses by all models except of ChatGPT
         outputs = [GENERATIVE_ROBOT_TEMPLATE.sub("\n", resp).strip() for resp in outputs]
     return outputs