From b6ad4e0851839cef14dc7b09cae2022d6cd51bd4 Mon Sep 17 00:00:00 2001 From: "Dilyara Zharikova (Baymurzina)" Date: Fri, 12 May 2023 15:08:11 +0500 Subject: [PATCH 1/4] fix: getting url not name (#449) * fix: getting url not name * fix: return * fix: correct checks * fix: utilizing envvars * fix: envvars * fix: use attributes instead of uttr * fix: kwargs items * fix: kwargs items * fix: _list to s * feat: wrap kwargs * fix: configs from given * fix: configs are dicts not path * fix: use not long config as default * fix: lower env vars * fix: lower env vars * fix: codestyle * fix: add env vars to universal * fix: add logs to universal * fix: synch with template prompted * fix: params in dockerfile --- .../docker-compose.override.yml | 2 + .../llm_based_response_selector/server.py | 2 +- .../generative_configs/openai-chatgpt.json | 7 ++ .../openai-text-davinci-003-long.json | 7 ++ .../openai-text-davinci-003.json | 7 ++ services/openai_api_lm/server.py | 10 ++- services/openai_api_lm/test.py | 4 +- .../default_generative_config.json | 8 ++ services/transformers_lm/server.py | 6 ++ .../scenario/response.py | 31 ++++++-- .../dff_universal_prompted_skill/Dockerfile | 4 + .../default_generative_config.json | 2 +- .../scenario/response.py | 74 +++++++++---------- 13 files changed, 114 insertions(+), 50 deletions(-) create mode 100644 services/openai_api_lm/generative_configs/openai-chatgpt.json create mode 100644 services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json create mode 100644 services/openai_api_lm/generative_configs/openai-text-davinci-003.json create mode 100644 services/transformers_lm/generative_configs/default_generative_config.json diff --git a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml index 72e650887b..caa5a70fd1 100644 --- a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml +++ 
b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml @@ -172,6 +172,8 @@ services: SERVICE_NAME: dff_universal_prompted_skill GENERATIVE_TIMEOUT: 20 N_UTTERANCES_CONTEXT: 7 + DEFAULT_LM_SERVICE_URL: http://transformers-lm-oasst12b:8158/respond + DEFAULT_LM_SERVICE_CONFIG: default_generative_config.json context: . dockerfile: ./skills/dff_universal_prompted_skill/Dockerfile command: gunicorn --workers=1 server:app -b 0.0.0.0:8147 --reload diff --git a/response_selectors/llm_based_response_selector/server.py b/response_selectors/llm_based_response_selector/server.py index 045a0adb75..572a2d753c 100644 --- a/response_selectors/llm_based_response_selector/server.py +++ b/response_selectors/llm_based_response_selector/server.py @@ -35,7 +35,7 @@ ) ENVVARS_TO_SEND = getenv("ENVVARS_TO_SEND", None) ENVVARS_TO_SEND = [] if ENVVARS_TO_SEND is None else ENVVARS_TO_SEND.split(",") -sending_variables = {f"{var}_list": [getenv(var, None)] for var in ENVVARS_TO_SEND} +sending_variables = {f"{var}s": [getenv(var, None)] for var in ENVVARS_TO_SEND} # check if at least one of the env variables is not None if len(sending_variables.keys()) > 0 and all([var_value is None for var_value in sending_variables.values()]): raise NotImplementedError( diff --git a/services/openai_api_lm/generative_configs/openai-chatgpt.json b/services/openai_api_lm/generative_configs/openai-chatgpt.json new file mode 100644 index 0000000000..107e944b80 --- /dev/null +++ b/services/openai_api_lm/generative_configs/openai-chatgpt.json @@ -0,0 +1,7 @@ +{ + "max_tokens": 64, + "temperature": 0.4, + "top_p": 1.0, + "frequency_penalty": 0, + "presence_penalty": 0 +} \ No newline at end of file diff --git a/services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json b/services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json new file mode 100644 index 0000000000..7e23841d5f --- /dev/null +++ 
b/services/openai_api_lm/generative_configs/openai-text-davinci-003-long.json @@ -0,0 +1,7 @@ +{ + "max_tokens": 256, + "temperature": 0.4, + "top_p": 1.0, + "frequency_penalty": 0, + "presence_penalty": 0 +} \ No newline at end of file diff --git a/services/openai_api_lm/generative_configs/openai-text-davinci-003.json b/services/openai_api_lm/generative_configs/openai-text-davinci-003.json new file mode 100644 index 0000000000..107e944b80 --- /dev/null +++ b/services/openai_api_lm/generative_configs/openai-text-davinci-003.json @@ -0,0 +1,7 @@ +{ + "max_tokens": 64, + "temperature": 0.4, + "top_p": 1.0, + "frequency_penalty": 0, + "presence_penalty": 0 +} \ No newline at end of file diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py index d1b6da6c0c..ce8b4b7827 100644 --- a/services/openai_api_lm/server.py +++ b/services/openai_api_lm/server.py @@ -1,3 +1,4 @@ +import json import logging import os import time @@ -22,6 +23,10 @@ app = Flask(__name__) logging.getLogger("werkzeug").setLevel("WARNING") +DEFAULT_CONFIGS = { + "text-davinci-003": json.load(open("generative_configs/openai-text-davinci-003.json", "r")), + "gpt-3.5-turbo": json.load(open("generative_configs/openai-chatgpt.json", "r")), +} def generate_responses(context, openai_api_key, openai_org, prompt, generation_params, continue_last_uttr=False): @@ -88,10 +93,11 @@ def respond(): contexts = request.json.get("dialog_contexts", []) prompts = request.json.get("prompts", []) configs = request.json.get("configs", []) + configs = [DEFAULT_CONFIGS[PRETRAINED_MODEL_NAME_OR_PATH] if el is None else el for el in configs] if len(contexts) > 0 and len(prompts) == 0: prompts = [""] * len(contexts) - openai_api_keys = request.json.get("OPENAI_API_KEY_list", []) - openai_orgs = request.json.get("OPENAI_ORGANIZATION_list", None) + openai_api_keys = request.json.get("openai_api_keys", []) + openai_orgs = request.json.get("openai_api_organizations", None) openai_orgs = [None] * 
len(contexts) if openai_orgs is None else openai_orgs try: diff --git a/services/openai_api_lm/test.py b/services/openai_api_lm/test.py index 845a41259a..e13ed54bb2 100644 --- a/services/openai_api_lm/test.py +++ b/services/openai_api_lm/test.py @@ -30,8 +30,8 @@ def test_respond(): "dialog_contexts": contexts, "prompts": prompts, "configs": [DEFAULT_CONFIG] * len(contexts), - "OPENAI_API_KEY_list": [OPENAI_API_KEY] * len(contexts), - "OPENAI_ORGANIZATION_list": [OPENAI_ORGANIZATION] * len(contexts), + "openai_api_keys": [OPENAI_API_KEY] * len(contexts), + "openai_api_organizations": [OPENAI_ORGANIZATION] * len(contexts), }, ).json() print(result) diff --git a/services/transformers_lm/generative_configs/default_generative_config.json b/services/transformers_lm/generative_configs/default_generative_config.json new file mode 100644 index 0000000000..cecd6ab7cb --- /dev/null +++ b/services/transformers_lm/generative_configs/default_generative_config.json @@ -0,0 +1,8 @@ +{ + "max_length": 120, + "min_length": 8, + "top_p": 0.9, + "temperature": 0.9, + "do_sample": true, + "num_return_sequences": 2 +} \ No newline at end of file diff --git a/services/transformers_lm/server.py b/services/transformers_lm/server.py index c11d585c36..93418068e9 100644 --- a/services/transformers_lm/server.py +++ b/services/transformers_lm/server.py @@ -1,3 +1,4 @@ +import json import logging import os import time @@ -27,6 +28,10 @@ app = Flask(__name__) logging.getLogger("werkzeug").setLevel("WARNING") +DEFAULT_CONFIGS = { + "EleutherAI/gpt-j-6B": json.load(open("generative_configs/default_generative_config.json", "r")), + "OpenAssistant/oasst-sft-1-pythia-12b": json.load(open("generative_configs/default_generative_config.json", "r")), +} def generate_responses(context, model, tokenizer, prompt, generation_params, continue_last_uttr=False): @@ -110,6 +115,7 @@ def respond(): contexts = request.json.get("dialog_contexts", []) prompts = request.json.get("prompts", []) configs = 
request.json.get("configs", []) + configs = [DEFAULT_CONFIGS[PRETRAINED_MODEL_NAME_OR_PATH] if el is None else el for el in configs] if len(contexts) > 0 and len(prompts) == 0: prompts = [""] * len(contexts) diff --git a/skills/dff_template_prompted_skill/scenario/response.py b/skills/dff_template_prompted_skill/scenario/response.py index e9b7dfdd34..ac5bed3f4d 100644 --- a/skills/dff_template_prompted_skill/scenario/response.py +++ b/skills/dff_template_prompted_skill/scenario/response.py @@ -3,6 +3,7 @@ import re import requests import sentry_sdk +from copy import deepcopy from os import getenv from typing import Any @@ -92,15 +93,33 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr): # get variables which names are in `ENVVARS_TO_SEND` (splitted by comma if many) # from user_utterance attributes or from environment human_uttr_attributes = int_ctx.get_last_human_utterance(ctx, actor).get("attributes", {}) - sending_variables = {f"{var}_list": [human_uttr_attributes.get(var.lower(), None)] for var in ENVVARS_TO_SEND} - if if_none_var_values(sending_variables): - sending_variables = {f"{var}_list": [getenv(var, None)] for var in ENVVARS_TO_SEND} + envvars_to_send = ENVVARS_TO_SEND if len(ENVVARS_TO_SEND) else human_uttr_attributes.get("envvars_to_send", []) + + if len(envvars_to_send): + # get variables which names are in `envvars_to_send` (splitted by comma if many) + # from the last human utterance's attributes + sending_variables = { + f"{var.lower()}s": [human_uttr_attributes.get(var.lower(), None)] for var in envvars_to_send + } if if_none_var_values(sending_variables): - logger.info(f"Did not get {ENVVARS_TO_SEND}'s values. 
Sending without them.") + # get variables which names are in `envvars_to_send` (splitted by comma if many) + # from env variables + sending_variables = {f"{var.lower()}s": [getenv(var, None)] for var in envvars_to_send} + if if_none_var_values(sending_variables): + logger.info(f"Did not get {envvars_to_send}'s values. Sending without them.") + else: + logger.info(f"Got {envvars_to_send}'s values from environment.") else: - logger.info(f"Got {ENVVARS_TO_SEND}'s values from environment.") + logger.info(f"Got {envvars_to_send}'s values from attributes.") else: - logger.info(f"Got {ENVVARS_TO_SEND}'s values from attributes.") + sending_variables = {} + + # adding kwargs to request from the last human utterance's attributes + lm_service_kwargs = human_uttr_attributes.get("lm_service_kwargs", None) + lm_service_kwargs = {} if lm_service_kwargs is None else lm_service_kwargs + for _key, _value in lm_service_kwargs.items(): + logger.info(f"Got/Re-writing {_key}s values from kwargs.") + sending_variables[f"{_key}s"] = [deepcopy(_value)] shared_memory = int_ctx.get_shared_memory(ctx, actor) prompt = shared_memory.get("prompt", "") diff --git a/skills/dff_universal_prompted_skill/Dockerfile b/skills/dff_universal_prompted_skill/Dockerfile index adfee01c12..1c5e561ad4 100644 --- a/skills/dff_universal_prompted_skill/Dockerfile +++ b/skills/dff_universal_prompted_skill/Dockerfile @@ -17,6 +17,10 @@ ARG GENERATIVE_TIMEOUT ENV GENERATIVE_TIMEOUT ${GENERATIVE_TIMEOUT} ARG N_UTTERANCES_CONTEXT ENV N_UTTERANCES_CONTEXT ${N_UTTERANCES_CONTEXT} +ARG DEFAULT_LM_SERVICE_URL +ENV DEFAULT_LM_SERVICE_URL ${DEFAULT_LM_SERVICE_URL} +ARG DEFAULT_LM_SERVICE_CONFIG +ENV DEFAULT_LM_SERVICE_CONFIG ${DEFAULT_LM_SERVICE_CONFIG} COPY skills/dff_universal_prompted_skill/requirements.txt . 
RUN pip install -r requirements.txt diff --git a/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json b/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json index 1edf84f186..cecd6ab7cb 100644 --- a/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json +++ b/skills/dff_universal_prompted_skill/generative_configs/default_generative_config.json @@ -1,5 +1,5 @@ { - "max_length": 60, + "max_length": 120, "min_length": 8, "top_p": 0.9, "temperature": 0.9, diff --git a/skills/dff_universal_prompted_skill/scenario/response.py b/skills/dff_universal_prompted_skill/scenario/response.py index da69b6e170..ee7e20d18f 100644 --- a/skills/dff_universal_prompted_skill/scenario/response.py +++ b/skills/dff_universal_prompted_skill/scenario/response.py @@ -3,6 +3,7 @@ import re import requests import sentry_sdk +from copy import deepcopy from os import getenv from typing import Any @@ -22,30 +23,15 @@ DEFAULT_CONFIDENCE = 0.9 LOW_CONFIDENCE = 0.7 DEFAULT_PROMPT = "Respond like a friendly chatbot." 
- -CONSIDERED_LM_SERVICES = { - "GPT-J 6B": { - "url": "http://transformers-lm-gptj:8130/respond", - "config": json.load(open("generative_configs/default_generative_config.json", "r")), - }, - "BLOOMZ 7B": { - "url": "http://transformers-lm-bloomz7b:8146/respond", - "config": json.load(open("generative_configs/default_generative_config.json", "r")), - }, - "ChatGPT": { - "url": "http://openai-api-chatgpt:8145/respond", - "config": json.load(open("generative_configs/openai-chatgpt.json", "r")), - "envvars_to_send": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], - }, - "GPT-3.5": { - "url": "http://openai-api-davinci3:8131/respond", - "config": json.load(open("generative_configs/openai-text-davinci-003.json", "r")), - "envvars_to_send": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], - }, - "Open-Assistant SFT-1 12B": { - "url": "http://transformers-lm-oasst12b:8158/respond", - "config": json.load(open("generative_configs/default_generative_config.json", "r")), - }, +DEFAULT_LM_SERVICE_URL = getenv("DEFAULT_LM_SERVICE_URL", "http://transformers-lm-oasst12b:8158/respond") +DEFAULT_LM_SERVICE_CONFIG = getenv("DEFAULT_LM_SERVICE_CONFIG", "default_generative_config.json") +DEFAULT_LM_SERVICE_CONFIG = json.load(open(f"generative_configs/{DEFAULT_LM_SERVICE_CONFIG}", "r")) +ENVVARS_TO_SEND = { + "http://transformers-lm-gptj:8130/respond": [], + "http://transformers-lm-bloomz7b:8146/respond": [], + "http://transformers-lm-oasst12b:8158/respond": [], + "http://openai-api-chatgpt:8145/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], + "http://openai-api-davinci3:8131/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], } @@ -102,20 +88,27 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr): dialog_context = compose_data_for_model(ctx, actor) logger.info(f"dialog_context: {dialog_context}") - last_uttr = int_ctx.get_last_human_utterance(ctx, actor) - prompt = last_uttr.get("attributes", {}).get("prompt", DEFAULT_PROMPT) + human_uttr_attributes = 
int_ctx.get_last_human_utterance(ctx, actor).get("attributes", {}) + prompt = human_uttr_attributes.get("prompt", DEFAULT_PROMPT) logger.info(f"prompt: {prompt}") - lm_service = last_uttr.get("attributes", {}).get("lm_service", "GPT-J 6B") - logger.info(f"lm_service: {lm_service}") - - if "envvars_to_send" in CONSIDERED_LM_SERVICES[lm_service]: - # get variables which names are in `ENVVARS_TO_SEND` (splitted by comma if many) - # from user_utterance attributes or from environment - envvars_to_send = CONSIDERED_LM_SERVICES[lm_service]["envvars_to_send"] - human_uttr_attributes = int_ctx.get_last_human_utterance(ctx, actor).get("attributes", {}) - sending_variables = {f"{var}_list": [human_uttr_attributes.get(var.lower(), None)] for var in envvars_to_send} + lm_service_url = human_uttr_attributes.get("lm_service_url", DEFAULT_LM_SERVICE_URL) + logger.info(f"lm_service_url: {lm_service_url}") + # this is a dictionary! not a file! + lm_service_config = human_uttr_attributes.get("lm_service_config", None) + lm_service_kwargs = human_uttr_attributes.get("lm_service_kwargs", None) + lm_service_kwargs = {} if lm_service_kwargs is None else lm_service_kwargs + envvars_to_send = ENVVARS_TO_SEND.get(lm_service_url, []) + + if len(envvars_to_send): + # get variables which names are in `envvars_to_send` (splitted by comma if many) + # from the last human utterance's attributes + sending_variables = { + f"{var.lower()}s": [human_uttr_attributes.get(var.lower(), None)] for var in envvars_to_send + } if if_none_var_values(sending_variables): - sending_variables = {f"{var}_list": [getenv(var, None)] for var in envvars_to_send} + # get variables which names are in `envvars_to_send` (splitted by comma if many) + # from env variables + sending_variables = {f"{var.lower()}s": [getenv(var, None)] for var in envvars_to_send} if if_none_var_values(sending_variables): logger.info(f"Did not get {envvars_to_send}'s values. 
Sending without them.") else: @@ -125,14 +118,19 @@ def gathering_responses(reply, confidence, human_attr, bot_attr, attr): else: sending_variables = {} + # adding kwargs to request from the last human utterance's attributes + for _key, _value in lm_service_kwargs.items(): + logger.info(f"Got/Re-writing {_key}s values from kwargs.") + sending_variables[f"{_key}s"] = [deepcopy(_value)] + if len(dialog_context) > 0: try: response = requests.post( - CONSIDERED_LM_SERVICES[lm_service]["url"], + lm_service_url, json={ "dialog_contexts": [dialog_context], "prompts": [prompt], - "configs": [CONSIDERED_LM_SERVICES[lm_service]["config"]], + "configs": [lm_service_config], **sending_variables, }, timeout=GENERATIVE_TIMEOUT, From 1f3ff3864217f061f3c3a8a9d6f408e8fa1059c1 Mon Sep 17 00:00:00 2001 From: "Dilyara Zharikova (Baymurzina)" Date: Fri, 12 May 2023 15:17:54 +0500 Subject: [PATCH 2/4] feat: gpt-4 and gpt-4 32k services (#456) * feat: gpt-4 and gpt-4 32k services * fix: add to universal * fix: add params --- MODELS.md | 16 ++++---- .../universal_prompted_assistant/dev.yml | 12 ++++++ .../docker-compose.override.yml | 41 +++++++++++++++++++ components/jkdhfgkhgodfiugpojwrnkjnlg.yml | 28 +++++++++++++ components/oinfjkrbnfmhkfsjdhfsd.yml | 27 ++++++++++++ services/openai_api_lm/server.py | 2 + .../openai-api-gpt4-32k/environment.yml | 5 +++ .../openai-api-gpt4-32k/service.yml | 31 ++++++++++++++ .../openai-api-gpt4/environment.yml | 5 +++ .../openai-api-gpt4/service.yml | 31 ++++++++++++++ .../scenario/response.py | 2 + 11 files changed, 193 insertions(+), 7 deletions(-) create mode 100644 components/jkdhfgkhgodfiugpojwrnkjnlg.yml create mode 100644 components/oinfjkrbnfmhkfsjdhfsd.yml create mode 100644 services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml create mode 100644 services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml create mode 100644 services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml create mode 100644 
services/openai_api_lm/service_configs/openai-api-gpt4/service.yml diff --git a/MODELS.md b/MODELS.md index cff49d98ac..6e69e8ad9a 100644 --- a/MODELS.md +++ b/MODELS.md @@ -2,10 +2,12 @@ Here you may find a list of models that currently available for use in Generative Assistants. -| model name | container name | model link | open-source? | size (billion parameters) | GPU usage | max tokens (prompt + response) | description | -|--------------------------|--------------------------|---------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| BLOOMZ 7B | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1) | yes | 7.1B | 33GB | 2,048 tokens | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free. | -| GPT-J 6B | transformers-lm-gptj | [link](https://huggingface.co/EleutherAI/gpt-j-6b) | yes | 6B | 25GB | 2,048 tokens | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free. | -| GPT-3.5 | openai-api-davinci3 | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,097 tokens | A multulingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. 
Your OpenAI account will be charged according to your usage. | -| ChatGPT | openai-api-chatgpt | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,096 tokens | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | -| Open-Assistant SFT-1 12B | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b) | yes | 12B | 26GB (half-precision) | 5,120 tokens | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free. | +| model name | container name | model link | open-source? | size (billion parameters) | GPU usage | max tokens (prompt + response) | description | +|---------------------------|--------------------------|----------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| BLOOMZ 7B | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1) | yes | 7.1B | 33GB | 2,048 tokens | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free. 
| +| GPT-J 6B | transformers-lm-gptj | [link](https://huggingface.co/EleutherAI/gpt-j-6b) | yes | 6B | 25GB | 2,048 tokens | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free. | +| GPT-3.5 | openai-api-davinci3 | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,097 tokens | A multilingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | +| ChatGPT | openai-api-chatgpt | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,096 tokens | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | +| Open-Assistant SFT-1 12B | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b) | yes | 12B | 26GB (half-precision) | 5,120 tokens | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free. | +| GPT-4 | openai-api-gpt4 | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 8,192 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. 
More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | +| GPT-4 32K | openai-api-gpt4-32k | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 32,768 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. Same capabilities as the base gpt-4 model but with 4x the context length. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | diff --git a/assistant_dists/universal_prompted_assistant/dev.yml b/assistant_dists/universal_prompted_assistant/dev.yml index 6698e3d31d..57314e6a1a 100644 --- a/assistant_dists/universal_prompted_assistant/dev.yml +++ b/assistant_dists/universal_prompted_assistant/dev.yml @@ -54,6 +54,18 @@ services: - "./common:/src/common" ports: - 8131:8131 + openai-api-gpt4: + volumes: + - "./services/openai_api_lm:/src" + - "./common:/src/common" + ports: + - 8159:8159 + openai-api-gpt4-32k: + volumes: + - "./services/openai_api_lm:/src" + - "./common:/src/common" + ports: + - 8160:8160 dff-universal-prompted-skill: volumes: - "./skills/dff_universal_prompted_skill:/src" diff --git a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml index caa5a70fd1..686ec711db 100644 --- a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml +++ b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml @@ -5,6 +5,7 @@ services: WAIT_HOSTS: "sentseg:8011, ranking-based-response-selector:8002, combined-classification:8087, sentence-ranker:8128, transformers-lm-gptj:8130, transformers-lm-oasst12b:8158, openai-api-chatgpt:8145, openai-api-davinci3:8131, + 
openai-api-gpt4:8159, openai-api-gpt4-32k:8160, dff-universal-prompted-skill:8147" WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000} @@ -164,6 +165,46 @@ services: reservations: memory: 100M + openai-api-gpt4: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8159 + SERVICE_NAME: openai_api_gpt4 + PRETRAINED_MODEL_NAME_OR_PATH: gpt-4 + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8159 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + + openai-api-gpt4-32k: + env_file: [ .env ] + build: + args: + SERVICE_PORT: 8160 + SERVICE_NAME: openai_api_gpt4_32k + PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-32k + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8160 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + dff-universal-prompted-skill: env_file: [ .env ] build: diff --git a/components/jkdhfgkhgodfiugpojwrnkjnlg.yml b/components/jkdhfgkhgodfiugpojwrnkjnlg.yml new file mode 100644 index 0000000000..ce20bde833 --- /dev/null +++ b/components/jkdhfgkhgodfiugpojwrnkjnlg.yml @@ -0,0 +1,28 @@ +name: openai_api_gpt4 +display_name: GPT-4 +component_type: Generative +model_type: NN-based +is_customizable: false +author: publisher@deeppavlov.ai +description: A multilingual instruction-based large language model + which is capable of code generation and other complex tasks. + More capable than any GPT-3.5 model, able to do more complex tasks, + and optimized for chat. Paid. + You must provide your OpenAI API key to use the model. + Your OpenAI account will be charged according to your usage. 
+ram_usage: 100M +gpu_usage: null +group: services +connector: + protocol: http + timeout: 20.0 + url: http://openai-api-gpt4:8159/respond +dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: null +tags: null +endpoint: respond +service: services/openai_api_lm/service_configs/openai-api-gpt4 +date_created: '2023-04-16T09:45:32' diff --git a/components/oinfjkrbnfmhkfsjdhfsd.yml b/components/oinfjkrbnfmhkfsjdhfsd.yml new file mode 100644 index 0000000000..0d5c44200e --- /dev/null +++ b/components/oinfjkrbnfmhkfsjdhfsd.yml @@ -0,0 +1,27 @@ +name: openai_api_gpt4_32k +display_name: GPT-4 32k +component_type: Generative +model_type: NN-based +is_customizable: false +author: publisher@deeppavlov.ai +description: A multilingual instruction-based large language model + which is capable of code generation and other complex tasks. + Same capabilities as the base gpt-4 model but with 4x the context length. + Paid. You must provide your OpenAI API key to use the model. + Your OpenAI account will be charged according to your usage. 
+ram_usage: 100M +gpu_usage: null +group: services +connector: + protocol: http + timeout: 20.0 + url: http://openai-api-gpt4-32k:8160/respond +dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: null +tags: null +endpoint: respond +service: services/openai_api_lm/service_configs/openai-api-gpt4-32k +date_created: '2023-04-16T09:45:32' diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py index ce8b4b7827..cc30279d1b 100644 --- a/services/openai_api_lm/server.py +++ b/services/openai_api_lm/server.py @@ -26,6 +26,8 @@ DEFAULT_CONFIGS = { "text-davinci-003": json.load(open("generative_configs/openai-text-davinci-003.json", "r")), "gpt-3.5-turbo": json.load(open("generative_configs/openai-chatgpt.json", "r")), + "gpt-4": json.load(open("generative_configs/openai-chatgpt.json", "r")), + "gpt-4-32k": json.load(open("generative_configs/openai-chatgpt.json", "r")), } diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml new file mode 100644 index 0000000000..ed4954db75 --- /dev/null +++ b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/environment.yml @@ -0,0 +1,5 @@ +SERVICE_PORT: 8160 +SERVICE_NAME: openai_api_gpt4_32k +PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-32k +CUDA_VISIBLE_DEVICES: '0' +FLASK_APP: server diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml new file mode 100644 index 0000000000..69a3b398c8 --- /dev/null +++ b/services/openai_api_lm/service_configs/openai-api-gpt4-32k/service.yml @@ -0,0 +1,31 @@ +name: openai-api-gpt4-32k +endpoints: +- respond +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8160 + SERVICE_NAME: openai_api_gpt4_32k + PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-32k + CUDA_VISIBLE_DEVICES: '0' + 
FLASK_APP: server + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8160 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + volumes: + - ./services/openai_api_lm:/src + - ./common:/src/common + ports: + - 8160:8160 +proxy: null diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml b/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml new file mode 100644 index 0000000000..f3cf8147a8 --- /dev/null +++ b/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml @@ -0,0 +1,5 @@ +SERVICE_PORT: 8159 +SERVICE_NAME: openai_api_gpt4 +PRETRAINED_MODEL_NAME_OR_PATH: gpt-4 +CUDA_VISIBLE_DEVICES: '0' +FLASK_APP: server diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml b/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml new file mode 100644 index 0000000000..898c870fbc --- /dev/null +++ b/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml @@ -0,0 +1,31 @@ +name: openai-api-gpt4 +endpoints: +- respond +compose: + env_file: + - .env + build: + args: + SERVICE_PORT: 8159 + SERVICE_NAME: openai_api_gpt4 + PRETRAINED_MODEL_NAME_OR_PATH: gpt-4 + CUDA_VISIBLE_DEVICES: '0' + FLASK_APP: server + context: . 
+ dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8159 + environment: + - CUDA_VISIBLE_DEVICES=0 + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + volumes: + - ./services/openai_api_lm:/src + - ./common:/src/common + ports: + - 8159:8159 +proxy: null diff --git a/skills/dff_universal_prompted_skill/scenario/response.py b/skills/dff_universal_prompted_skill/scenario/response.py index ee7e20d18f..cddb3d82f8 100644 --- a/skills/dff_universal_prompted_skill/scenario/response.py +++ b/skills/dff_universal_prompted_skill/scenario/response.py @@ -32,6 +32,8 @@ "http://transformers-lm-oasst12b:8158/respond": [], "http://openai-api-chatgpt:8145/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], "http://openai-api-davinci3:8131/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], + "http://openai-api-gpt4:8159/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], + "http://openai-api-gpt4-32k:8160/respond": ["OPENAI_API_KEY", "OPENAI_ORGANIZATION"], } From 77029f93c5bf1efe5206285cde0ec9ee364d1494 Mon Sep 17 00:00:00 2001 From: "Dilyara Zharikova (Baymurzina)" Date: Fri, 12 May 2023 15:56:02 +0500 Subject: [PATCH 3/4] feat: update oasst model (#411) * feat: update oasst model * fix: model descriptions (#410) * fix: sync dream_persona with demos (#413) * Feat/llm-based and ranking-based dm (#405) * feat: create files for llm based response selection * feat: tests * feat: algorithm * fix: filter bad or toxic uttrs * feat: ranking based response selector * feat: model cards * feat: ranking and refactoring * feat: component files * feat: component description * feat: component description * feat: use gpt-j resp selector * feat: context * feat: utilize hypotheses correctly * fix: configs * fix: configs * fix: mapping * fix: utilize ranker based selector * fix: decay dummy * fix: decay dummy * fix: response * fix: remove old selector * fix: logs * fix: getting annotations * fix: code style * 
fix: deepy uses resp selector * fix: no language * fix: ru usage * fix: usage of formatter * fix: codestyle * fix: logs * fix: after review * fix: sync * fix: add proxy usage (#416) * feat: new proxy address (#417) * prompt asks chatbot to be concise (otherwise problems with too long answers) (#418) * added gptj to proxy file (#424) * added gptj to proxy file * return newline * fix:dummy score decrease (#423) * Feat/api keys from attributes (#415) * fix: add attributes to formatter * fix: try to get env vars from user utterance * fix: none var values * fix: logs correct * fix: send lowercased keys * prompt to ask fewer questions (#429) * feat: universal dist requires api keys in request (#425) * feat: template_template (#414) * feat: template_template * feat: readme * fix: add chatgpt * fix: tempalte does not use secret env * fixed cutoff for AI utterance (#426) * fixed cutoff for AI utterance * fix for cases with extra ROBOT: etc * style * fix for newline * feat: list of public dists (#433) * fix: remove badlists from prompted dists (#431) * added info about payments (#432) * added info about payments * better descs + reworked fashion prompt * table update * typo fix * typo fix * typo * also increase fashion timeout in yml files * forgot one file * fix: no beauty in table --------- Co-authored-by: dilyararimovna * timeout 30 for fairytales (#427) * longer generation and timeout to reduce cutoffs (#420) * longer generation and timeout to reduce cutoffs * also updated in comp and pipe files * change prompts for ai faq and nutrition (#430) * reworked prompts to perform well with oasst * solved conflict * fix: tempalte dist and description (#435) * Feat/multiskill assistant (#434) * feat: multiskill assistant * fix: prompt for meeting notes * fix: waiting for * fix: formatters * fix: dot * fix: secrets * fix: add dream persona skill * fix: add dream persona skill * fix: add to public dists * fix: folder name * fix: description * fix: component cards * fix: component 
cards * feat: thresholds for toxic_cls (#437) * feat: thresholds for toxic_cls * fix: codestyle * fix: update pip (#439) * fix: urllib neuralcoref * fix: update pip * fix: update pip in spacy annotator * feat: smaller context for prompt selector (#438) * feat: smaller context for prompt selector * fix: index error * Add compose definitions to component cards (#384) * Fix requirements.txt (#84) * fix itsdangerous requirements * pin itsdangerous requirements for all flask==1.1.1 servers * add compose definitions to component cards * add host, port, endpoint to pipeline cards * fix authors, add missing pipelines, add template * add prompt-selector template, fix templates * fix template keys * remove unused comet-atomic component * rework service, component cards * move build args and env vars to environment.yml file * fix empty proxy values * fix pipeline confs * fix component connector.annotations * fix wait hosts in response_annotator_selectors service * fix wait hosts in other services, environments * fix response selectors, pipelines * fix ports in response selector * fix generative components * fix: remove spelling and badlists from dream_persona_prompted * fix: ignore all env_secret * fix: new formatters for dff * fix: universal dist does not utilize env_secret * fix: multiskill components and timeouts * fix: remove template_template distribution * fix: deeppavlov assistant * fix: formatters in components * fix: volumes for resp selectors * fix: correct filenames for multiskill * fix: rullama7bru distribution * fix pipelines after merge * fix sentseg annotator source in pipelines * fix agent services, components * fix: sentseg and prompt selector do not wait for spelling * fix: response_annotator_selectors card * fix: timeouts * fix: build args * create services, components for prompt selector * fix prompt selectors environment * fix: flask server * fix: path to prompt selector * fix: required groups * fix: required group skill_selectors * fix: required elements 
* fix: previous services * fix: correct link to sentseg components card * fix: correct link to sentseg components card * remove unused prompt selector component * remove old configs * fix: rename files without dashes --------- Co-authored-by: Andrii.Hura <54397922+AndriiHura@users.noreply.github.com> Co-authored-by: Dilyara Baymurzina * Fix/remove duplicates (#443) * fix: remove duplicating files * fix: remove duplicating files * Multiskill update (#440) * long gen, long timeout, more prompts * prompt update * timeouts * added longer context to persona * longer max_tokens * fix: increase to 20 sec * fix: used config * fix: dream persona openai * fix:multiskill components --------- Co-authored-by: dilyararimovna * Fix/increase all timeouts (#444) * fix: increase all timeouts * fix: increase all timeouts in pipelines * fix: 7sec to 20 also * fix: 5sec to 20 also for dream persona * fix: rebase and change to new model * fix: upd model * fix: display name --------- Co-authored-by: Nika Smilga <42929200+smilni@users.noreply.github.com> Co-authored-by: Maxim Talimanchuk Co-authored-by: Andrii.Hura <54397922+AndriiHura@users.noreply.github.com> --- MODELS.md | 18 +++++++++--------- .../docker-compose.override.yml | 2 +- .../docker-compose.override.yml | 2 +- .../docker-compose.override.yml | 2 +- .../docker-compose.override.yml | 2 +- components/sdkajfhsidhf8wfjh2ornfkle.yml | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/MODELS.md b/MODELS.md index 6e69e8ad9a..c544e2358e 100644 --- a/MODELS.md +++ b/MODELS.md @@ -2,12 +2,12 @@ Here you may find a list of models that currently available for use in Generative Assistants. -| model name | container name | model link | open-source? 
| size (billion parameters) | GPU usage | max tokens (prompt + response) | description | -|---------------------------|--------------------------|----------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| BLOOMZ 7B | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1) | yes | 7.1B | 33GB | 2,048 tokens | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free. | -| GPT-J 6B | transformers-lm-gptj | [link](https://huggingface.co/EleutherAI/gpt-j-6b) | yes | 6B | 25GB | 2,048 tokens | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free. | -| GPT-3.5 | openai-api-davinci3 | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,097 tokens | A multulingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. 
| -| ChatGPT | openai-api-chatgpt | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,096 tokens | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | -| Open-Assistant SFT-1 12B | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b) | yes | 12B | 26GB (half-precision) | 5,120 tokens | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free. | -| GPT-4 | openai-api-gpt4 | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 8,192 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | -| GPT-4 32K | openai-api-gpt4-32k | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 32,768 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. Same capabilities as the base gpt-4 mode but with 4x the context length. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | +| model name | container name | model link | open-source? 
| size (billion parameters) | GPU usage | max tokens (prompt + response) | description | +|---------------------------|--------------------------|-------------------------------------------------------------------------|--------------------------|---------------------------|---------------------------|--------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| BLOOMZ 7B | transformers-lm-bloomz7b | [link](https://huggingface.co/bigscience/bloomz-7b1) | yes | 7.1B | 33GB | 2,048 tokens | An open-source multilingual instruction-based large language model (46 languages). NB: free of charge. This model is up and running on our servers and can be used for free. | +| GPT-J 6B | transformers-lm-gptj | [link](https://huggingface.co/EleutherAI/gpt-j-6b) | yes | 6B | 25GB | 2,048 tokens | An open-source English-only large language model which is NOT fine-tuned for instruction following and NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free. | +| GPT-3.5 | openai-api-davinci3 | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,097 tokens | A multulingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. 
| +| ChatGPT | openai-api-chatgpt | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,096 tokens | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 models family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | +| Open-Assistant Pythia 12B | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/pythia-12b-sft-v8-7k-steps) | yes | 12B | 26GB (half-precision) | 5,120 tokens | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free. | +| GPT-4 | openai-api-gpt4 | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 8,192 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. | +| GPT-4 32K | openai-api-gpt4-32k | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 32,768 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. Same capabilities as the base gpt-4 model but with 4x the context length. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. 
| diff --git a/assistant_dists/ai_faq_assistant/docker-compose.override.yml b/assistant_dists/ai_faq_assistant/docker-compose.override.yml index 9679d152f2..17a0914d0b 100644 --- a/assistant_dists/ai_faq_assistant/docker-compose.override.yml +++ b/assistant_dists/ai_faq_assistant/docker-compose.override.yml @@ -107,7 +107,7 @@ services: args: SERVICE_PORT: 8158 SERVICE_NAME: transformers_lm_oasst12b - PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b + PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps HALF_PRECISION: 1 context: . dockerfile: ./services/transformers_lm/Dockerfile diff --git a/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml b/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml index 6dd4fbead9..10f242d77e 100644 --- a/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml +++ b/assistant_dists/fashion_stylist_assistant/docker-compose.override.yml @@ -107,7 +107,7 @@ services: args: SERVICE_PORT: 8158 SERVICE_NAME: transformers_lm_oasst12b - PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b + PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps HALF_PRECISION: 1 context: . dockerfile: ./services/transformers_lm/Dockerfile diff --git a/assistant_dists/nutrition_assistant/docker-compose.override.yml b/assistant_dists/nutrition_assistant/docker-compose.override.yml index b783cc76fe..da8670188f 100644 --- a/assistant_dists/nutrition_assistant/docker-compose.override.yml +++ b/assistant_dists/nutrition_assistant/docker-compose.override.yml @@ -107,7 +107,7 @@ services: args: SERVICE_PORT: 8158 SERVICE_NAME: transformers_lm_oasst12b - PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b + PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps HALF_PRECISION: 1 context: . 
dockerfile: ./services/transformers_lm/Dockerfile diff --git a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml index 686ec711db..d1c00672f7 100644 --- a/assistant_dists/universal_prompted_assistant/docker-compose.override.yml +++ b/assistant_dists/universal_prompted_assistant/docker-compose.override.yml @@ -110,7 +110,7 @@ services: args: SERVICE_PORT: 8158 SERVICE_NAME: transformers_lm_oasst12b - PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/oasst-sft-1-pythia-12b + PRETRAINED_MODEL_NAME_OR_PATH: OpenAssistant/pythia-12b-sft-v8-7k-steps HALF_PRECISION: 1 context: . dockerfile: ./services/transformers_lm/Dockerfile diff --git a/components/sdkajfhsidhf8wfjh2ornfkle.yml b/components/sdkajfhsidhf8wfjh2ornfkle.yml index 0e0bc8e5e1..afe277b4eb 100644 --- a/components/sdkajfhsidhf8wfjh2ornfkle.yml +++ b/components/sdkajfhsidhf8wfjh2ornfkle.yml @@ -1,5 +1,5 @@ name: transformers_lm_oasst12b -display_name: Open-Assistant SFT-1 12B +display_name: Open-Assistant Pythia 12B component_type: Generative model_type: NN-based is_customizable: false From 27d91740374868b7a5f3f4df0de191bb473b8559 Mon Sep 17 00:00:00 2001 From: "Dilyara Zharikova (Baymurzina)" Date: Mon, 15 May 2023 17:33:27 +0300 Subject: [PATCH 4/4] feat: update openai for gpt-4 (#458) --- services/openai_api_lm/requirements.txt | 2 +- services/openai_api_lm/server.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/services/openai_api_lm/requirements.txt b/services/openai_api_lm/requirements.txt index 44554938a9..928f3e3d58 100644 --- a/services/openai_api_lm/requirements.txt +++ b/services/openai_api_lm/requirements.txt @@ -6,4 +6,4 @@ sentry-sdk[flask]==0.14.1 healthcheck==1.3.3 jinja2<=3.0.3 Werkzeug<=2.0.3 -openai==0.27.0 \ No newline at end of file +openai==0.27.6 \ No newline at end of file diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py index 
cc30279d1b..f1e5191a79 100644 --- a/services/openai_api_lm/server.py +++ b/services/openai_api_lm/server.py @@ -29,6 +29,7 @@ "gpt-4": json.load(open("generative_configs/openai-chatgpt.json", "r")), "gpt-4-32k": json.load(open("generative_configs/openai-chatgpt.json", "r")), } +CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"] def generate_responses(context, openai_api_key, openai_org, prompt, generation_params, continue_last_uttr=False): @@ -38,8 +39,8 @@ def generate_responses(context, openai_api_key, openai_org, prompt, generation_p openai.api_key = openai_api_key openai.organization = openai_org if openai_org else None - if PRETRAINED_MODEL_NAME_OR_PATH == "gpt-3.5-turbo": - logger.info("model=gpt-3.5-turbo, use special chat completion endpoint") + if PRETRAINED_MODEL_NAME_OR_PATH in CHAT_COMPLETION_MODELS: + logger.info("Use special chat completion endpoint") s = len(context) % 2 messages = [ {"role": "system", "content": prompt}, @@ -78,7 +79,7 @@ def generate_responses(context, openai_api_key, openai_org, prompt, generation_p elif isinstance(response, str): outputs = [response.strip()] - if PRETRAINED_MODEL_NAME_OR_PATH != "gpt-3.5-turbo": + if PRETRAINED_MODEL_NAME_OR_PATH not in CHAT_COMPLETION_MODELS: # post-processing of the responses by all models except of ChatGPT outputs = [GENERATIVE_ROBOT_TEMPLATE.sub("\n", resp).strip() for resp in outputs] return outputs