diff --git a/assistant_dists/sentius_management_assistant/dev.yml b/assistant_dists/sentius_management_assistant/dev.yml index 856bf796a4..c04f415282 100644 --- a/assistant_dists/sentius_management_assistant/dev.yml +++ b/assistant_dists/sentius_management_assistant/dev.yml @@ -40,24 +40,24 @@ services: - "./common:/src/common" ports: - 8145:8145 - openai-api-gpt4: + openai-api-gpt4-turbo: volumes: - "./services/openai_api_lm:/src" - "./common:/src/common" ports: - - 8159:8159 - dff-roles-prompted-skill: + - 8190:8190 + dff-general-pm-prompted-skill: volumes: - "./skills/dff_template_prompted_skill:/src" - "./common:/src/common" ports: - - 8185:8185 - dff-general-pm-prompted-skill: + - 8189:8189 + dff-roles-prompted-skill: volumes: - "./skills/dff_template_prompted_skill:/src" - "./common:/src/common" ports: - - 8189:8189 + - 8185:8185 dff-meeting-analysis-skill: volumes: - "./skills/dff_meeting_analysis_skill:/src" diff --git a/assistant_dists/sentius_management_assistant/docker-compose.override.yml b/assistant_dists/sentius_management_assistant/docker-compose.override.yml index e678e807ed..b5aedc1e26 100644 --- a/assistant_dists/sentius_management_assistant/docker-compose.override.yml +++ b/assistant_dists/sentius_management_assistant/docker-compose.override.yml @@ -5,7 +5,7 @@ services: WAIT_HOSTS: "llm-based-response-selector-gpt4:8003, combined-classification:8087, sentence-ranker:8128, prompt-selector:8135, openai-api-chatgpt:8145, dff-general-pm-prompted-skill:8189, dff-meeting-analysis-skill:8186, doc-processor-from-atts:8188, dff-roles-prompted-skill:8185, llm-based-skill-selector:8182, - openai-api-gpt4:8159" + openai-api-gpt4-turbo:8190" WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000} HIGH_PRIORITY_INTENTS: 1 RESTRICTION_FOR_SENSITIVE_CASE: 1 @@ -85,11 +85,11 @@ services: SERVICE_PORT: 8003 SERVICE_NAME: response_selector LANGUAGE: EN - GENERATIVE_SERVICE_URL: http://openai-api-gpt4:8159/respond - GENERATIVE_SERVICE_CONFIG: openai-gpt4-long.json + GENERATIVE_SERVICE_URL: http://openai-api-gpt4-turbo:8190/respond + GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json GENERATIVE_TIMEOUT: 120 N_UTTERANCES_CONTEXT: 1 - FILTER_TOXIC_OR_BADLISTED: 1 + FILTER_TOXIC_OR_BADLISTED: 0 PROMPT_FILE: common/prompts/response_selector_uncropped.json context: . dockerfile: ./response_selectors/llm_based_response_selector/Dockerfile @@ -208,8 +208,8 @@ services: args: SERVICE_PORT: 8186 SERVICE_NAME: dff_meeting_analysis_skill - GENERATIVE_SERVICE_URL: http://openai-api-gpt4:8159/respond - GENERATIVE_SERVICE_CONFIG: openai-gpt4-long.json + GENERATIVE_SERVICE_URL: http://openai-api-gpt4-turbo:8190/respond + GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json GENERATIVE_TIMEOUT: 120 SHORT_GENERATIVE_SERVICE_URL: http://openai-api-chatgpt:8145/respond SHORT_GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json @@ -225,16 +225,16 @@ services: reservations: memory: 500M - openai-api-gpt4: + openai-api-gpt4-turbo: env_file: [ .env ] build: args: - SERVICE_PORT: 8159 - SERVICE_NAME: openai_api_gpt4 - PRETRAINED_MODEL_NAME_OR_PATH: gpt-4 + SERVICE_PORT: 8190 + SERVICE_NAME: openai_api_gpt4_turbo + PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview context: . dockerfile: ./services/openai_api_lm/Dockerfile - command: flask run -h 0.0.0.0 -p 8159 + command: flask run -h 0.0.0.0 -p 8190 environment: - FLASK_APP=server deploy: diff --git a/components.tsv b/components.tsv index bc637adb41..7c4cd632c6 100644 --- a/components.tsv +++ b/components.tsv @@ -191,5 +191,5 @@ 8187 universal-llm-based-skill-selector 8188 doc-processor-from-atts,doc-processor-from-args 8189 dff-general-pm-prompted-skill -8190 +8190 openai-api-gpt4-turbo 8191 \ No newline at end of file diff --git a/components/KJNVBinrb09jkefnp.yml b/components/KJNVBinrb09jkefnp.yml new file mode 100644 index 0000000000..0da2dc96d0 --- /dev/null +++ b/components/KJNVBinrb09jkefnp.yml @@ -0,0 +1,28 @@ +name: openai_api_gpt4_turbo +display_name: GPT-4 Turbo +component_type: Generative +model_type: NN-based +is_customizable: false +author: publisher@deeppavlov.ai +description: A multilingual instruction-based large language model + which is capable of code generation and other complex tasks. + Same capabilities as the base gpt-4 but cheaper, with 128k context length, + improved instruction following and more up-to-date real-world knowledge (up to April 2023). + Paid. You must provide your OpenAI API key to use the model. + Your OpenAI account will be charged according to your usage. +ram_usage: 100M +gpu_usage: null +group: services +connector: + protocol: http + timeout: 120.0 + url: http://openai-api-gpt4-turbo:8190/respond +dialog_formatter: null +response_formatter: null +previous_services: null +required_previous_services: null +state_manager_method: null +tags: null +endpoint: respond +service: services/openai_api_lm/service_configs/openai-api-gpt4-turbo +date_created: '2023-04-16T09:45:32' diff --git a/services/openai_api_lm/server.py b/services/openai_api_lm/server.py index 8633c73025..ca25d2ed2f 100644 --- a/services/openai_api_lm/server.py +++ b/services/openai_api_lm/server.py @@ -32,8 +32,10 @@ "gpt-3.5-turbo-16k": json.load(open("common/generative_configs/openai-chatgpt.json", "r")), "gpt-4": json.load(open("common/generative_configs/openai-chatgpt.json", "r")), "gpt-4-32k": json.load(open("common/generative_configs/openai-chatgpt.json", "r")), + # that is gpt-4-turbo + "gpt-4-1106-preview": json.load(open("common/generative_configs/openai-chatgpt-long.json", "r")), } -CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"] +CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k", "gpt-4-1106-preview"] MAX_TOKENS = { "text-davinci-003": 4097, "davinci-002": 4097, @@ -41,6 +43,9 @@ "gpt-3.5-turbo-16k": 16384, "gpt-4": 8192, "gpt-4-32k": 32768, + # gpt4-turbo set to 64k instead of 128k on purpose + # as it was shown to perform worse for longer sequences + "gpt-4-1106-preview": 64000, } try: ENCODER = tiktoken.encoding_for_model(PRETRAINED_MODEL_NAME_OR_PATH) diff --git a/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/environment.yml b/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/environment.yml index eb07373420..5d945b4cdd 100644 --- a/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/environment.yml +++ b/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/environment.yml @@ -1,5 +1,4 @@ SERVICE_PORT: 8167 SERVICE_NAME: openai_api_chatgpt_16k PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k -CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server diff --git a/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/service.yml b/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/service.yml index 4798ae6d01..f097f5ead6 100644 --- a/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/service.yml +++ b/services/openai_api_lm/service_configs/openai-api-chatgpt-16k/service.yml @@ -14,13 +14,11 @@ compose: SERVICE_PORT: 8167 SERVICE_NAME: openai_api_chatgpt_16k PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k - CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server context: . dockerfile: ./services/openai_api_lm/Dockerfile command: flask run -h 0.0.0.0 -p 8167 environment: - - CUDA_VISIBLE_DEVICES=0 - FLASK_APP=server deploy: resources: diff --git a/services/openai_api_lm/service_configs/openai-api-chatgpt/environment.yml b/services/openai_api_lm/service_configs/openai-api-chatgpt/environment.yml index 7b011e9477..e562929136 100644 --- a/services/openai_api_lm/service_configs/openai-api-chatgpt/environment.yml +++ b/services/openai_api_lm/service_configs/openai-api-chatgpt/environment.yml @@ -1,5 +1,4 @@ SERVICE_PORT: 8145 SERVICE_NAME: openai_api_chatgpt PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo -CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server diff --git a/services/openai_api_lm/service_configs/openai-api-chatgpt/service.yml b/services/openai_api_lm/service_configs/openai-api-chatgpt/service.yml index 5dd97d98fd..4d7fcb2ac3 100644 --- a/services/openai_api_lm/service_configs/openai-api-chatgpt/service.yml +++ b/services/openai_api_lm/service_configs/openai-api-chatgpt/service.yml @@ -14,13 +14,11 @@ compose: SERVICE_PORT: 8145 SERVICE_NAME: openai_api_chatgpt PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo - CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server context: . dockerfile: ./services/openai_api_lm/Dockerfile command: flask run -h 0.0.0.0 -p 8145 environment: - - CUDA_VISIBLE_DEVICES=0 - FLASK_APP=server deploy: resources: diff --git a/services/openai_api_lm/service_configs/openai-api-davinci3/environment.yml b/services/openai_api_lm/service_configs/openai-api-davinci3/environment.yml index fa38944280..4a54821648 100644 --- a/services/openai_api_lm/service_configs/openai-api-davinci3/environment.yml +++ b/services/openai_api_lm/service_configs/openai-api-davinci3/environment.yml @@ -1,5 +1,4 @@ SERVICE_PORT: 8131 SERVICE_NAME: openai_api_davinci3 PRETRAINED_MODEL_NAME_OR_PATH: text-davinci-003 -CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server diff --git a/services/openai_api_lm/service_configs/openai-api-davinci3/service.yml b/services/openai_api_lm/service_configs/openai-api-davinci3/service.yml index b95ac4596c..6e52519f59 100644 --- a/services/openai_api_lm/service_configs/openai-api-davinci3/service.yml +++ b/services/openai_api_lm/service_configs/openai-api-davinci3/service.yml @@ -14,13 +14,11 @@ compose: SERVICE_PORT: 8131 SERVICE_NAME: openai_api_davinci3 PRETRAINED_MODEL_NAME_OR_PATH: text-davinci-003 - CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server context: . dockerfile: ./services/openai_api_lm/Dockerfile command: flask run -h 0.0.0.0 -p 8131 environment: - - CUDA_VISIBLE_DEVICES=0 - FLASK_APP=server deploy: resources: diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4-turbo/environment.yml b/services/openai_api_lm/service_configs/openai-api-gpt4-turbo/environment.yml new file mode 100644 index 0000000000..ee9ec0937d --- /dev/null +++ b/services/openai_api_lm/service_configs/openai-api-gpt4-turbo/environment.yml @@ -0,0 +1,4 @@ +SERVICE_PORT: 8190 +SERVICE_NAME: openai_api_gpt4_turbo +PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview +FLASK_APP: server \ No newline at end of file diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4-turbo/service.yml b/services/openai_api_lm/service_configs/openai-api-gpt4-turbo/service.yml new file mode 100644 index 0000000000..ce1e32f496 --- /dev/null +++ b/services/openai_api_lm/service_configs/openai-api-gpt4-turbo/service.yml @@ -0,0 +1,34 @@ +name: openai-api-gpt4-turbo +endpoints: +- ping +- envvars_to_send +- max_tokens +- respond +- generate_goals +compose: + env_file: + - .env + - .env_azure + build: + args: + SERVICE_PORT: 8190 + SERVICE_NAME: openai_api_gpt4_turbo + PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview + FLASK_APP: server + context: . + dockerfile: ./services/openai_api_lm/Dockerfile + command: flask run -h 0.0.0.0 -p 8190 + environment: + - FLASK_APP=server + deploy: + resources: + limits: + memory: 100M + reservations: + memory: 100M + volumes: + - ./services/openai_api_lm:/src + - ./common:/src/common + ports: + - 8190:8190 +proxy: null diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml b/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml index f3cf8147a8..b4d243dcae 100644 --- a/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml +++ b/services/openai_api_lm/service_configs/openai-api-gpt4/environment.yml @@ -1,5 +1,4 @@ SERVICE_PORT: 8159 SERVICE_NAME: openai_api_gpt4 PRETRAINED_MODEL_NAME_OR_PATH: gpt-4 -CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server diff --git a/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml b/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml index dcc23d4cef..d82c836965 100644 --- a/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml +++ b/services/openai_api_lm/service_configs/openai-api-gpt4/service.yml @@ -14,13 +14,11 @@ compose: SERVICE_PORT: 8159 SERVICE_NAME: openai_api_gpt4 PRETRAINED_MODEL_NAME_OR_PATH: gpt-4 - CUDA_VISIBLE_DEVICES: '0' FLASK_APP: server context: . dockerfile: ./services/openai_api_lm/Dockerfile command: flask run -h 0.0.0.0 -p 8159 environment: - - CUDA_VISIBLE_DEVICES=0 - FLASK_APP=server deploy: resources: