Skip to content

Commit

Permalink
Feat/gpt4 turbo in meeting analysis (#118)
Browse files Browse the repository at this point in the history
* add gpt-4-turbo

* add gpt4-turbo to sentius_management_assistant

* add cards for gpt4-turbo

* remove CUDA_VISIBLE_DEVICES from llm configs

* remove CUDA_VISIBLE_DEVICES from llm configs

* codestyle

* revert utils.py in meeting skill

* set management assistant back to gpt4

* correct port
  • Loading branch information
smilni authored Nov 14, 2023
1 parent 71bbc90 commit 2fe99ed
Show file tree
Hide file tree
Showing 15 changed files with 90 additions and 31 deletions.
12 changes: 6 additions & 6 deletions assistant_dists/sentius_management_assistant/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,24 @@ services:
- "./common:/src/common"
ports:
- 8145:8145
openai-api-gpt4:
openai-api-gpt4-turbo:
volumes:
- "./services/openai_api_lm:/src"
- "./common:/src/common"
ports:
- 8159:8159
dff-roles-prompted-skill:
- 8190:8190
dff-general-pm-prompted-skill:
volumes:
- "./skills/dff_template_prompted_skill:/src"
- "./common:/src/common"
ports:
- 8185:8185
dff-general-pm-prompted-skill:
- 8189:8189
dff-roles-prompted-skill:
volumes:
- "./skills/dff_template_prompted_skill:/src"
- "./common:/src/common"
ports:
- 8189:8189
- 8185:8185
dff-meeting-analysis-skill:
volumes:
- "./skills/dff_meeting_analysis_skill:/src"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ services:
WAIT_HOSTS: "llm-based-response-selector-gpt4:8003, combined-classification:8087, sentence-ranker:8128,
prompt-selector:8135, openai-api-chatgpt:8145, dff-general-pm-prompted-skill:8189, dff-meeting-analysis-skill:8186,
doc-processor-from-atts:8188, dff-roles-prompted-skill:8185, llm-based-skill-selector:8182,
openai-api-gpt4:8159"
openai-api-gpt4-turbo:8190"
WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000}
HIGH_PRIORITY_INTENTS: 1
RESTRICTION_FOR_SENSITIVE_CASE: 1
Expand Down Expand Up @@ -85,11 +85,11 @@ services:
SERVICE_PORT: 8003
SERVICE_NAME: response_selector
LANGUAGE: EN
GENERATIVE_SERVICE_URL: http://openai-api-gpt4:8159/respond
GENERATIVE_SERVICE_CONFIG: openai-gpt4-long.json
GENERATIVE_SERVICE_URL: http://openai-api-gpt4-turbo:8190/respond
GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json
GENERATIVE_TIMEOUT: 120
N_UTTERANCES_CONTEXT: 1
FILTER_TOXIC_OR_BADLISTED: 1
FILTER_TOXIC_OR_BADLISTED: 0
PROMPT_FILE: common/prompts/response_selector_uncropped.json
context: .
dockerfile: ./response_selectors/llm_based_response_selector/Dockerfile
Expand Down Expand Up @@ -208,8 +208,8 @@ services:
args:
SERVICE_PORT: 8186
SERVICE_NAME: dff_meeting_analysis_skill
GENERATIVE_SERVICE_URL: http://openai-api-gpt4:8159/respond
GENERATIVE_SERVICE_CONFIG: openai-gpt4-long.json
GENERATIVE_SERVICE_URL: http://openai-api-gpt4-turbo:8190/respond
GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json
GENERATIVE_TIMEOUT: 120
SHORT_GENERATIVE_SERVICE_URL: http://openai-api-chatgpt:8145/respond
SHORT_GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json
Expand All @@ -225,16 +225,16 @@ services:
reservations:
memory: 500M

openai-api-gpt4:
openai-api-gpt4-turbo:
env_file: [ .env ]
build:
args:
SERVICE_PORT: 8159
SERVICE_NAME: openai_api_gpt4
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
SERVICE_PORT: 8190
SERVICE_NAME: openai_api_gpt4_turbo
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8159
command: flask run -h 0.0.0.0 -p 8190
environment:
- FLASK_APP=server
deploy:
Expand Down
2 changes: 1 addition & 1 deletion components.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -191,5 +191,5 @@
8187 universal-llm-based-skill-selector
8188 doc-processor-from-atts,doc-processor-from-args
8189 dff-general-pm-prompted-skill
8190
8190 openai-api-gpt4-turbo
8191
28 changes: 28 additions & 0 deletions components/KJNVBinrb09jkefnp.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: openai_api_gpt4_turbo
display_name: GPT-4 Turbo
component_type: Generative
model_type: NN-based
is_customizable: false
author: publisher@deeppavlov.ai
description: A multilingual instruction-based large language model
capable of code generation and other complex tasks.
It offers the same capabilities as the base gpt-4 but is cheaper, has a 128k context length,
improved instruction following, and more up-to-date real-world knowledge (up to April 2023).
Paid. You must provide your OpenAI API key to use the model.
Your OpenAI account will be charged according to your usage.
ram_usage: 100M
gpu_usage: null
group: services
connector:
protocol: http
timeout: 120.0
url: http://openai-api-gpt4-turbo:8190/respond
dialog_formatter: null
response_formatter: null
previous_services: null
required_previous_services: null
state_manager_method: null
tags: null
endpoint: respond
service: services/openai_api_lm/service_configs/openai-api-gpt4-turbo
date_created: '2023-04-16T09:45:32'
7 changes: 6 additions & 1 deletion services/openai_api_lm/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,20 @@
"gpt-3.5-turbo-16k": json.load(open("common/generative_configs/openai-chatgpt.json", "r")),
"gpt-4": json.load(open("common/generative_configs/openai-chatgpt.json", "r")),
"gpt-4-32k": json.load(open("common/generative_configs/openai-chatgpt.json", "r")),
# gpt-4-1106-preview is the GPT-4 Turbo preview model
"gpt-4-1106-preview": json.load(open("common/generative_configs/openai-chatgpt-long.json", "r")),
}
CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"]
CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k", "gpt-4-1106-preview"]
MAX_TOKENS = {
"text-davinci-003": 4097,
"davinci-002": 4097,
"gpt-3.5-turbo": 4096,
"gpt-3.5-turbo-16k": 16384,
"gpt-4": 8192,
"gpt-4-32k": 32768,
# gpt-4-turbo's max token limit is set to 64k instead of 128k on purpose,
# as the model was shown to perform worse on longer sequences
"gpt-4-1106-preview": 64000,
}
try:
ENCODER = tiktoken.encoding_for_model(PRETRAINED_MODEL_NAME_OR_PATH)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8167
SERVICE_NAME: openai_api_chatgpt_16k
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8167
SERVICE_NAME: openai_api_chatgpt_16k
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8167
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8145
SERVICE_NAME: openai_api_chatgpt
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8145
SERVICE_NAME: openai_api_chatgpt
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8145
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8131
SERVICE_NAME: openai_api_davinci3
PRETRAINED_MODEL_NAME_OR_PATH: text-davinci-003
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8131
SERVICE_NAME: openai_api_davinci3
PRETRAINED_MODEL_NAME_OR_PATH: text-davinci-003
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8131
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
SERVICE_PORT: 8190
SERVICE_NAME: openai_api_gpt4_turbo
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview
FLASK_APP: server
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: openai-api-gpt4-turbo
endpoints:
- ping
- envvars_to_send
- max_tokens
- respond
- generate_goals
compose:
env_file:
- .env
- .env_azure
build:
args:
SERVICE_PORT: 8190
SERVICE_NAME: openai_api_gpt4_turbo
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8190
environment:
- FLASK_APP=server
deploy:
resources:
limits:
memory: 100M
reservations:
memory: 100M
volumes:
- ./services/openai_api_lm:/src
- ./common:/src/common
ports:
- 8190:8190
proxy: null
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8159
SERVICE_NAME: openai_api_gpt4
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8159
SERVICE_NAME: openai_api_gpt4
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8159
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down

0 comments on commit 2fe99ed

Please sign in to comment.