Skip to content

Commit

Permalink
Feat/gpt4 turbo in meeting analysis (#118)
Browse files Browse the repository at this point in the history
* add gpt-4-turbo

* add gpt4-turbo to sentius_management_assistant

* add cards for gpt4-turbo

* remove CUDA_VISIBLE_DEVICES from llm configs

* remove CUDA_VISIBLE_DEVICES from llm configs

* codestyle

* revert utils.py in meeting skill

* set management assistant back to gpt4

* correct port
  • Loading branch information
smilni authored Nov 14, 2023
1 parent 71bbc90 commit 2fe99ed
Show file tree
Hide file tree
Showing 15 changed files with 90 additions and 31 deletions.
12 changes: 6 additions & 6 deletions assistant_dists/sentius_management_assistant/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,24 @@ services:
- "./common:/src/common"
ports:
- 8145:8145
openai-api-gpt4:
openai-api-gpt4-turbo:
volumes:
- "./services/openai_api_lm:/src"
- "./common:/src/common"
ports:
- 8159:8159
dff-roles-prompted-skill:
- 8190:8190
dff-general-pm-prompted-skill:
volumes:
- "./skills/dff_template_prompted_skill:/src"
- "./common:/src/common"
ports:
- 8185:8185
dff-general-pm-prompted-skill:
- 8189:8189
dff-roles-prompted-skill:
volumes:
- "./skills/dff_template_prompted_skill:/src"
- "./common:/src/common"
ports:
- 8189:8189
- 8185:8185
dff-meeting-analysis-skill:
volumes:
- "./skills/dff_meeting_analysis_skill:/src"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ services:
WAIT_HOSTS: "llm-based-response-selector-gpt4:8003, combined-classification:8087, sentence-ranker:8128,
prompt-selector:8135, openai-api-chatgpt:8145, dff-general-pm-prompted-skill:8189, dff-meeting-analysis-skill:8186,
doc-processor-from-atts:8188, dff-roles-prompted-skill:8185, llm-based-skill-selector:8182,
openai-api-gpt4:8159"
openai-api-gpt4-turbo:8190"
WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000}
HIGH_PRIORITY_INTENTS: 1
RESTRICTION_FOR_SENSITIVE_CASE: 1
Expand Down Expand Up @@ -85,11 +85,11 @@ services:
SERVICE_PORT: 8003
SERVICE_NAME: response_selector
LANGUAGE: EN
GENERATIVE_SERVICE_URL: http://openai-api-gpt4:8159/respond
GENERATIVE_SERVICE_CONFIG: openai-gpt4-long.json
GENERATIVE_SERVICE_URL: http://openai-api-gpt4-turbo:8190/respond
GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json
GENERATIVE_TIMEOUT: 120
N_UTTERANCES_CONTEXT: 1
FILTER_TOXIC_OR_BADLISTED: 1
FILTER_TOXIC_OR_BADLISTED: 0
PROMPT_FILE: common/prompts/response_selector_uncropped.json
context: .
dockerfile: ./response_selectors/llm_based_response_selector/Dockerfile
Expand Down Expand Up @@ -208,8 +208,8 @@ services:
args:
SERVICE_PORT: 8186
SERVICE_NAME: dff_meeting_analysis_skill
GENERATIVE_SERVICE_URL: http://openai-api-gpt4:8159/respond
GENERATIVE_SERVICE_CONFIG: openai-gpt4-long.json
GENERATIVE_SERVICE_URL: http://openai-api-gpt4-turbo:8190/respond
GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json
GENERATIVE_TIMEOUT: 120
SHORT_GENERATIVE_SERVICE_URL: http://openai-api-chatgpt:8145/respond
SHORT_GENERATIVE_SERVICE_CONFIG: openai-chatgpt-long.json
Expand All @@ -225,16 +225,16 @@ services:
reservations:
memory: 500M

openai-api-gpt4:
openai-api-gpt4-turbo:
env_file: [ .env ]
build:
args:
SERVICE_PORT: 8159
SERVICE_NAME: openai_api_gpt4
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
SERVICE_PORT: 8190
SERVICE_NAME: openai_api_gpt4_turbo
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8159
command: flask run -h 0.0.0.0 -p 8190
environment:
- FLASK_APP=server
deploy:
Expand Down
2 changes: 1 addition & 1 deletion components.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -191,5 +191,5 @@
8187 universal-llm-based-skill-selector
8188 doc-processor-from-atts,doc-processor-from-args
8189 dff-general-pm-prompted-skill
8190
8190 openai-api-gpt4-turbo
8191
28 changes: 28 additions & 0 deletions components/KJNVBinrb09jkefnp.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
name: openai_api_gpt4_turbo
display_name: GPT-4 Turbo
component_type: Generative
model_type: NN-based
is_customizable: false
author: publisher@deeppavlov.ai
description: A multilingual instruction-based large language model
capable of code generation and other complex tasks.
It offers the same capabilities as the base gpt-4 but is cheaper, has a 128k context length,
improved instruction following, and more up-to-date real-world knowledge (up to April 2023).
Paid. You must provide your OpenAI API key to use the model.
Your OpenAI account will be charged according to your usage.
ram_usage: 100M
gpu_usage: null
group: services
connector:
protocol: http
timeout: 120.0
url: http://openai-api-gpt4-turbo:8190/respond
dialog_formatter: null
response_formatter: null
previous_services: null
required_previous_services: null
state_manager_method: null
tags: null
endpoint: respond
service: services/openai_api_lm/service_configs/openai-api-gpt4-turbo
date_created: '2023-04-16T09:45:32'
7 changes: 6 additions & 1 deletion services/openai_api_lm/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,20 @@
"gpt-3.5-turbo-16k": json.load(open("common/generative_configs/openai-chatgpt.json", "r")),
"gpt-4": json.load(open("common/generative_configs/openai-chatgpt.json", "r")),
"gpt-4-32k": json.load(open("common/generative_configs/openai-chatgpt.json", "r")),
# gpt-4-1106-preview is the GPT-4 Turbo preview model
"gpt-4-1106-preview": json.load(open("common/generative_configs/openai-chatgpt-long.json", "r")),
}
CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"]
CHAT_COMPLETION_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k", "gpt-4-1106-preview"]
MAX_TOKENS = {
"text-davinci-003": 4097,
"davinci-002": 4097,
"gpt-3.5-turbo": 4096,
"gpt-3.5-turbo-16k": 16384,
"gpt-4": 8192,
"gpt-4-32k": 32768,
# gpt-4-turbo's max token limit is set to 64k instead of 128k on purpose,
# as the model was shown to perform worse on longer sequences
"gpt-4-1106-preview": 64000,
}
try:
ENCODER = tiktoken.encoding_for_model(PRETRAINED_MODEL_NAME_OR_PATH)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8167
SERVICE_NAME: openai_api_chatgpt_16k
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8167
SERVICE_NAME: openai_api_chatgpt_16k
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo-16k
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8167
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8145
SERVICE_NAME: openai_api_chatgpt
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8145
SERVICE_NAME: openai_api_chatgpt
PRETRAINED_MODEL_NAME_OR_PATH: gpt-3.5-turbo
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8145
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8131
SERVICE_NAME: openai_api_davinci3
PRETRAINED_MODEL_NAME_OR_PATH: text-davinci-003
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8131
SERVICE_NAME: openai_api_davinci3
PRETRAINED_MODEL_NAME_OR_PATH: text-davinci-003
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8131
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
SERVICE_PORT: 8190
SERVICE_NAME: openai_api_gpt4_turbo
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview
FLASK_APP: server
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: openai-api-gpt4-turbo
endpoints:
- ping
- envvars_to_send
- max_tokens
- respond
- generate_goals
compose:
env_file:
- .env
- .env_azure
build:
args:
SERVICE_PORT: 8190
SERVICE_NAME: openai_api_gpt4_turbo
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4-1106-preview
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8190
environment:
- FLASK_APP=server
deploy:
resources:
limits:
memory: 100M
reservations:
memory: 100M
volumes:
- ./services/openai_api_lm:/src
- ./common:/src/common
ports:
- 8190:8190
proxy: null
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SERVICE_PORT: 8159
SERVICE_NAME: openai_api_gpt4
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@ compose:
SERVICE_PORT: 8159
SERVICE_NAME: openai_api_gpt4
PRETRAINED_MODEL_NAME_OR_PATH: gpt-4
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/openai_api_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8159
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
Expand Down

0 comments on commit 2fe99ed

Please sign in to comment.