
Feat/add vicuna no dists #507

Merged: 14 commits, Jun 30, 2023
4 changes: 4 additions & 0 deletions MODELS.md
@@ -9,6 +9,10 @@ Here you may find a list of models that are currently available for use in Generativ
| GPT-3.5 | openai-api-davinci3 | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,097 tokens | A multilingual instruction-based large language model which is capable of code generation. Unlike ChatGPT, not optimised for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
| ChatGPT | openai-api-chatgpt | [link](https://platform.openai.com/docs/models/gpt-3-5) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 4,096 tokens | Based on gpt-3.5-turbo -- the most capable of the entire GPT-3/GPT-3.5 model family. Optimized for chat. Able to understand and generate code. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
| Open-Assistant Pythia 12B | transformers-lm-oasst12b | [link](https://huggingface.co/OpenAssistant/pythia-12b-sft-v8-7k-steps) | yes | 12B | 26GB (half-precision) | 5,120 tokens | An open-source English-only instruction-based large language model which is NOT good at answering math and coding questions. NB: free of charge. This model is up and running on our servers and can be used for free. |
| Vicuna 13B | transformers-lm-vicuna13b | [link](https://huggingface.co/lmsys/vicuna-13b-v1.3) | yes, but only for non-commercial use | 13B | 29GB (half-precision) | 2,048 tokens | An instruction-based large language model fine-tuned on LLaMa that achieves [more than 90%* quality of OpenAI ChatGPT and Google Bard](https://lmsys.org/blog/2023-03-30-vicuna/). The model performs best in English and is NOT good at answering math, reasoning, and coding questions. NB-1: free of charge. This model is up and running on our servers and can be used for free. NB-2: cannot be used for commercial purposes (license restriction). |
| GPT-4 | openai-api-gpt4 | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 8,192 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. More capable than any GPT-3.5 model, able to do more complex tasks, and optimized for chat. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
| GPT-4 32K | openai-api-gpt4-32k | [link](https://platform.openai.com/docs/models/gpt-4) | no (paid access via API) | supposedly, 175B | - (cannot be run locally) | 32,768 tokens | A multilingual instruction-based large language model which is capable of code generation and other complex tasks. Same capabilities as the base gpt-4 model but with 4x the context length. NB: paid. You must provide your OpenAI API key to use the model. Your OpenAI account will be charged according to your usage. |
| GPT-JT 6B | transformers-lm-gptjt | [link](https://huggingface.co/togethercomputer/GPT-JT-6B-v1) | yes | 6B | 26GB | 2,048 tokens | An open-source English-only large language model which was fine-tuned for instruction following but is NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free. |
19 changes: 10 additions & 9 deletions components.tsv
@@ -1,10 +1,10 @@
3000 files
3662 harvesters-maintenance-skill
3772 faq-skill
3882 harvesters-maintenance-gobot-skill
4242 agent
8002 ranking-based-response-selector,ranking-based-response-selector-ru
8003 llm-based-response-selector
8004 convers-evaluator-annotator
8005 confidence-based-response-selector
8006 spacy-nounphrases
@@ -103,29 +103,29 @@
8099 dff-music-skill
8100 fact-retrieval
8101 dff-science-skill
8102 masked-lm
8103 entity-detection-ru,entity-detection
8104 dff-funfact-skill
8105 dff-bot-persona-skill
8106 infilling
8107 speech-function-predictor
8108 speech-function-classifier
8109 dff-gossip-skill
8110 fact-retrieval-ru
8111 dff-wiki-skill
8112 news-api-annotator
8113 topic-recommendation
8114 user-persona-extractor
8115 dff-gaming-skill
8116 wiki-facts
8117 dff-art-skill
8118 toxic-classification-ru
8119 fact-random
8120 dff-template-skill
8121 midas-predictor
8122 dialogrpt-ru
8123 image-captioning
8124 dff-image-skill
8125 dialogpt,dialogpt-ru
8126 storygpt
8127 prompt-storygpt
@@ -168,4 +168,5 @@
8164 anthropic-api-claude-v1
8165
8166
8167 openai-api-chatgpt-16k
8168 transformers-lm-vicuna13b
29 changes: 29 additions & 0 deletions components/ojpergpopnmpop3gg8.yml
@@ -0,0 +1,29 @@
name: transformers-lm-vicuna13b
display_name: Vicuna 13B
component_type: Generative
model_type: NN-based
is_customizable: false
author: publisher@deeppavlov.ai
description: |-
[Collaborator review comment: "something extra was left over here"]
An instruction-based large language model fine-tuned on LLaMa that achieves
[more than 90%* quality of OpenAI ChatGPT and Google Bard](https://lmsys.org/blog/2023-03-30-vicuna/).
The model performs best in English and is NOT good at answering math, reasoning, and coding questions.
For more details, refer to [HuggingFace Model Page](https://huggingface.co/lmsys/vicuna-13b-v1.3).
Free of charge. This model is up and running on our servers and can be used for free.
NB: cannot be used for commercial purposes (license restriction).
ram_usage: 50G
gpu_usage: 29G
group: services
connector:
protocol: http
timeout: 120.0
url: http://transformers-lm-vicuna13b:8168/respond
dialog_formatter: null
response_formatter: null
previous_services: null
required_previous_services: null
state_manager_method: null
tags: null
endpoint: respond
service: services/transformers_lm/service_configs/transformers-lm-vicuna13b
date_created: '2023-04-16T09:45:32'
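The connector config above points the agent at `http://transformers-lm-vicuna13b:8168/respond` over HTTP with a 120-second timeout. As a minimal sketch of calling such a service from Python: the URL and timeout come from the config above, but the payload shape (`dialog_contexts`, `prompts`, `configs`) is an assumption modeled on similar Dream generative services, not something this diff confirms.

```python
import json
import urllib.request

# URL and timeout taken from the connector config above.
SERVICE_URL = "http://transformers-lm-vicuna13b:8168/respond"
TIMEOUT = 120.0


def build_payload(dialog_context, prompt):
    """Build a request body.

    The field names here are an assumption based on other Dream
    generative services; check the service's server.py for the
    authoritative schema.
    """
    return {
        "dialog_contexts": [dialog_context],
        "prompts": [prompt],
        "configs": [None],  # None -> let the service use its default generative config
    }


def respond(dialog_context, prompt):
    """POST the payload to the service and decode the JSON response."""
    data = json.dumps(build_payload(dialog_context, prompt)).encode("utf-8")
    req = urllib.request.Request(
        SERVICE_URL, data=data, headers={"Content-Type": "application/json"}
    )
    with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
        return json.loads(resp.read())
```

Inside the compose network a call would look like `respond(["Hello, who are you?"], "You are a helpful assistant.")`; outside it, the hostname resolves only through the proxy on port 8168.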
6 changes: 4 additions & 2 deletions services/transformers_lm/requirements.txt
@@ -1,4 +1,4 @@
-transformers==4.30.0
+transformers==4.30.2
flask==1.1.1
itsdangerous==2.0.1
gunicorn==19.9.0
@@ -8,4 +8,6 @@ healthcheck==1.3.3
jinja2<=3.0.3
Werkzeug<=2.0.3
markupsafe==2.0.1
torch==1.13.1
sentencepiece==0.1.99
protobuf==3.20.1
1 change: 1 addition & 0 deletions services/transformers_lm/server.py
@@ -37,6 +37,7 @@
open("common/generative_configs/default_generative_config.json", "r")
),
"togethercomputer/GPT-JT-6B-v1": json.load(open("common/generative_configs/default_generative_config.json", "r")),
"lmsys/vicuna-13b-v1.3": json.load(open("common/generative_configs/default_generative_config.json", "r")),
}
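The hunk above registers `lmsys/vicuna-13b-v1.3` in server.py's mapping from model name to a generation config loaded from `default_generative_config.json`. A sketch of how per-request overrides might be layered over such a default config — the helper and the specific keys are illustrative assumptions, not code from this repository:

```python
import copy

# Stand-in for the contents of default_generative_config.json;
# the exact keys are an assumption, not taken from this diff.
DEFAULT_GENERATIVE_CONFIG = {
    "max_new_tokens": 64,
    "temperature": 0.9,
    "top_p": 1.0,
    "do_sample": True,
}


def merge_generation_config(defaults, overrides=None):
    """Return a copy of the default config with request-level overrides applied.

    Deep-copying keeps the shared per-model default untouched between requests.
    """
    config = copy.deepcopy(defaults)
    config.update(overrides or {})
    return config
```

For example, `merge_generation_config(DEFAULT_GENERATIVE_CONFIG, {"temperature": 0.2})` would lower the sampling temperature for one call while leaving the registered default intact.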


@@ -0,0 +1,6 @@
SERVICE_PORT: 8168
SERVICE_NAME: transformers_lm_vicuna13b
PRETRAINED_MODEL_NAME_OR_PATH: lmsys/vicuna-13b-v1.3
HALF_PRECISION: 1
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
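The environment file above sets `HALF_PRECISION: 1`, which matches the 29GB half-precision footprint listed in MODELS.md. A sketch of how a service might turn that flag into a dtype choice when loading the model — this helper is illustrative and not part of the repository:

```python
import os


def resolve_dtype(env=None):
    """Map the HALF_PRECISION env flag to a torch dtype name.

    Illustrative only: the real service may read the flag differently.
    Returning dtype names as strings keeps the sketch runnable without torch;
    in practice you would pass e.g. torch.float16 as torch_dtype to
    AutoModelForCausalLM.from_pretrained.
    """
    env = os.environ if env is None else env
    half = env.get("HALF_PRECISION", "0") in ("1", "true", "True")
    return "float16" if half else "float32"
```

With the env file above, `resolve_dtype()` would yield `"float16"`, halving the memory needed to host the 13B checkpoint compared to full precision.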
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: transformers-lm-vicuna13b
endpoints:
- respond
- generate_goals
compose:
env_file:
- .env
build:
args:
SERVICE_PORT: 8168
SERVICE_NAME: transformers_lm_vicuna13b
PRETRAINED_MODEL_NAME_OR_PATH: lmsys/vicuna-13b-v1.3
HALF_PRECISION: 1
CUDA_VISIBLE_DEVICES: '0'
FLASK_APP: server
context: .
dockerfile: ./services/transformers_lm/Dockerfile
command: flask run -h 0.0.0.0 -p 8168
environment:
- CUDA_VISIBLE_DEVICES=0
- FLASK_APP=server
deploy:
resources:
limits:
memory: 50G
reservations:
memory: 50G
volumes:
- ./services/transformers_lm:/src
- ./common:/src/common
- ~/.deeppavlov/cache:/root/.cache
ports:
- 8168:8168
proxy:
command:
- nginx
- -g
- daemon off;
build:
context: dp/proxy/
dockerfile: Dockerfile
environment:
- PROXY_PASS=dream.deeppavlov.ai:8168
- PORT=8168
1 change: 1 addition & 0 deletions skills/dff_universal_prompted_skill/scenario/response.py
@@ -37,6 +37,7 @@
"http://transformers-lm-gptjt:8161/respond": [],
"http://anthropic-api-claude-v1:8164/respond": ["ANTHROPIC_API_KEY"],
"http://anthropic-api-claude-instant-v1:8163/respond": ["ANTHROPIC_API_KEY"],
"http://transformers-lm-vicuna13b:8168/respond": [],
}
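The mapping above tells the universal prompted skill which environment variables must accompany a call to each endpoint; the new Vicuna entry requires none, since the model runs locally rather than behind a paid API. A small sketch of how such a mapping can be checked before dispatching a request (the helper is hypothetical; only two entries are excerpted):

```python
import os

# Excerpt mirroring the mapping above: endpoint URL -> env vars the call needs.
ENVVARS_TO_SEND = {
    "http://transformers-lm-vicuna13b:8168/respond": [],
    "http://anthropic-api-claude-v1:8164/respond": ["ANTHROPIC_API_KEY"],
}


def missing_envvars(endpoint, env=None):
    """Return the required variables that are absent from the environment.

    An empty result means the endpoint can be called as-is.
    """
    env = os.environ if env is None else env
    return [var for var in ENVVARS_TO_SEND.get(endpoint, []) if var not in env]
```

A skill could refuse to route to an endpoint when `missing_envvars(...)` is non-empty, failing fast instead of letting the downstream API reject the request.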

