Skip to content

Commit

Permalink
feat: add mistral 7b 128k (#589)
Browse files Browse the repository at this point in the history
* feat: add mistral 7b 128k

* fix: model name

* fix: model name

* fix: configs

* fix: config names and cards

* fix: more cards

* fix: mem

* fix: no proxy available
  • Loading branch information
dilyararimovna authored Nov 22, 2023
1 parent f388f1d commit ad9f06f
Show file tree
Hide file tree
Showing 24 changed files with 859 additions and 0 deletions.
1 change: 1 addition & 0 deletions MODELS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
| [Anthropic Claude Instant v1](https://docs.anthropic.com/claude/reference/complete_post) | anthropic-api-claude-instant-v1 | no (paid access via API) | supposedly, 52B | - (cannot be run locally) | 9,000 tokens | available under subscription plan, commercial use allowed | A smaller model with far lower latency, sampling at roughly 40 words/sec! Its output quality is somewhat lower than the latest claude-1 model, particularly for complex tasks. However, it is much less expensive and blazing fast. NB: paid. You must provide your Anthropic API key to use the model. Your Anthropic API account will be charged according to your usage. |
| Russian XGLM 4.5B (private weights) | transformers-lm-ruxglm | no | 4.5B | 15GB | 2,048 tokens | Not available yet | A private large language model for the Russian language which was fine-tuned for instruction following by Dmitry Kosenko in Summer 2023. This model is up and running on our servers and can be used for free. |
| [ruGPT-3.5-13B](https://huggingface.co/ai-forever/ruGPT-3.5-13B) | transformers-lm-rugpt35 | yes | 13B | 35GB (half-precision) | 2,048 tokens | MIT | A large language model for the Russian language which was used for training GigaChat. This model is up and running on our servers and can be used for free. |
| [Mistral 7B 128k Tokens](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k) | transformers-lm-mistral-7b-128k | yes | 7B | 20GB (half-precision) | 128,000 tokens | Apache 2.0, commercial use is allowed | An open-source English-only large language model which was fine-tuned for instruction following but is NOT capable of code generation. NB: free of charge. This model is up and running on our servers and can be used for free. |
10 changes: 10 additions & 0 deletions assistant_dists/document_based_qa_transformers/cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# CPU-only override for the document_based_qa_transformers distribution.
# Forces the GPU-capable annotator/ranker services onto CPU by setting
# DEVICE and clearing CUDA_VISIBLE_DEVICES (empty string hides all GPUs).
# NOTE(review): transformers-lm-mistral-7b-128k gets no CPU override here —
# presumably the 7B LLM is GPU-only; confirm before running this dist on CPU.
version: '3.7'
services:
  combined-classification:
    environment:
      DEVICE: cpu
      CUDA_VISIBLE_DEVICES: ""
  sentence-ranker:
    environment:
      DEVICE: cpu
      CUDA_VISIBLE_DEVICES: ""
6 changes: 6 additions & 0 deletions assistant_dists/document_based_qa_transformers/db_conf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"host": "DB_HOST",
"port": "DB_PORT",
"name": "DB_NAME",
"env": true
}
52 changes: 52 additions & 0 deletions assistant_dists/document_based_qa_transformers/dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Developer override: these volume mounts make debugging convenient — source
# is mounted into the containers, so there is no need to rebuild a container
# on every code change. Port mappings expose each service on the host.
# Ports are quoted: unquoted DIGITS:DIGITS is a YAML 1.1 sexagesimal integer
# (e.g. "22:22" would load as 1342), which Docker Compose docs warn about.
services:
  agent:
    volumes:
      - ".:/dp-agent"
    ports:
      - "4242:4242"
  files:
    ports:
      - "3000:3000"
    volumes:
      - "~/.deeppavlov/file_server:/tmp"
  sentseg:
    volumes:
      - "./annotators/SentSeg:/src"
    ports:
      - "8011:8011"
  combined-classification:
    volumes:
      - "./common:/src/common"
      - "./annotators/combined_classification:/src"
    ports:
      - "8087:8087"
  sentence-ranker:
    volumes:
      - "./services/sentence_ranker:/src"
      # Shared HuggingFace cache so model weights are not re-downloaded.
      - "~/.deeppavlov/cache:/root/.cache"
    ports:
      - "8128:8128"
  transformers-lm-mistral-7b-128k:
    volumes:
      - "./services/transformers_lm:/src"
      - "./common:/src/common"
      - "~/.deeppavlov/cache:/root/.cache"
    ports:
      - "8185:8185"
  doc-retriever:
    volumes:
      - "./annotators/doc_retriever:/src"
      - "./common:/src/common"
      - "./documents:/src/documents"
    ports:
      - "8165:8165"
  dff-document-qa-transformers-llm-skill:
    volumes:
      - "./skills/dff_document_qa_llm_skill:/src"
      - "./common:/src/common"
      - "./documents:/src/documents"
    ports:
      - "8186:8186"

version: "3.7"
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Service wiring for the document_based_qa_transformers distribution:
# agent + annotators + response selector + Mistral 7B 128k LLM + document-QA skill.
services:
  agent:
    # bin/wait blocks until every host in WAIT_HOSTS is reachable, then starts
    # the agent with this distribution's pipeline config.
    command: sh -c 'bin/wait && python -m deeppavlov_agent.run agent.pipeline_config=assistant_dists/document_based_qa_transformers/pipeline_conf.json'
    environment:
      WAIT_HOSTS: "sentseg:8011, combined-classification:8087, ranking-based-response-selector:8002,
        sentence-ranker:8128, transformers-lm-mistral-7b-128k:8185, doc-retriever:8165, dff-document-qa-transformers-llm-skill:8186"
      # Default 1000s — the LLM service can take a long time to load weights.
      WAIT_HOSTS_TIMEOUT: ${WAIT_TIMEOUT:-1000}
      HIGH_PRIORITY_INTENTS: 1
      RESTRICTION_FOR_SENSITIVE_CASE: 1
      ALWAYS_TURN_ON_ALL_SKILLS: 0
      LANGUAGE: EN

  # Plain HTTP file server used for uploading/downloading user documents.
  files:
    image: julienmeerschart/simple-file-upload-download-server

  # Sentence segmentation annotator.
  sentseg:
    env_file: [ .env ]
    build:
      context: ./annotators/SentSeg/
      dockerfile: Dockerfile-test
    command: flask run -h 0.0.0.0 -p 8011
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 1.5G
        reservations:
          memory: 1.5G

  # Downloads the documents listed in DOC_PATH_OR_LINK and retrieves the
  # PARAGRAPHS_NUM most relevant paragraphs for the current query.
  doc-retriever:
    env_file: [ .env ]
    build:
      context: .
      dockerfile: ./annotators/doc_retriever/Dockerfile
      args:
        SERVICE_PORT: 8165
        SERVICE_NAME: doc_retriever
        CONFIG_PATH: ./doc_retriever_config.json
        DOC_PATH_OR_LINK: http://files.deeppavlov.ai/dream_data/documents_for_qa/test_file_dream_repo.html,http://files.deeppavlov.ai/dream_data/documents_for_qa/alphabet_financial_report.txt,http://files.deeppavlov.ai/dream_data/documents_for_qa/test_file_jurafsky_chatbots.pdf
        PARAGRAPHS_NUM: 5
        FILE_SERVER_TIMEOUT: 30
    command: python -m flask run -h 0.0.0.0 -p 8165
    environment:
      - FLASK_APP=server
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        limits:
          memory: 5G
        reservations:
          memory: 5G

  # Multi-task classifier annotator (toxicity, sentiment, etc. — see config).
  combined-classification:
    env_file: [ .env ]
    build:
      args:
        CONFIG: combined_classifier.json
        SERVICE_PORT: 8087
      context: .
      dockerfile: ./annotators/combined_classification/Dockerfile
    command: gunicorn --workers=1 server:app -b 0.0.0.0:8087 --timeout 600
    environment:
      - CUDA_VISIBLE_DEVICES=0
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 2G

  # Picks the final response by ranking skill hypotheses with sentence-ranker.
  ranking-based-response-selector:
    env_file: [ .env ]
    build:
      args:
        SERVICE_PORT: 8002
        SERVICE_NAME: response_selector
        LANGUAGE: EN
        SENTENCE_RANKER_ANNOTATION_NAME: sentence_ranker
        SENTENCE_RANKER_SERVICE_URL: http://sentence-ranker:8128/respond
        SENTENCE_RANKER_TIMEOUT: 3
        N_UTTERANCES_CONTEXT: 5
        FILTER_TOXIC_OR_BADLISTED: 1
      context: .
      dockerfile: ./response_selectors/ranking_based_response_selector/Dockerfile
    command: flask run -h 0.0.0.0 -p 8002
    environment:
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 100M
        reservations:
          memory: 100M

  # Sentence-similarity ranker backing the response selector.
  sentence-ranker:
    env_file: [ .env ]
    build:
      args:
        SERVICE_PORT: 8128
        SERVICE_NAME: sentence_ranker
        PRETRAINED_MODEL_NAME_OR_PATH: sentence-transformers/all-MiniLM-L6-v2
      context: ./services/sentence_ranker/
    command: flask run -h 0.0.0.0 -p 8128
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          memory: 3G
        reservations:
          memory: 3G

  # Generative LLM service: Yarn-Mistral-7b-128k (128k-token context),
  # loaded in half precision with FlashAttention 2 enabled.
  transformers-lm-mistral-7b-128k:
    env_file: [ .env ]
    build:
      args:
        SERVICE_PORT: 8185
        SERVICE_NAME: transformers_lm_mistral_7b_128k
        PRETRAINED_MODEL_NAME_OR_PATH: NousResearch/Yarn-Mistral-7b-128k
        HALF_PRECISION: 1
        USE_FLASH_ATTENTION_2: 1
      context: .
      dockerfile: ./services/transformers_lm/Dockerfile
    command: flask run -h 0.0.0.0 -p 8185
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - FLASK_APP=server
    deploy:
      resources:
        limits:
          # NOTE(review): 50G host RAM budget for the 7B model — presumably
          # sized for the 128k context window; confirm against real usage.
          memory: 50G
        reservations:
          memory: 50G

  # DFF skill that prompts the LLM with retrieved document paragraphs.
  dff-document-qa-transformers-llm-skill:
    env_file: [ .env ]
    build:
      args:
        SERVICE_PORT: 8186
        SERVICE_NAME: dff_document_qa_llm_skill
        GENERATIVE_SERVICE_URL: http://transformers-lm-mistral-7b-128k:8185/respond
        GENERATIVE_SERVICE_CONFIG: transformers_mistral.json
        GENERATIVE_TIMEOUT: 120
        N_UTTERANCES_CONTEXT: 7
        FILE_SERVER_TIMEOUT: 30
        DOCUMENT_PROMPT_FILE: common/prompts/document_qa_instruction.json
      context: .
      dockerfile: ./skills/dff_document_qa_llm_skill/Dockerfile
    deploy:
      resources:
        limits:
          memory: 128M
        reservations:
          memory: 128M

version: '3.7'
Loading

0 comments on commit ad9f06f

Please sign in to comment.