.env (forked from h2oai/h2ogpt)
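# Environment file for a Docker-based h2oGPT deployment.
# Assumption (not shown in this snippet): a docker-compose.yml or a
# `docker run --env-file .env ...` invocation in the same directory consumes
# the variables defined below.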
# H2OGPT
H2OGPT_PORT=7860
H2OGPT_BASE_MODEL=h2oai/h2ogpt-4096-llama2-7b-chat
H2OGPT_ARGS="/workspace/generate.py --base_model=${H2OGPT_BASE_MODEL} --use_safetensors=True --prompt_type=llama2 --save_dir=/workspace/save/ --use_gpu_id=False --score_model=None --max_max_new_tokens=2048 --max_new_tokens=1024"
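# Sketch of how H2OGPT_ARGS might be consumed inside the h2oGPT container
# (an assumption, not necessarily the repo's exact entrypoint); the variable
# already starts with the script path, so the args can be passed straight to
# the interpreter:
#   python ${H2OGPT_ARGS}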
# VLLM
VLLM_TOKENIZER=hf-internal-testing/llama-tokenizer
H2OGPT_VLLM_ARGS="--model=${H2OGPT_BASE_MODEL} --tokenizer=${VLLM_TOKENIZER} --tensor-parallel-size=2 --seed=1234 --trust-remote-code --download-dir=/workspace/.cache/huggingface/hub"
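# Sketch (assumption): the vLLM container would start its OpenAI-compatible
# server with these arguments, e.g.:
#   python -m vllm.entrypoints.openai.api_server ${H2OGPT_VLLM_ARGS}
# h2oGPT could then be pointed at that endpoint (via its --inference_server
# option) instead of loading the model in-process.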
# CPU models
MODEL_PATH_LLAMA=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf
H2OGPT_CPU_ARGS="/workspace/generate.py --base_model=llama --model_path_llama=${MODEL_PATH_LLAMA} --max_seq_len=4096"
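# Sketch (assumption): for a CPU-only run, generate.py is launched with the
# GGUF model instead of the GPU settings above, e.g.:
#   python ${H2OGPT_CPU_ARGS}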