Skip to content

Commit

Permalink
[Model] Add DeepSeek-R1-Distill and Hermes-3-Llama-3.2 (#652)
Browse files Browse the repository at this point in the history
This PR adds the following models to the prebuilt list:

- `DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC`
- `DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC`
- `DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC`
- `DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC`
- `Hermes-3-Llama-3.2-3B-q4f16_1-MLC`
- `Hermes-3-Llama-3.2-3B-q4f32_1-MLC`

We will add `DeepSeek-R1-Distill-Qwen-1.5B` later, as it is currently
experiencing correctness issues.

Separately, we fix the handling of `role_content_sep` and
`role_empty_sep` when the value is the empty string `""`, which is falsy in
JavaScript (previously we replaced it with `": "`, which is inconsistent
with what the model expects).
  • Loading branch information
CharlieFRuan authored Jan 21, 2025
1 parent 8b77b3d commit 808685b
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 7 deletions.
114 changes: 113 additions & 1 deletion src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ export const functionCallingModelIds = [
export const prebuiltAppConfig: AppConfig = {
useIndexedDBCache: false,
model_list: [
// Llama-3.2
{
model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f32_1-MLC",
model_id: "Llama-3.2-1B-Instruct-q4f32_1-MLC",
Expand Down Expand Up @@ -440,7 +441,92 @@ export const prebuiltAppConfig: AppConfig = {
context_window_size: 4096,
},
},
// Hermes-2
// DeepSeek-R1-Distill-Qwen
// TODO(Charlie): Qwen2-1.5B is experiencing a correctness issue, hence commented out for now.
// {
// model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC",
// model_id: "DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC",
// model_lib:
// modelLibURLPrefix +
// modelVersion +
// "/Qwen2-1.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
// low_resource_required: true,
// vram_required_MB: 1629.75,
// overrides: {
// context_window_size: 4096,
// },
// },
// {
// model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC",
// model_id: "DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC",
// model_lib:
// modelLibURLPrefix +
// modelVersion +
// "/Qwen2-1.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
// low_resource_required: true,
// vram_required_MB: 1888.97,
// overrides: {
// context_window_size: 4096,
// },
// },
{
model:
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
model_id: "DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Qwen2-7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
low_resource_required: false,
vram_required_MB: 5106.67,
overrides: {
context_window_size: 4096,
},
},
{
model:
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC",
model_id: "DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Qwen2-7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
low_resource_required: false,
vram_required_MB: 5900.09,
overrides: {
context_window_size: 4096,
},
},
// DeepSeek-R1-Distill-Llama
{
model:
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC",
model_id: "DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 6101.01,
low_resource_required: false,
overrides: {
context_window_size: 4096,
},
},
{
model:
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
model_id: "DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 5001.0,
low_resource_required: false,
overrides: {
context_window_size: 4096,
},
},
// Hermes-3 and Hermes-2
{
model:
"https://huggingface.co/mlc-ai/Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC",
Expand Down Expand Up @@ -497,6 +583,32 @@ export const prebuiltAppConfig: AppConfig = {
context_window_size: 4096,
},
},
{
model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f32_1-MLC",
model_id: "Hermes-3-Llama-3.2-3B-q4f32_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 2951.51,
low_resource_required: true,
overrides: {
context_window_size: 4096,
},
},
{
model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
model_id: "Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 2263.69,
low_resource_required: true,
overrides: {
context_window_size: 4096,
},
},
{
model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.1-8B-q4f32_1-MLC",
model_id: "Hermes-3-Llama-3.1-8B-q4f32_1-MLC",
Expand Down
16 changes: 10 additions & 6 deletions src/conversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,11 @@ export class Conversation {
"message for a reply header.",
);
}
const empty_sep = this.config.role_empty_sep
? this.config.role_empty_sep
: ": ";
// Default to ": " only when the field is absent; an explicit "" means no separator.
const empty_sep =
this.config.role_empty_sep || this.config.role_empty_sep == ""
? this.config.role_empty_sep
: ": ";
ret.push(role_str + empty_sep);
continue;
}
Expand Down Expand Up @@ -153,9 +155,11 @@ export class Conversation {
) {
role_prefix = "";
} else {
const content_sep = this.config.role_content_sep
? this.config.role_content_sep
: ": ";
// Default to ": " only when the field is absent; an explicit "" means no separator.
const content_sep =
this.config.role_content_sep || this.config.role_content_sep == ""
? this.config.role_content_sep
: ": ";
role_prefix = role_str + content_sep;
}

Expand Down

0 comments on commit 808685b

Please sign in to comment.