Add WllamaError class, fix llama_decode hangs on long input text (#130)
* add WllamaError class

* fix batching problem

* build latest upstream source code

* minor UI fix

* v1.17.0
ngxson authored Oct 31, 2024
1 parent b727c3c commit f7562fa
Showing 11 changed files with 232 additions and 105 deletions.
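
The headline addition, the `WllamaError` class, lives in the compiled `src/*/wllama.js` bundles whose diffs are not rendered below. As a rough orientation only, here is a minimal sketch of what a typed error class and its consumer could look like; the `type` discriminator and the error strings are hypothetical, not the library's confirmed API:

```ts
// Minimal sketch of a WllamaError-style class; the shipped implementation
// may differ. The `type` field is an assumed discriminator.
class WllamaError extends Error {
  constructor(
    message: string,
    public readonly type: string = 'unknown_error'
  ) {
    super(message);
    this.name = 'WllamaError';
  }
}

// Callers can then separate wllama failures from generic errors:
try {
  throw new WllamaError('model is not yet loaded', 'model_not_loaded');
} catch (err) {
  if (err instanceof WllamaError) {
    console.error(`wllama failed (${err.type}): ${err.message}`);
  } else {
    throw err; // not a wllama error, rethrow
  }
}
```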
8 changes: 5 additions & 3 deletions actions.hpp
@@ -249,6 +249,8 @@ json action_load(app_t &app, json &body)
   return json{
       {"success", true},
       {"n_ctx", cparams.n_ctx},
+      {"n_batch", llama_n_batch(app.ctx)},
+      {"n_ubatch", llama_n_ubatch(app.ctx)},
       {"n_vocab", llama_n_vocab(app.model)},
       {"n_ctx_train", llama_n_ctx_train(app.model)},
       {"n_embd", llama_n_embd(app.model)},
@@ -325,8 +327,6 @@ json action_sampling_init(app_t &app, json &body)
     sparams.n_probs = body["n_probs"];
   if (body.contains("min_p"))
     sparams.min_p = body["min_p"];
-  if (body.contains("tfs_z"))
-    sparams.tfs_z = body["tfs_z"];
   if (body.contains("typical_p")) // for compat
     sparams.typ_p = body["typical_p"];
   if (body.contains("typ_p"))
@@ -428,7 +428,9 @@ json action_detokenize(app_t &app, json &body)
 json action_decode(app_t &app, json &body)
 {
   std::vector<llama_token> tokens_list = body["tokens"];
-  bool skip_logits = body.contains("skip_logits");
+  bool skip_logits = body.contains("skip_logits")
+                         ? body.at("skip_logits").get<bool>()
+                         : false;
   size_t i = 0;
   common_batch_clear(app.batch);
   for (auto id : tokens_list)
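
The old predicate treated the mere presence of the `skip_logits` key as true; the replacement reads its boolean value and defaults to false. The same distinction restated in TypeScript:

```ts
// Presence vs. value: the semantics fixed above.
const body: { skip_logits?: boolean } = { skip_logits: false };

const oldBehaviour = 'skip_logits' in body;     // true: key exists, value ignored
const newBehaviour = body.skip_logits ?? false; // false: the value is honoured
```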
23 changes: 16 additions & 7 deletions examples/main/src/components/ModelScreen.tsx
@@ -1,4 +1,4 @@
-import { ManageModel, ModelState } from '../utils/types';
+import { ManageModel, ModelState, Screen } from '../utils/types';
 import { useWllama } from '../utils/wllama.context';
 import { FontAwesomeIcon } from '@fortawesome/react-fontawesome';
 import {
@@ -225,6 +225,7 @@ function ModelCard({
     unloadModel,
     removeCustomModel,
     currRuntimeInfo,
+    navigateTo,
   } = useWllama();

   const m = model;
@@ -321,12 +322,20 @@ function ModelCard({
         </>
       )}
       {m.state === ModelState.LOADED && (
-        <button
-          className="btn btn-outline btn-primary btn-sm mr-2"
-          onClick={() => unloadModel()}
-        >
-          Unload
-        </button>
+        <>
+          <button
+            className="btn btn-primary btn-sm mr-2"
+            onClick={() => navigateTo(Screen.CHAT)}
+          >
+            Start chat
+          </button>
+          <button
+            className="btn btn-outline btn-primary btn-sm mr-2"
+            onClick={() => unloadModel()}
+          >
+            Unload
+          </button>
+        </>
       )}
       {m.state === ModelState.NOT_DOWNLOADED && m.userAdded && (
         <button
40 changes: 20 additions & 20 deletions examples/main/src/components/Sidebar.tsx
@@ -31,7 +31,7 @@ export default function Sidebar({ children }: { children: any }) {

       <div className="h-screen lg:max-h-[calc(100vh-4rem)] flex flex-col text-base-content bg-base-200">
         <div className="grow w-80 overflow-auto p-4">
-          <ul className="grow menu gap-1">
+          <ul className="menu gap-1 overflow-x-hidden">
             <li onClick={() => navigateTo(Screen.CHAT)}>
               <a
                 className={
@@ -45,26 +45,28 @@ export default function Sidebar({ children }: { children: any }) {
             <li
               key={conv.id}
               onClick={() => navigateTo(Screen.CHAT, conv.id)}
+              className="group flex flex-row"
             >
               <a
-                className={`group ${conv.id === currentConvId ? 'active' : ''}`}
+                className={`${conv.id === currentConvId ? 'active' : ''} flex-1 min-w-0`}
               >
-                {conv.messages[0]?.content}
-                <span className="text-right hidden group-hover:inline cursor-pointer">
-                  <FontAwesomeIcon
-                    icon={faTrashAlt}
-                    onClick={(e) => {
-                      e.preventDefault();
-                      if (
-                        confirm('Are you sure to delete this conversation?')
-                      ) {
-                        navigateTo(Screen.CHAT);
-                        deleteConversation(conv.id);
-                      }
-                    }}
-                  />
-                </span>
+                <div className="truncate">{conv.messages[0]?.content}</div>
               </a>
+
+              <span className="text-right hidden group-hover:inline">
+                <FontAwesomeIcon
+                  icon={faTrashAlt}
+                  onClick={(e) => {
+                    e.preventDefault();
+                    if (
+                      confirm('Are you sure to delete this conversation?')
+                    ) {
+                      navigateTo(Screen.CHAT);
+                      deleteConversation(conv.id);
+                    }
+                  }}
+                />
+              </span>
             </li>
           ))}
         </ul>
@@ -104,9 +106,7 @@ export default function Sidebar({ children }: { children: any }) {
             </li>
           </ul>

-          <div className="text-xs pl-6 pt-2">
-            Version {WLLAMA_VERSION}
-          </div>
+          <div className="text-xs pl-6 pt-2">Version {WLLAMA_VERSION}</div>
         </div>
       </div>
     </div>
8 changes: 6 additions & 2 deletions examples/main/src/utils/utils.ts
@@ -41,8 +41,12 @@ export const formatChat = async (
   const template = new Template(
     modelWllama.getChatTemplate() ?? DEFAULT_CHAT_TEMPLATE
   );
-  const bos_token: string = textDecoder.decode(await modelWllama.detokenize([modelWllama.getBOS()]));
-  const eos_token: string = textDecoder.decode(await modelWllama.detokenize([modelWllama.getEOS()]));
+  const bos_token: string = textDecoder.decode(
+    await modelWllama.detokenize([modelWllama.getBOS()])
+  );
+  const eos_token: string = textDecoder.decode(
+    await modelWllama.detokenize([modelWllama.getEOS()])
+  );
   return template.render({
     messages,
     bos_token,
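
The reflowed lines decode the BOS/EOS token ids back into strings for the chat template. A minimal sketch of that round-trip, assuming a loaded instance exposing the `getBOS()` and `detokenize()` methods seen in the diff, with `detokenize()` returning UTF-8 bytes:

```ts
// modelWllama is assumed to be a loaded instance with the methods shown
// in the diff; detokenize() yields raw bytes, hence the TextDecoder.
const textDecoder = new TextDecoder();
const bosBytes = await modelWllama.detokenize([modelWllama.getBOS()]);
const bos_token = textDecoder.decode(bosBytes); // e.g. "<s>" for Llama-style vocabs
```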
2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 58 files
+1 −1 ci/run.sh
+67 −13 common/arg.cpp
+4 −14 common/common.cpp
+55 −34 common/common.h
+19 −11 common/sampling.cpp
+3 −0 convert_hf_to_gguf.py
+1 −0 convert_hf_to_gguf_update.py
+3 −3 convert_lora_to_gguf.py
+22 −110 examples/llama-bench/llama-bench.cpp
+33 −10 examples/main/README.md
+30 −8 examples/server/README.md
+11 −4 examples/server/public/index-new.html
+8 −2 examples/server/public/index.html
+0 −0 examples/server/public/style.css
+194 −367 examples/server/server.cpp
+36 −0 examples/server/tests/features/infill.feature
+46 −0 examples/server/tests/features/steps/steps.py
+0 −2 examples/server/themes/buttons-top/index.html
+0 −2 examples/server/themes/wild/index.html
+246 −14 examples/server/utils.hpp
+3 −3 flake.lock
+12 −7 ggml/include/ggml-backend.h
+1 −1 ggml/include/ggml-cuda.h
+4 −0 ggml/include/ggml-kompute.h
+2 −0 ggml/src/CMakeLists.txt
+268 −0 ggml/src/ggml-aarch64.c
+8 −25 ggml/src/ggml-amx.cpp
+4 −15 ggml/src/ggml-backend-impl.h
+127 −115 ggml/src/ggml-backend.cpp
+5 −15 ggml/src/ggml-blas.cpp
+6 −44 ggml/src/ggml-cann.cpp
+69 −78 ggml/src/ggml-cuda.cu
+1 −1 ggml/src/ggml-cuda/cpy.cuh
+238 −72 ggml/src/ggml-kompute.cpp
+67 −52 ggml/src/ggml-metal.m
+380 −196 ggml/src/ggml-metal.metal
+2 −18 ggml/src/ggml-rpc.cpp
+14 −40 ggml/src/ggml-sycl.cpp
+80 −18 ggml/src/ggml-vulkan.cpp
+57 −14 ggml/src/ggml.c
+9 −0 ggml/src/kompute-shaders/common.comp
+133 −0 ggml/src/kompute-shaders/op_mul_mat_q4_k.comp
+57 −0 ggml/src/llamafile/sgemm.cpp
+74 −0 ggml/src/vulkan-shaders/pool2d.comp
+4 −0 ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
+12 −8 include/llama.h
+1 −1 scripts/compare-llama-bench.py
+1 −1 scripts/run-with-preset.py
+7 −0 scripts/sync-ggml-am.sh
+1 −1 scripts/sync-ggml.last
+3 −0 scripts/sync-ggml.sh
+392 −96 src/llama-sampling.cpp
+18 −0 src/llama-sampling.h
+16 −0 src/llama-vocab.cpp
+5 −0 src/llama-vocab.h
+1,635 −1,516 src/llama.cpp
+4 −0 tests/test-chat-template.cpp
+37 −21 tests/test-sampling.cpp
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
 {
   "name": "@wllama/wllama",
-  "version": "1.16.4",
+  "version": "1.17.0",
   "description": "Low-level WASM binding for llama.cpp",
   "main": "index.js",
   "type": "module",
2 changes: 1 addition & 1 deletion src/multi-thread/wllama.js

Large diffs are not rendered by default.

Binary file modified src/multi-thread/wllama.wasm
2 changes: 1 addition & 1 deletion src/single-thread/wllama.js

Large diffs are not rendered by default.

Binary file modified src/single-thread/wllama.wasm