xbot: use thread pool
ylsdamxssjxxdd committed Sep 2, 2024
1 parent 89da367 commit fdcb01a
Showing 3 changed files with 33 additions and 56 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -11,7 +11,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/Release) # set

###################################### Build options ######################################
option(BODY_PACK "pack eva" OFF) # whether to pack
-option(GGML_CUDA "ggml: use CUDA" ON) # 900% speed
+option(GGML_CUDA "ggml: use CUDA" OFF) # 900% speed
option(GGML_VULKAN "ggml: use Vulkan" OFF) # 250% speed; sd not supported yet

##################################### Handle build options ####################################
41 changes: 30 additions & 11 deletions ui/xbot.cpp
@@ -4,15 +4,15 @@
#include "xbot.h"

xBot::xBot() {
-    log_disable();  // keep llama.cpp from writing log files
+    log_disable();  // keep llama.cpp from writing log files
    llama_log_set(xBot::bot_log_callback, this);  // set the callback that captures llama's log
    QObject::connect(this, &xBot::bot_llama_log, this, &xBot::recv_llama_log);
    showSpecial = true;  // whether to show special tokens <bos> <eos> <eot>

    // initial model parameters
    gpt_params_.n_gpu_layers = DEFAULT_NGL;  // number of layers offloaded to the GPU
    gpt_params_.model = "";  // model path
-    gpt_params_.cpuparams.n_threads = DEFAULT_NTHREAD;  // defaults to half the available threads
+    gpt_params_.cpuparams.n_threads = DEFAULT_NTHREAD;  // text-generation thread count; defaults to half the available threads
+    gpt_params_.cpuparams_batch.n_threads = DEFAULT_NTHREAD;  // prompt-processing thread count; kept equal to the generation thread count for simplicity
    gpt_params_.n_ctx = DEFAULT_NCTX;  // maximum context length
    gpt_params_.n_batch = DEFAULT_BATCH;  // maximum batch size per pass, mainly for decoding user input in batches; newly added, seems related to a memory leak during inference

@@ -622,6 +622,8 @@ void xBot::load(QString modelpath_) {
        model = nullptr;
        emit bot2ui_kv(0, n_past);  // newly added: there is no cache at this point
        Brain_vector.clear();
+        ggml_threadpool_free(threadpool);
+        ggml_threadpool_free(threadpool_batch);
        emit bot2expend_brainvector(Brain_vector, gpt_params_.n_ctx, 1);  // 1 forces a refresh of the memory matrix
        emit bot2ui_state("bot:" + jtr("free model and ctx"));
    } else {
@@ -653,6 +655,28 @@
    llama_init_result llama_init = llama_init_from_gpt_params(gpt_params_);
    model = llama_init.model;
    ctx = llama_init.context;
+
+    // create the thread pools
+    struct ggml_threadpool_params tpp_batch = ggml_threadpool_params_from_cpu_params(gpt_params_.cpuparams_batch);
+    struct ggml_threadpool_params tpp = ggml_threadpool_params_from_cpu_params(gpt_params_.cpuparams);
+    if (!ggml_threadpool_params_match(&tpp, &tpp_batch))
+    {
+        threadpool_batch = ggml_threadpool_new(&tpp_batch);
+        if (!threadpool_batch)  // thread pool creation failed
+        {
+            is_first_load = true;
+            emit bot2ui_loadover(false, 0);
+            emit bot2ui_state(jtr("eva broken"), EVA_SIGNAL);
+            emit bot2ui_state("bot:batch threadpool create failed", WRONG_SIGNAL);
+            return;
+        }
+        // Start the non-batch threadpool in the paused state
+        tpp.paused = true;
+    }
+    set_process_priority(gpt_params_.cpuparams.priority);
+    ggml_threadpool_params_match(&tpp, &tpp_batch);
+    threadpool = ggml_threadpool_new(&tpp);
+    llama_attach_threadpool(ctx, threadpool, threadpool_batch);

    // attach the vision projector
    if (mmprojpath != "") {
@@ -855,10 +879,7 @@ void xBot::preDecodeSystemPrompt() {

std::string str;
str = llama_token_to_piece(ctx, token);
-        if (!showSpecial && (token == eos_token || token == eot_token || token == bos_token)) {
-        } else {
-            token_str += str;
-        }
+        token_str += str;
Brain_vector.push_back({i + 1, token, QString::fromStdString(str)});
// qDebug()<<token<<QString::fromStdString(llama_token_to_piece(ctx, token));
}
@@ -954,10 +975,7 @@ void xBot::push_out(INPUTS input, std::vector<llama_token> embd_output, int cont
for (int i = 0; i < embd_output.size(); ++i) {
const llama_token token = embd_output[i];
std::string str = llama_token_to_piece(ctx, token);
-        if (!showSpecial && (token == eos_token || token == eot_token || token == bos_token)) {
-        } else {
-            token_str += str;
-        }
+        token_str += str;
}
    // if the result came from a tool, use sky blue; when both the prefix and suffix are \n it is treated as tool output
if (input.role == ROLE_TOOL) {
@@ -1013,6 +1031,7 @@ void xBot::recv_set(SETTINGS settings, bool can_reload) {
    // reload the model if the thread count changed
    if (gpt_params_.cpuparams.n_threads != settings.nthread) {
        gpt_params_.cpuparams.n_threads = settings.nthread;
+        gpt_params_.cpuparams_batch.n_threads = settings.nthread;
        reload_flag = true;
    }
    // reload the model if the ctx length changed
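
Note: the block added to load() mirrors the threadpool setup in llama.cpp's examples/main. Below is a minimal sketch of that lifecycle for reference, assuming a llama.cpp tree recent enough to ship the ggml threadpool API (ggml.h / llama.h plus the gpt_params helpers in common.h); attach_pools and detach_pools are illustrative names, not functions in this repo.

#include "common.h"
#include "ggml.h"
#include "llama.h"

static ggml_threadpool *g_tp = nullptr;        // text-generation pool
static ggml_threadpool *g_tp_batch = nullptr;  // prompt-processing pool

bool attach_pools(llama_context *ctx, const gpt_params &params) {
    ggml_threadpool_params tpp = ggml_threadpool_params_from_cpu_params(params.cpuparams);
    ggml_threadpool_params tpp_batch = ggml_threadpool_params_from_cpu_params(params.cpuparams_batch);

    if (!ggml_threadpool_params_match(&tpp, &tpp_batch)) {
        // a second pool is only needed when prompt processing uses different settings
        g_tp_batch = ggml_threadpool_new(&tpp_batch);
        if (!g_tp_batch) { return false; }
        // start the generation pool paused so both pools' threads never spin at once
        tpp.paused = true;
    }

    g_tp = ggml_threadpool_new(&tpp);
    if (!g_tp) { return false; }

    // a NULL batch pool makes llama.cpp reuse the generation pool for prompts
    llama_attach_threadpool(ctx, g_tp, g_tp_batch);
    return true;
}

void detach_pools() {
    // on model unload, as in load() above; ggml_threadpool_free accepts NULL
    ggml_threadpool_free(g_tp);       g_tp = nullptr;
    ggml_threadpool_free(g_tp_batch); g_tp_batch = nullptr;
}

Since recv_set now keeps cpuparams_batch.n_threads in lockstep with cpuparams.n_threads, the two parameter sets should normally match (assuming the remaining cpu params stay at their defaults), so threadpool_batch remains NULL and the single generation pool serves prompt processing as well.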
46 changes: 2 additions & 44 deletions ui/xbot.h
@@ -47,6 +47,8 @@ class xBot : public QObject {
    llama_model_params hparams;  // the model's internal parameters
    gpt_params gpt_params_;  // parameters controlling the model; contains the sampling parameters sparams
    llama_sampling_context *sparams;  // sampling parameters
+    struct ggml_threadpool * threadpool = NULL;  // thread pool for text generation
+    struct ggml_threadpool * threadpool_batch = NULL;  // thread pool for prompt processing

    llama_model *model;  // the model
    llama_context *ctx;  // the context
@@ -128,7 +130,6 @@
    bool is_debuging = false;  // currently in debug state
    int debuging_one = 0;  // limits the loop to a single pass while debugging
    std::vector<Brain_Cell> Brain_vector;  // memory vector (current memory)
-    bool showSpecial = true;  // whether to show special tokens

public slots:
    void recv_stop();  // receive the stop signal
@@ -166,46 +167,3 @@
};

#endif // XBOT_H


-//-------------------------------------------------------------------------
-//---------------------------- Multi-backend support -----------------------
-//-------------------------------------------------------------------------
-
-// // define the function-pointer types
-// typedef struct llama_model* (*llama_load_model_from_file_t)(const char *path_model, struct llama_model_params params);
-// typedef struct llama_context* (*llama_new_context_with_model_t)(struct llama_model * model, struct llama_context_params params);
-// typedef bool (*llava_eval_image_embed_t)(struct llama_context * ctx_llama, const struct llava_image_embed * embed, int n_batch, int * n_past);
-// typedef struct clip_ctx * (*clip_model_load_t)(const char * fname, int verbosity);
-
-// define the function pointers
-// llama_load_model_from_file_t llama_load_model_from_file_;
-// llama_new_context_with_model_t llama_new_context_with_model_;
-// llava_eval_image_embed_t llava_eval_image_embed_;
-// clip_model_load_t clip_model_load_;
-
-// // dynamic loading
-// QString accelerate = "cuda";
-// QString libraryPath_ggml = applicationDirPath_ + "/llama-dll/ggml-" + accelerate;
-// QString libraryPath_llama = applicationDirPath_ + "/llama-dll/llama-" + accelerate;
-// QString libraryPath_llava_shared = applicationDirPath_ + "/llama-dll/llava_shared-" + accelerate;
-
-// QLibrary ggml_Lib(libraryPath_ggml);  // load the DLL
-// if (!ggml_Lib.load()) {qDebug()<<"Failed to load the DLL." << ggml_Lib.errorString();}
-
-// QLibrary llama_Lib(libraryPath_llama);  // load the DLL
-// if (!llama_Lib.load()) {qDebug()<<"Failed to load the DLL." << llama_Lib.errorString();}
-
-// QLibrary llava_shared_Lib(libraryPath_llava_shared);  // load the DLL
-// if (!llava_shared_Lib.load()) {qDebug()<<"Failed to load the DLL." << llava_shared_Lib.errorString();}
-
-// // resolve the function pointers
-// llama_load_model_from_file_ = (llama_load_model_from_file_t)llama_Lib.resolve("llama_load_model_from_file");
-// llama_new_context_with_model_ = (llama_new_context_with_model_t)llama_Lib.resolve("llama_new_context_with_model");
-// if (!llama_load_model_from_file_) {qDebug()<<"Failed to resolve the function llama_load_model_from_file_.";}
-// if (!llama_new_context_with_model_) {qDebug()<<"Failed to resolve the function llama_new_context_with_model_.";}
-
-// llava_eval_image_embed_ = (llava_eval_image_embed_t)llava_shared_Lib.resolve("llava_eval_image_embed");
-// if (!llava_eval_image_embed_) {qDebug()<<"Failed to resolve the function llava_eval_image_embed.";}
-// clip_model_load_ = (clip_model_load_t)llava_shared_Lib.resolve("clip_model_load");
-// if (!clip_model_load_) {qDebug()<<"Failed to resolve the function clip_model_load.";}
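
For context, the block deleted above was a commented-out sketch of loading an alternative llama.cpp backend DLL at runtime. A compilable version of that QLibrary pattern would look roughly like the following; the "llama-dll/llama-<accelerate>" layout comes from the deleted comments, resolve_loader is an illustrative name, and the llama_load_model_from_file entry point matches the llama.cpp API of this era.

#include <QCoreApplication>
#include <QDebug>
#include <QLibrary>

struct llama_model;
struct llama_model_params;

// function-pointer type for the entry point to resolve from the DLL
typedef llama_model *(*llama_load_model_from_file_t)(const char *path_model, llama_model_params params);

llama_load_model_from_file_t resolve_loader(const QString &accelerate) {
    const QString libraryPath_llama = QCoreApplication::applicationDirPath() + "/llama-dll/llama-" + accelerate;

    QLibrary llama_Lib(libraryPath_llama);  // load the backend-specific DLL
    if (!llama_Lib.load()) {
        qDebug() << "Failed to load the DLL." << llama_Lib.errorString();
        return nullptr;
    }

    // the library stays loaded after llama_Lib goes out of scope unless unload() is called
    auto fn = reinterpret_cast<llama_load_model_from_file_t>(llama_Lib.resolve("llama_load_model_from_file"));
    if (!fn) {
        qDebug() << "Failed to resolve the function llama_load_model_from_file.";
    }
    return fn;
}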
