
Record the model's recent output without using llama.cpp's sampler
ylsdamxssjxxdd committed Sep 22, 2024
1 parent d505f2f commit 620b1e2
Showing 4 changed files with 15 additions and 8 deletions.
6 changes: 3 additions & 3 deletions CMakeLists.txt
@@ -10,9 +10,9 @@ set(version b3797) # package version
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/Release) # final output directory for eva

 ###################################### Build options ######################################
-option(BODY_PACK "pack eva" ON) # whether to package
-option(GGML_CUDA "ggml: use CUDA" OFF) # 900% speed
-option(GGML_VULKAN "ggml: use Vulkan" ON) # 250% speed, sd not yet supported
+option(BODY_PACK "pack eva" OFF) # whether to package
+option(GGML_CUDA "ggml: use CUDA" ON) # 900% speed
+option(GGML_VULKAN "ggml: use Vulkan" OFF) # 250% speed, sd not yet supported

 ##################################### Handle build options ####################################
 option(BUILD_SHARED_LIBS "build shared libraries" ON) # use dynamic linking throughout
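For reference, these cache options can also be set at configure time instead of editing the defaults in CMakeLists.txt. A minimal sketch, assuming a build directory named "build" (the directory name is an assumption, not part of the commit):

    # Configure with CUDA enabled, Vulkan and packaging disabled
    cmake -B build -DBODY_PACK=OFF -DGGML_CUDA=ON -DGGML_VULKAN=OFF
    cmake --build build --config Release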
2 changes: 1 addition & 1 deletion llama.cpp/examples/main/main.cpp
@@ -680,7 +680,7 @@ int main(int argc, char ** argv) {
         }

         const llama_token id = gpt_sampler_sample(smpl, ctx, -1);
-
+        gpt_sampler_accept(smpl, id, /* accept_grammar= */ true);

         // LOG_DBG("last: %s\n", string_from(ctx, smpl->prev.to_vector()).c_str());
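For context, the gpt_sampler API in llama.cpp around b3797 splits picking a token from recording it: gpt_sampler_sample() only reads the logits, and the caller decides whether to push the result into the sampler's history with gpt_sampler_accept(). A minimal sketch of a generation loop using that pattern; the helper function, its name, and the loop details are illustrative, not the actual main.cpp code:

    #include "llama.h"
    #include "common.h"
    #include "sampling.h"

    // Sketch: generate up to n_predict tokens, recording each sampled token in the
    // sampler's history so repetition penalties and grammar state can see it.
    static void generate_sketch(llama_context * ctx, gpt_sampler * smpl, int n_past, int n_predict) {
        for (int i = 0; i < n_predict; ++i) {
            llama_token id = gpt_sampler_sample(smpl, ctx, -1);        // sample from the last logits
            gpt_sampler_accept(smpl, id, /* accept_grammar= */ true);  // record it in the sampler

            if (llama_token_is_eog(llama_get_model(ctx), id)) {        // stop at end-of-generation
                break;
            }
            // feed the new token back for the next decoding step (b3797-era batch helper)
            if (llama_decode(ctx, llama_batch_get_one(&id, 1, n_past, 0)) != 0) {
                break;
            }
            ++n_past;
        }
    }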
14 changes: 10 additions & 4 deletions ui/xbot.cpp
@@ -80,7 +80,7 @@ void xBot::predict(INPUTS inputs) {
     // qDebug()<<"embd before insertion"<<view_embd(ctx,embd);
     while ((int)embd_inp.size() > n_consumed) {
         embd.push_back(embd_inp[n_consumed]);
-        gpt_sampler_accept(smpl, embd_inp[n_consumed], /* accept_grammar= */ false); // record the token id
+        // gpt_sampler_accept(smpl, embd_inp[n_consumed], /* accept_grammar= */ false); // record the token id
         ++n_consumed;
     }
     // qDebug()<<"embd after insertion"<<view_embd(ctx,embd);
@@ -93,6 +93,7 @@ void xBot::predict(INPUTS inputs) {
         if ((inputs.role != ROLE_DEBUG)) {
             bot2ui_state("DEBUGING 0 ", DEBUGING_SIGNAL);
             remain_n_remain = gpt_params_.n_predict; // records the n_remain value for one debugging pass
+            current_output = ""; // clear the previous round's output record
         }
     }
 }
@@ -164,7 +165,7 @@ int xBot::stream()
 {
     is_stop = false;
     single_timer.restart(); // subtracting the time recorded by batch_timer later gives the single-decode time
-
+    if (!is_debuging) {current_output = "";}
     // cases for exiting the loop: n_remain != 0 / stop tag / inference failure / end token / user nickname / extra stop words
     while (n_remain != 0) {

@@ -661,7 +662,7 @@ void xBot::preDecodeSystemPrompt() {
     //---------------------insert embd_inp into embd----------------------
     while ((int)embd_inp.size() > n_consumed) {
         embd.push_back(embd_inp[n_consumed]);
-        gpt_sampler_accept(smpl, embd_inp[n_consumed], /* accept_grammar= */ false); // record the token id
+        // gpt_sampler_accept(smpl, embd_inp[n_consumed], /* accept_grammar= */ false); // record the token id
         ++n_consumed;
     }

@@ -1280,14 +1281,19 @@ bool xBot::checkStop(std::string *sstr, llama_token *id)
         emit bot2ui_state("bot:" + sample_str + "token=" + QString::number(*id) + " " + QString::fromStdString(*sstr));
         if (is_debuging) {emit bot2ui_state("bot:" + jtr("sampling") + " " + jtr("use time") + " " + QString::number(debuging_timer.nsecsElapsed() / 1000000000.0, 'f', 4) + " s", SUCCESS_SIGNAL);}

+        current_output += *sstr;
+        if (current_output.length() > 16)
+        {
+            current_output = current_output.substr(current_output.length() - 16, 16); // keep only the last 16 characters
+        }
+
         emit bot2ui_output(QString::fromUtf8(sstr->c_str()));
     }

     // check whether the output contains an antiprompt or extra stop word; if so, stop
     if (!is_complete) // completion mode skips the check
     {
         int list_num = 0; // tracks the first element; only the first element needs to set is_antiprompt = true
-        std::string current_output = gpt_sampler_prev_str(smpl, ctx, 3); // inspect the 3 most recently produced tokens
         // qDebug() << QString::fromStdString(current_output);
         for (const std::string &antiprompt : gpt_params_.antiprompt)
         {
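The string that replaces gpt_sampler_prev_str() is just a rolling tail of the streamed text: append each emitted piece, trim it to a fixed number of bytes, and scan that tail for antiprompts and extra stop words. A self-contained sketch of the same idea; the TailStopChecker type and its feed() helper are hypothetical, only the current_output member and the 16-byte window come from the diff:

    #include <string>
    #include <vector>

    // Hypothetical helper mirroring the diff: keep only the tail of recent output
    // and report whether any stop word appears in it.
    struct TailStopChecker {
        std::string current_output;   // rolling record of recent output
        size_t window = 16;           // same cap as the diff (bytes, not UTF-8 characters)

        bool feed(const std::string & piece, const std::vector<std::string> & stop_words) {
            current_output += piece;                                // append the newly streamed text
            if (current_output.size() > window) {                   // keep only the last `window` bytes
                current_output.erase(0, current_output.size() - window);
            }
            for (const std::string & stop : stop_words) {           // scan the tail for any stop word
                if (!stop.empty() && current_output.find(stop) != std::string::npos) {
                    return true;
                }
            }
            return false;
        }
    };

A caller would feed() every streamed piece and stop generation when it returns true. Unlike gpt_sampler_prev_str(), this keeps working even though prompt tokens are no longer pushed into the sampler via gpt_sampler_accept(); the window just has to stay at least as long as the longest stop word, or a match could be trimmed away.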
1 change: 1 addition & 0 deletions ui/xbot.h
@@ -136,6 +136,7 @@ class xBot : public QObject {
     bool is_debuging = false; // in-debug state
     int debuging_one = 0; // while debugging, restricts the loop to a single pass
     std::vector<Brain_Cell> Brain_vector; // memory vector (current memory)
+    std::string current_output; // the model's most recent output, used to check whether it contains an antiprompt word

 public slots:
     void recv_stop(); // receive the stop signal
