From ce885a33c1d75d3adb817d58a079244e68e9be91 Mon Sep 17 00:00:00 2001 From: "ziyang.wang" Date: Mon, 11 Nov 2024 17:54:42 +0800 Subject: [PATCH] [llama3.2] fix wrong vocab size --- models/Llama3_2-Vision/compile/export_onnx.py | 2 +- models/Llama3_2-Vision/python_demo/CMakeLists.txt | 4 +++- models/Llama3_2-Vision/python_demo/pipeline.py | 3 +-- models/Qwen2_5/python_demo/CMakeLists.txt | 6 ++++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/models/Llama3_2-Vision/compile/export_onnx.py b/models/Llama3_2-Vision/compile/export_onnx.py index 5a56c42..6c0453b 100755 --- a/models/Llama3_2-Vision/compile/export_onnx.py +++ b/models/Llama3_2-Vision/compile/export_onnx.py @@ -60,7 +60,7 @@ NUM_ATTENTION_HEADS = config.text_config.num_attention_heads NUM_KEY_VALUE_HEADS = config.text_config.num_key_value_heads HEAD_DIM = HIDDEN_SIZE // NUM_ATTENTION_HEADS -VOCAB_SIZE = config.text_config.hidden_size +VOCAB_SIZE = config.text_config.vocab_size CROSS_ATTN_LAYERS = config.text_config.cross_attention_layers print(f'\nLLM config:\n\ Layers: {NUM_LAYERS}\n\ diff --git a/models/Llama3_2-Vision/python_demo/CMakeLists.txt b/models/Llama3_2-Vision/python_demo/CMakeLists.txt index a439063..73c5cc0 100755 --- a/models/Llama3_2-Vision/python_demo/CMakeLists.txt +++ b/models/Llama3_2-Vision/python_demo/CMakeLists.txt @@ -5,6 +5,8 @@ if (NOT DEFINED TARGET_ARCH) set(TARGET_ARCH pcie) endif() +set(CMAKE_CXX_STANDARD 17) + include_directories(${PROJECT_SOURCE_DIR}/../../../support/include) if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "aarch64") @@ -17,7 +19,7 @@ elseif (${TARGET_ARCH} STREQUAL "pcie") message("PCIE mode, starting......") endif() -add_definitions(-DDEBUG --std=c++17 -fPIC -Wall -Werror) +add_definitions(-DDEBUG -fPIC -Wall -Werror) set(CMAKE_BUILD_TYPE "Debug") find_package(pybind11 REQUIRED CONFIG) diff --git a/models/Llama3_2-Vision/python_demo/pipeline.py b/models/Llama3_2-Vision/python_demo/pipeline.py index 0e63d43..c9bca65 100755 --- a/models/Llama3_2-Vision/python_demo/pipeline.py +++ b/models/Llama3_2-Vision/python_demo/pipeline.py @@ -137,8 +137,7 @@ def stream_answer(self, inputs): self.answer_token += full_word_tokens print(word, flush=True, end="") - if tok_num > self.model.max_new_tokens and word == '.': - break + token = self.model.forward_next() tok_num += 1 full_word_tokens = [] diff --git a/models/Qwen2_5/python_demo/CMakeLists.txt b/models/Qwen2_5/python_demo/CMakeLists.txt index c32d1d2..87e068e 100755 --- a/models/Qwen2_5/python_demo/CMakeLists.txt +++ b/models/Qwen2_5/python_demo/CMakeLists.txt @@ -1,10 +1,12 @@ cmake_minimum_required(VERSION 3.10) -project(qwen2) +project(qwen2_5) if (NOT DEFINED TARGET_ARCH) set(TARGET_ARCH pcie) endif() +set(CMAKE_CXX_STANDARD 17) + include_directories(${PROJECT_SOURCE_DIR}/../support/include) if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "aarch64") @@ -17,7 +19,7 @@ elseif (${TARGET_ARCH} STREQUAL "pcie") message("PCIE mode, starting......") endif() -add_definitions(-DDEBUG --std=c++17 -fPIC -Wall -Werror) +add_definitions(-DDEBUG -fPIC -Wall -Werror) set(CMAKE_BUILD_TYPE "Debug") find_package(pybind11 REQUIRED CONFIG)