This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

add TP and gptj model support (#223)
* add TP and gptj model support
1. Add the TP_1D tensor-parallelism algorithm
2. Add parallel_context for broadcast/reduce
3. Support all data types
4. Support the GPT-J model

Signed-off-by: Clark Chin <xi2.chen@intel.com>
ClarkChin08 authored Sep 7, 2023
1 parent 4705040 commit fe0d65c
Showing 12 changed files with 958 additions and 13 deletions.
@@ -40,6 +40,12 @@ option(NE_ALL_WARNINGS "neural_engine: enable all compiler warnings"
option(NE_ALL_WARNINGS_3RD_PARTY "neural_engine: enable all compiler warnings in 3rd party libs" OFF)
option(NE_GPROF "neural_engine: enable gprof" OFF)

+# tensor parallelism
+option(NE_TP "neural_engine: enable tensor parallelism" OFF)
+if (NE_TP)
+  add_compile_definitions(NE_TP_MODEL)
+endif()

# sanitizers
option(NE_SANITIZE_THREAD "neural_engine: enable thread sanitizer" OFF)
option(NE_SANITIZE_ADDRESS "neural_engine: enable address sanitizer" OFF)
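With the option above, a tensor-parallel build would be configured like this (a minimal sketch, assuming an out-of-source build directory; the flags shown are standard CMake, not taken from this commit):

```sh
# Hypothetical configure step: NE_TP=ON turns on the NE_TP_MODEL define.
cmake -DNE_TP=ON -DCMAKE_BUILD_TYPE=Release ..
cmake --build . -j
```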
4 changes: 4 additions & 0 deletions intel_extension_for_transformers/llm/runtime/graph/README.md
@@ -114,3 +114,7 @@ LLM running script args explanations:
| --color | colorise output to distinguish prompt and user input from generations |
| --keep | number of tokens to keep from the initial prompt (default: 0, -1 = all) |


+### 4. Tensor Parallelism across nodes/sockets

+We support a tensor parallelism strategy for distributed inference/training across multiple nodes and sockets. Refer to [tensor_parallelism.md](./tensor_parallelism.md) to enable this feature.
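As an illustration only, a multi-rank launch over MPI might look like the snippet below; the binary name, model file, and flags are assumptions here, and tensor_parallelism.md is the authoritative reference:

```sh
# Hypothetical two-rank tensor-parallel launch (e.g. one rank per socket).
mpirun -np 2 ./build/bin/main -m gptj-model.bin -p "Once upon a time"
```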
@@ -21,7 +21,16 @@ add_library_w_warning(ne_layers "${sources}")
target_include_directories(ne_layers PUBLIC .)
target_compile_features(ne_layers PUBLIC c_std_11) # don't bump
set_target_properties(ne_layers PROPERTIES POSITION_INDEPENDENT_CODE ON)
-target_link_libraries(ne_layers PUBLIC Threads::Threads jblas::jblas ne_vec)
+if (NE_TP)
+  find_package(oneCCL REQUIRED)
+  find_package(MPI REQUIRED)
+  set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+  add_library(parallel_context STATIC parallel_context.cpp)
+  target_link_libraries(ne_layers PUBLIC Threads::Threads jblas::jblas ne_vec MPI::MPI_CXX ccl parallel_context)
+else ()
+  target_link_libraries(ne_layers PUBLIC Threads::Threads jblas::jblas ne_vec)
+endif()

if(NOT WIN32)
target_link_libraries(ne_layers PUBLIC rt)
endif()
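Per the commit message, parallel_context wraps oneCCL broadcast/reduce for the ranks that hold tensor shards. The commit's own source is not shown here, but the canonical oneCCL-over-MPI bootstrap such a file typically performs looks roughly like this sketch (all names are illustrative; none are taken from the commit):

```cpp
#include <mpi.h>
#include <vector>
#include "oneapi/ccl.hpp"

int main(int argc, char* argv[]) {
  ccl::init();
  MPI_Init(&argc, &argv);
  int rank = 0, size = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // Rank 0 creates the oneCCL key-value store and shares its address via MPI.
  ccl::shared_ptr_class<ccl::kvs> kvs;
  ccl::kvs::address_type addr;
  if (rank == 0) {
    kvs = ccl::create_main_kvs();
    addr = kvs->get_address();
    MPI_Bcast(addr.data(), static_cast<int>(addr.size()), MPI_BYTE, 0, MPI_COMM_WORLD);
  } else {
    MPI_Bcast(addr.data(), static_cast<int>(addr.size()), MPI_BYTE, 0, MPI_COMM_WORLD);
    kvs = ccl::create_kvs(addr);
  }
  auto comm = ccl::create_communicator(size, rank, kvs);

  // Broadcast: rank 0's buffer is replicated to every rank.
  std::vector<float> weights(128, rank == 0 ? 1.0f : 0.0f);
  ccl::broadcast(weights.data(), weights.size(), /*root=*/0, comm).wait();

  // All-reduce: per-rank partial results are summed across ranks.
  std::vector<float> partial(128, 1.0f), reduced(128);
  ccl::allreduce(partial.data(), reduced.data(), partial.size(),
                 ccl::reduction::sum, comm).wait();

  MPI_Finalize();
  return 0;
}
```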
@@ -81,6 +81,10 @@ enum ne_op {
NE_OP_MAP_UNARY,
NE_OP_MAP_BINARY,

+NE_OP_SPLIT,
+NE_OP_ALL_REDUCE,
+NE_OP_TP_CONCAT,
+NE_OP_DUMP_TENSOR,
NE_OP_COUNT,
};

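These ops cover the standard 1D tensor-parallel pattern: split a weight across ranks, run the local matmul on each rank, then all-reduce (or concatenate) the partial outputs. A plain C++ sketch of the all-reduce flavor, with purely illustrative names (the NE graph would express this through the ops above):

```cpp
#include <cstddef>
#include <vector>

// Each rank holds a column slice of W (n x k_local) and the matching slice
// of x (k_local entries). Its local product is only a partial result:
// summing these vectors across all ranks (NE_OP_ALL_REDUCE) yields W * x.
std::vector<float> partial_matvec(const std::vector<float>& w_local,
                                  const std::vector<float>& x_local,
                                  std::size_t n, std::size_t k_local) {
  std::vector<float> y(n, 0.0f);
  for (std::size_t i = 0; i < n; ++i)
    for (std::size_t j = 0; j < k_local; ++j)
      y[i] += w_local[i * k_local + j] * x_local[j];
  return y;
}
```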
3 changes: 3 additions & 0 deletions intel_extension_for_transformers/llm/runtime/graph/core/ne.h
@@ -32,6 +32,9 @@

#include "core/data_types.h"
#include "layers/Ops.h"
+#ifdef NE_TP_MODEL
+#include "core/parallel_context.h"
+#endif

#define NE_FILE_MAGIC 0x67676d6c // "ne"
#define NE_FILE_VERSION 1
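This closes the loop on the NE_TP CMake option: it defines NE_TP_MODEL, which gates TP-only headers and code paths. A guarded call site could look like the following sketch (reduce_add_f32 is a made-up placeholder, not a function from the commit):

```cpp
#include <cstddef>
#ifdef NE_TP_MODEL
#include "core/parallel_context.h"  // present only in TP builds
#endif

// Sum partial results across ranks in TP builds; a no-op otherwise.
static void maybe_all_reduce(float* buf, std::size_t n) {
#ifdef NE_TP_MODEL
  reduce_add_f32(buf, n);  // hypothetical placeholder for the real helper
#else
  (void)buf;
  (void)n;
#endif
}
```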
[Diffs for the remaining changed files are not shown here.]
