Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Dialect] Timing dialect for automatically profiling models #372

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/BuddyLlama/CMakeLists.txt
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove this modification.

Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ target_link_directories(buddy-llama-run PRIVATE ${LLVM_LIBRARY_DIR})
# Library names collected in BUDDY_LLAMA_LIBS: the LLAMA target, the two MLIR
# runner utility libraries, and OpenMP.
# NOTE(review): the command that consumes this list is outside this excerpt —
# presumably a target_link_libraries() call for buddy-llama-run; confirm.
set(BUDDY_LLAMA_LIBS
LLAMA
mlir_c_runner_utils
mlir_runner_utils
omp
)
if(BUDDY_MLIR_USE_MIMALLOC)
Expand Down
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ if(BUDDY_LLAMA_EXAMPLES)
add_subdirectory(BuddyLlama)
endif()

# Add the TimingDialect example directory only when BUDDY_TIMING_EXAMPLES is set.
if(BUDDY_TIMING_EXAMPLES)
add_subdirectory(TimingDialect)
endif()

# Add the BuddyBert example directory only when BUDDY_BERT_EXAMPLES is set.
if (BUDDY_BERT_EXAMPLES)
add_subdirectory(BuddyBert)
endif()
Expand Down
8 changes: 8 additions & 0 deletions examples/TimingDialect/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
log.*

# model params file
arg0.data

# model mlir file
forward.mlir
subgraph0.mlir
127 changes: 127 additions & 0 deletions examples/TimingDialect/CMakeLists.txt
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove cmake components.

Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Run import-llama2.py to produce the two MLIR modules and the flattened
# parameter file. The script writes all three files next to itself, i.e. into
# ${BUDDY_EXAMPLES_DIR}/TimingDialect.
#
# DEPENDS lists the script so that editing it regenerates the outputs;
# VERBATIM keeps argument escaping identical across generators/platforms.
add_custom_command(
  OUTPUT
    ${BUDDY_EXAMPLES_DIR}/TimingDialect/forward.mlir
    ${BUDDY_EXAMPLES_DIR}/TimingDialect/subgraph0.mlir
    ${BUDDY_EXAMPLES_DIR}/TimingDialect/arg0.data
  COMMAND ${Python3_EXECUTABLE} ${BUDDY_EXAMPLES_DIR}/TimingDialect/import-llama2.py
  DEPENDS ${BUDDY_EXAMPLES_DIR}/TimingDialect/import-llama2.py
  COMMENT "Generating forward.mlir, subgraph0.mlir and arg0.data..."
  VERBATIM
)

# Build forward.o from forward.mlir with a single shell pipeline:
#   1. mlir-opt lowers TOSA ops (tosa-to-linalg-named, tosa-to-linalg,
#      tosa-to-tensor, tosa-to-arith) per function.
#   2. buddy-opt runs the bufferization, optimization and LLVM-conversion passes
#      listed below.
#   3. mlir-translate emits LLVM IR, llvm-as assembles it, and llc produces a
#      position-independent object file at -O3.
#
# NOTE(review): OUTPUT is the relative name `forward.o`, while llc writes to
# ${BUDDY_BINARY_DIR}/../examples/TimingDialect/forward.o. The rule is only
# correct if both resolve to the same file — presumably this directory's
# binary dir; confirm.
# NOTE(review): -arith-expand and -convert-arith-to-llvm each appear twice in
# the buddy-opt pass list — confirm whether the repetition is intentional.
add_custom_command(
OUTPUT forward.o
COMMAND ${LLVM_MLIR_BINARY_DIR}/mlir-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/forward.mlir
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" |
${BUDDY_BINARY_DIR}/buddy-opt
-arith-expand
-eliminate-empty-tensors
-empty-tensor-to-alloc-tensor
-one-shot-bufferize
-matmul-paralell-vectorization-optimize
-batchmatmul-optimize
-convert-linalg-to-affine-loops
-affine-loop-fusion
-affine-parallelize
-lower-affine
-convert-scf-to-openmp
-func-bufferize
-arith-bufferize
-tensor-bufferize
-buffer-deallocation
-finalizing-bufferize
-convert-vector-to-scf
-expand-strided-metadata
-convert-vector-to-llvm
-memref-expand
-arith-expand
-convert-arith-to-llvm
-finalize-memref-to-llvm
-convert-scf-to-cf
-llvm-request-c-wrappers
-convert-openmp-to-llvm
-convert-arith-to-llvm
-convert-math-to-llvm
-convert-math-to-libm
-convert-func-to-llvm
-reconcile-unrealized-casts |
${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_MLIR_BINARY_DIR}/llvm-as |
${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3
-o ${BUDDY_BINARY_DIR}/../examples/TimingDialect/forward.o
DEPENDS buddy-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/forward.mlir
COMMENT "Building forward.o "
VERBATIM)

# Build subgraph.o from subgraph0.mlir with a single shell pipeline:
#   1. buddy-opt is run first with --timing --lower-timing on the input module.
#   2. mlir-opt lowers TOSA ops (tosa-to-linalg-named, tosa-to-linalg,
#      tosa-to-tensor, tosa-to-arith) per function.
#   3. buddy-opt runs the bufferization, optimization and LLVM-conversion passes
#      listed below. Compared with the forward.o rule, this list uses
#      -func-bufferize-dynamic-offset instead of -func-bufferize and adds -cse.
#   4. mlir-translate emits LLVM IR, llvm-as assembles it, and llc produces a
#      position-independent object file at -O3.
#
# NOTE(review): OUTPUT is the relative name `subgraph.o`, while llc writes to
# ${BUDDY_BINARY_DIR}/../examples/TimingDialect/subgraph.o. The rule is only
# correct if both resolve to the same file — presumably this directory's
# binary dir; confirm.
# NOTE(review): -arith-expand and -convert-arith-to-llvm each appear twice in
# the buddy-opt pass list — confirm whether the repetition is intentional.
add_custom_command(
OUTPUT subgraph.o
COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/subgraph0.mlir --timing --lower-timing |
${LLVM_MLIR_BINARY_DIR}/mlir-opt
-pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" |
${BUDDY_BINARY_DIR}/buddy-opt
-arith-expand
-eliminate-empty-tensors
-empty-tensor-to-alloc-tensor
-one-shot-bufferize
-matmul-paralell-vectorization-optimize
-batchmatmul-optimize
-convert-linalg-to-affine-loops
-affine-loop-fusion
-affine-parallelize
-lower-affine
-convert-scf-to-openmp
-func-bufferize-dynamic-offset
-tensor-bufferize
-arith-bufferize
-buffer-deallocation
-finalizing-bufferize
-convert-vector-to-scf
-expand-strided-metadata
-cse
-convert-vector-to-llvm
-memref-expand
-arith-expand
-convert-arith-to-llvm
-finalize-memref-to-llvm
-convert-scf-to-cf
-llvm-request-c-wrappers
-convert-openmp-to-llvm
-convert-arith-to-llvm
-convert-math-to-llvm
-convert-math-to-libm
-convert-func-to-llvm
-reconcile-unrealized-casts |
${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
${LLVM_MLIR_BINARY_DIR}/llvm-as |
${LLVM_MLIR_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3
-o ${BUDDY_BINARY_DIR}/../examples/TimingDialect/subgraph.o
DEPENDS buddy-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/subgraph0.mlir
COMMENT "Building subgraph.o "
VERBATIM)

# LLAMA_T bundles the two object files produced by the custom commands in this
# file (forward.o and subgraph.o) into a static library.
add_library(LLAMA_T STATIC forward.o subgraph.o)

# Helper library built from utils.cpp and linked into the timing executable.
add_library(TIMING_UTILS STATIC utils.cpp)

# Mark the prebuilt objects that LLAMA_T actually consumes as generated
# external objects, so CMake links them as-is and does not expect them to exist
# at configure time. (Previously these properties were set on `template.o`,
# which no target in this file uses.)
set_source_files_properties(
  forward.o
  subgraph.o
  PROPERTIES
  EXTERNAL_OBJECT true
  GENERATED true)

# LLAMA_T has no compilable sources, so the linker language cannot be inferred
# and must be stated explicitly.
set_target_properties(
  LLAMA_T
  PROPERTIES
  LINKER_LANGUAGE C)

# Executable that runs the LLaMA example with timing instrumentation.
add_executable(buddy-llama-run-timing llama-main.cpp)
target_link_directories(buddy-llama-run-timing PRIVATE ${LLVM_MLIR_LIBRARY_DIR})

# Libraries linked into the executable: the generated model objects, the timing
# helpers, the MLIR runner utilities and OpenMP.
set(BUDDY_LLAMA_T_LIBS
  LLAMA_T
  TIMING_UTILS
  mlir_c_runner_utils
  mlir_runner_utils
  omp
)

# Optionally link the mimalloc allocator as well.
if(BUDDY_MLIR_USE_MIMALLOC)
  list(APPEND BUDDY_LLAMA_T_LIBS mimalloc)
endif()

# Use the keyword signature: an executable has no consumers, so PRIVATE is the
# correct visibility and avoids the legacy keyword-less semantics.
target_link_libraries(buddy-llama-run-timing PRIVATE ${BUDDY_LLAMA_T_LIBS})
91 changes: 91 additions & 0 deletions examples/TimingDialect/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Buddy Compiler LLaMA Example

1. Download LLaMA2 model

Download the LLaMA2 model. You can get it from [Meta AI](https://ai.meta.com/llama/).

2. Enter Python virtual environment

We recommend using Anaconda3 to create a Python virtual environment. Then install the Python packages listed in buddy-mlir's `requirements.txt`.

```
$ conda activate <your virtual environment name>
$ cd buddy-mlir
$ pip install -r requirements.txt
```

3. Convert the LLaMA2 model to HuggingFace format

Convert the LLaMA2 model downloaded from Meta AI to HuggingFace format, because we use the HuggingFace API to load the LLaMA2 model.

```
$ cd examples/BuddyLlama
$ python llama2-to-hf.py --input_dir path-to-llama2-model --model_size 7B --output_dir path-to-save-llama-hf-model
```

For example, if you have a 7B LLaMA2 model, your input_dir (path-to-llama2-model) should contain a tokenizer.model file and a directory named "7B". Put your 7B LLaMA2 model inside the "7B" directory.

In addition, set an environment variable for the generated LLaMA model.
```
$ export LLAMA_MODEL_PATH=/path-to-save-llama-hf-model/
```

4. Build and check LLVM/MLIR

```
$ cd buddy-mlir
$ mkdir llvm/build
$ cd llvm/build
$ cmake -G Ninja ../llvm \
-DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \
-DLLVM_TARGETS_TO_BUILD="host;RISCV" \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DOPENMP_ENABLE_LIBOMPTARGET=OFF \
-DCMAKE_BUILD_TYPE=RELEASE \
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
-DPython3_EXECUTABLE=$(which python3)
$ ninja check-clang check-mlir omp
```

5. Build and check buddy-mlir

```
$ cd buddy-mlir
$ mkdir build
$ cd build
$ cmake -G Ninja .. \
-DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
-DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DCMAKE_BUILD_TYPE=RELEASE \
-DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
-DPython3_EXECUTABLE=$(which python3)
$ ninja
$ ninja check-buddy
```

Set the `PYTHONPATH` environment variable. Make sure that the `PYTHONPATH` variable includes the directory of LLVM/MLIR python bindings and the directory of Buddy MLIR python packages.

```
$ export PYTHONPATH=/path-to-buddy-mlir/llvm/build/tools/mlir/python_packages/mlir_core:/path-to-buddy-mlir/build/python_packages:${PYTHONPATH}

// For example:
// Navigate to your buddy-mlir/build directory
$ cd buddy-mlir/build
$ export BUDDY_MLIR_BUILD_DIR=$PWD
$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
```

6. Build and run LLaMA example

```
$ cmake -G Ninja .. -DBUDDY_TIMING_EXAMPLES=ON
$ ninja buddy-llama-run-timing
$ cd bin
$ ./buddy-llama-run-timing
```
This build will take a few minutes. We recommend using a powerful CPU, such as a server-grade CPU, to run buddy-llama-run-timing.

If you wish to utilize `mimalloc` as a memory allocator, you need to set `BUDDY_MLIR_USE_MIMALLOC` and `MIMALLOC_BUILD_DIR`.
For more details, please see [here](../../thirdparty/README.md#the-mimalloc-allocator).
70 changes: 70 additions & 0 deletions examples/TimingDialect/import-llama2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import torch
import torch._dynamo as dynamo
from transformers import LlamaForCausalLM, LlamaTokenizer
from torch._inductor.decomposition import decompositions as inductor_decomp
import numpy

from buddy.compiler.frontend import DynamoCompiler
# ===- import-llama2.py --------------------------------------------------------
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===---------------------------------------------------------------------------
#
# This is the test of llama2 model.
#
# ===---------------------------------------------------------------------------
from buddy.compiler.ops import tosa
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse

# The converted HuggingFace checkpoint location comes from the environment.
model_path = os.environ.get("LLAMA_MODEL_PATH")
if model_path is None:
    raise EnvironmentError(
        "The environment variable 'LLAMA_MODEL_PATH' is not set or is invalid."
    )

# Load the tokenizer and the model from that checkpoint; caching is turned off
# on the model config before the model is imported.
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(model_path, torchscript=True)
model.config.use_cache = False

# The Dynamo compiler acts as the importer, using the TOSA op registry and the
# inductor decompositions.
dynamo_compiler = DynamoCompiler(
    primary_registry=tosa.ops_registry,
    aot_autograd_decomposition=inductor_decomp,
)

# Import the model with a 1x40 int64 sample input of ones, without gradients.
with torch.no_grad():
    sample_input = torch.tensor([[1] * 40], dtype=torch.int64)
    graphs = dynamo_compiler.importer(model, sample_input)

# Exactly one graph is expected; fuse its ops and lower the first subgraph.
assert len(graphs) == 1
graph = graphs[0]
params = dynamo_compiler.imported_params[graph]
graph.fuse_ops([simply_fuse])
driver = GraphDriver(graph)
driver.subgraphs[0].lower_to_top_level_ir()

# All outputs are written next to this script.
output_dir = os.path.dirname(os.path.abspath(__file__))

subgraph_path = os.path.join(output_dir, "subgraph0.mlir")
with open(subgraph_path, "w") as module_file:
    print(driver.subgraphs[0]._imported_module, file=module_file)

forward_path = os.path.join(output_dir, "forward.mlir")
with open(forward_path, "w") as module_file:
    print(driver.construct_main_graph(True), file=module_file)

# Flatten every parameter to 1-D and store them back to back in one file.
flattened = [param.detach().numpy().reshape([-1]) for param in params]
all_param = numpy.concatenate(flattened)
all_param.tofile(os.path.join(output_dir, "arg0.data"))
Loading
Loading