[Dialect] Timing dialect for automatically profiling models #372

Open · wants to merge 7 commits into main
1 change: 1 addition & 0 deletions examples/BuddyLlama/CMakeLists.txt
@@ -112,6 +112,7 @@ target_link_directories(buddy-llama-run PRIVATE ${LLVM_LIBRARY_DIR})
set(BUDDY_LLAMA_LIBS
  LLAMA
  mlir_c_runner_utils
  mlir_runner_utils
  omp
)
if(BUDDY_MLIR_USE_MIMALLOC)
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
@@ -8,6 +8,10 @@ if(BUDDY_LLAMA_EXAMPLES)
  add_subdirectory(BuddyLlama)
endif()

if(BUDDY_TIMING_EXAMPLES)
  add_subdirectory(TimingDialect)
endif()

if (BUDDY_BERT_EXAMPLES)
  add_subdirectory(BuddyBert)
endif()
8 changes: 8 additions & 0 deletions examples/TimingDialect/.gitignore
@@ -0,0 +1,8 @@
log.*

# model params file
arg0.data

# model mlir file
forward.mlir
subgraph0.mlir
127 changes: 127 additions & 0 deletions examples/TimingDialect/CMakeLists.txt
@@ -0,0 +1,127 @@
add_custom_command(
  OUTPUT ${BUDDY_EXAMPLES_DIR}/TimingDialect/forward.mlir ${BUDDY_EXAMPLES_DIR}/TimingDialect/subgraph0.mlir ${BUDDY_EXAMPLES_DIR}/TimingDialect/arg0.data
  COMMAND ${Python3_EXECUTABLE} ${BUDDY_EXAMPLES_DIR}/TimingDialect/import-llama2.py
  COMMENT "Generating forward.mlir, subgraph0.mlir and arg0.data..."
)

add_custom_command(
  OUTPUT forward.o
  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/forward.mlir
          -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" |
          ${BUDDY_BINARY_DIR}/buddy-opt
          -arith-expand
          -eliminate-empty-tensors
          -empty-tensor-to-alloc-tensor
          -one-shot-bufferize
          --matmul-parallel-vectorization-optimize
          -batchmatmul-optimize
          -convert-linalg-to-affine-loops
          -affine-loop-fusion
          -affine-parallelize
          -lower-affine
          -convert-scf-to-openmp
          -func-bufferize
          -arith-bufferize
          -tensor-bufferize
          -buffer-deallocation
          -finalizing-bufferize
          -convert-vector-to-scf
          -expand-strided-metadata
          -convert-vector-to-llvm
          -memref-expand
          -arith-expand
          -convert-arith-to-llvm
          -finalize-memref-to-llvm
          -convert-scf-to-cf
          -llvm-request-c-wrappers
          -convert-openmp-to-llvm
          -convert-arith-to-llvm
          -convert-math-to-llvm
          -convert-math-to-libm
          -convert-func-to-llvm
          -reconcile-unrealized-casts |
          ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
          ${LLVM_TOOLS_BINARY_DIR}/llvm-as |
          ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3
          -o ${BUDDY_BINARY_DIR}/../examples/TimingDialect/forward.o
  DEPENDS buddy-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/forward.mlir
  COMMENT "Building forward.o"
  VERBATIM)

add_custom_command(
  OUTPUT subgraph.o
  COMMAND ${BUDDY_BINARY_DIR}/buddy-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/subgraph0.mlir --timing |
          ${BUDDY_BINARY_DIR}/buddy-opt
          -pass-pipeline "builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),func.func(tosa-to-tensor),func.func(tosa-to-arith))" |
          ${BUDDY_BINARY_DIR}/buddy-opt
          -arith-expand
          -eliminate-empty-tensors
          -empty-tensor-to-alloc-tensor
          -one-shot-bufferize
          --matmul-parallel-vectorization-optimize
          -batchmatmul-optimize
          -convert-linalg-to-affine-loops
          -affine-loop-fusion
          -affine-parallelize
          -lower-affine
          -convert-scf-to-openmp
          -func-bufferize-dynamic-offset
          -tensor-bufferize
          -arith-bufferize
          -buffer-deallocation
          -finalizing-bufferize
          -convert-vector-to-scf
          -expand-strided-metadata
          -cse
          -convert-vector-to-llvm
          -memref-expand
          -arith-expand
          -convert-arith-to-llvm
          -finalize-memref-to-llvm
          -convert-scf-to-cf
          -llvm-request-c-wrappers
          -convert-openmp-to-llvm
          -convert-arith-to-llvm
          -convert-math-to-llvm
          -convert-math-to-libm
          -convert-func-to-llvm
          -reconcile-unrealized-casts |
          ${LLVM_TOOLS_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
          ${LLVM_TOOLS_BINARY_DIR}/llvm-as |
          ${LLVM_TOOLS_BINARY_DIR}/llc -filetype=obj -relocation-model=pic -O3
          -o ${BUDDY_BINARY_DIR}/../examples/TimingDialect/subgraph.o
  DEPENDS buddy-opt ${BUDDY_EXAMPLES_DIR}/TimingDialect/subgraph0.mlir
  COMMENT "Building subgraph.o"
  VERBATIM)

add_library(LLAMA_T STATIC forward.o subgraph.o)

add_library(TIMING_UTILS STATIC utils.cpp runtime.cpp)

SET_SOURCE_FILES_PROPERTIES(
  template.o
  PROPERTIES
  EXTERNAL_OBJECT true
  GENERATED true)

SET_TARGET_PROPERTIES(
  LLAMA_T
  PROPERTIES
  LINKER_LANGUAGE C)

add_executable(buddy-llama-run-timing llama-main.cpp)
target_link_directories(buddy-llama-run-timing PRIVATE ${LLVM_MLIR_LIBRARY_DIR})

set(BUDDY_LLAMA_T_LIBS
  LLAMA_T
  TIMING_UTILS
  mlir_c_runner_utils
  mlir_runner_utils
  omp
)

if(BUDDY_MLIR_USE_MIMALLOC)
  list(APPEND BUDDY_LLAMA_T_LIBS mimalloc)
endif()

target_link_libraries(buddy-llama-run-timing ${BUDDY_LLAMA_T_LIBS})
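
Note that the subgraph.o rule above differs from the forward.o rule mainly in two ways: `subgraph0.mlir` is first run through `buddy-opt` with the `--timing` pass, which inserts the profiling instrumentation, and bufferization uses `-func-bufferize-dynamic-offset` instead of `-func-bufferize`. To inspect the instrumented IR on its own, the timing pass can be run in isolation. A minimal sketch, assuming `BUDDY_MLIR_BUILD_DIR` is set as in the README, the MLIR files have already been generated, and with an illustrative output file name:

```
$ cd examples/TimingDialect
$ ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt subgraph0.mlir --timing -o subgraph0-timed.mlir
```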
91 changes: 91 additions & 0 deletions examples/TimingDialect/README.md
@@ -0,0 +1,91 @@
# Buddy Compiler LLaMA Timing Example

1. Download LLaMA2 model

Download the LLaMA2 model from [meta ai](https://ai.meta.com/llama/).

2. Enter Python virtual environment

We recommend using anaconda3 to create a Python virtual environment, then installing the Python packages listed in buddy-mlir/requirements.txt:

```
$ conda activate <your virtual environment name>
$ cd buddy-mlir
$ pip install -r requirements.txt
```

3. Convert the LLaMA2 model to HuggingFace format

Convert the LLaMA2 model downloaded from meta ai to HuggingFace format, since we use the HuggingFace API to load the model.

```
$ cd examples/BuddyLlama
$ python llama2-to-hf.py --input_dir path-to-llama2-model --model_size 7B --output_dir path-to-save-llama-hf-model
```

For example, if you have a 7B LLaMA2 model, your input_dir path-to-llama2-model should contain a tokenizer.model file and a directory named "7B" that holds the model weights.
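
A rough sketch of the expected layout (the exact file names inside "7B" depend on the Meta download, so treat them as illustrative):

```
path-to-llama2-model/
├── tokenizer.model
└── 7B/
    ├── checklist.chk
    ├── consolidated.00.pth
    └── params.json
```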

In addition, set an environment variable pointing to the converted HuggingFace model.
```
$ export LLAMA_MODEL_PATH=/path-to-save-llama-hf-model/
```

4. Build and check LLVM/MLIR

```
$ cd buddy-mlir
$ mkdir llvm/build
$ cd llvm/build
$ cmake -G Ninja ../llvm \
    -DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \
    -DLLVM_TARGETS_TO_BUILD="host;RISCV" \
    -DLLVM_ENABLE_ASSERTIONS=ON \
    -DOPENMP_ENABLE_LIBOMPTARGET=OFF \
    -DCMAKE_BUILD_TYPE=RELEASE \
    -DMLIR_ENABLE_BINDINGS_PYTHON=ON \
    -DPython3_EXECUTABLE=$(which python3)
$ ninja check-clang check-mlir omp
```

5. Build and check buddy-mlir

```
$ cd buddy-mlir
$ mkdir build
$ cd build
$ cmake -G Ninja .. \
    -DMLIR_DIR=$PWD/../llvm/build/lib/cmake/mlir \
    -DLLVM_DIR=$PWD/../llvm/build/lib/cmake/llvm \
    -DLLVM_ENABLE_ASSERTIONS=ON \
    -DCMAKE_BUILD_TYPE=RELEASE \
    -DBUDDY_MLIR_ENABLE_PYTHON_PACKAGES=ON \
    -DPython3_EXECUTABLE=$(which python3)
$ ninja
$ ninja check-buddy
```

Set the `PYTHONPATH` environment variable. Make sure that the `PYTHONPATH` variable includes the directory of LLVM/MLIR python bindings and the directory of Buddy MLIR python packages.

```
$ export PYTHONPATH=/path-to-buddy-mlir/llvm/build/tools/mlir/python_packages/mlir_core:/path-to-buddy-mlir/build/python_packages:${PYTHONPATH}

# For example:
# Navigate to your buddy-mlir/build directory
$ cd buddy-mlir/build
$ export BUDDY_MLIR_BUILD_DIR=$PWD
$ export LLVM_MLIR_BUILD_DIR=$PWD/../llvm/build
$ export PYTHONPATH=${LLVM_MLIR_BUILD_DIR}/tools/mlir/python_packages/mlir_core:${BUDDY_MLIR_BUILD_DIR}/python_packages:${PYTHONPATH}
```

6. Build and run the LLaMA timing example

```
$ cmake -G Ninja .. -DBUDDY_TIMING_EXAMPLES=ON
$ ninja buddy-llama-run-timing
$ cd bin
$ ./buddy-llama-run-timing
```
This build takes a few minutes. We recommend running buddy-llama-run-timing on a powerful CPU, such as a server-grade one.

If you wish to utilize `mimalloc` as a memory allocator, you need to set `BUDDY_MLIR_USE_MIMALLOC` and `MIMALLOC_BUILD_DIR`.
For more details, please see [here](../../thirdparty/README.md#the-mimalloc-allocator).
70 changes: 70 additions & 0 deletions examples/TimingDialect/import-llama2.py
@@ -0,0 +1,70 @@
# ===- import-llama2.py --------------------------------------------------------
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===---------------------------------------------------------------------------
#
# This is the test of the LLaMA2 model.
#
# ===---------------------------------------------------------------------------
import os

import numpy
import torch
import torch._dynamo as dynamo
from torch._inductor.decomposition import decompositions as inductor_decomp
from transformers import LlamaForCausalLM, LlamaTokenizer

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse
from buddy.compiler.ops import tosa

# Retrieve the LLaMA model path from environment variables.
model_path = os.environ.get("LLAMA_MODEL_PATH")
if model_path is None:
    raise EnvironmentError(
        "The environment variable 'LLAMA_MODEL_PATH' is not set or is invalid."
    )

# Initialize the tokenizer and model from the specified model path.
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(model_path, torchscript=True)
model.config.use_cache = False

# Initialize Dynamo Compiler with specific configurations as an importer.
dynamo_compiler = DynamoCompiler(
    primary_registry=tosa.ops_registry,
    aot_autograd_decomposition=inductor_decomp,
)

# Import the model into MLIR module and parameters.
with torch.no_grad():
    data = torch.tensor([[1 for i in range(40)]], dtype=torch.int64)
    graphs = dynamo_compiler.importer(model, data)

assert len(graphs) == 1
graph = graphs[0]
params = dynamo_compiler.imported_params[graph]
pattern_list = [simply_fuse]
graphs[0].fuse_ops(pattern_list)
driver = GraphDriver(graphs[0])
driver.subgraphs[0].lower_to_top_level_ir()
path_prefix = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(path_prefix, "subgraph0.mlir"), "w") as module_file:
    print(driver.subgraphs[0]._imported_module, file=module_file)
with open(os.path.join(path_prefix, "forward.mlir"), "w") as module_file:
    print(driver.construct_main_graph(True), file=module_file)
all_param = numpy.concatenate(
    [param.detach().numpy().reshape([-1]) for param in params]
)
all_param.tofile(os.path.join(path_prefix, "arg0.data"))
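
The custom command in examples/TimingDialect/CMakeLists.txt runs this script automatically, but it can also be invoked by hand to regenerate forward.mlir, subgraph0.mlir, and arg0.data. A minimal sketch, assuming the Python environment and PYTHONPATH from the README are set up:

```
$ export LLAMA_MODEL_PATH=/path-to-save-llama-hf-model/
$ python examples/TimingDialect/import-llama2.py
```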