Merge branch 'main' into mem_reduction_stickified

Signed-off-by: Haruki Imai <imaihal@jp.ibm.com>
imaihal · Aug 26, 2024 · 5dff7cf · 5dff7cf
2 parents 82ada4e + ad885ed
commit 5dff7cf
Show file tree

Hide file tree

Showing 210 changed files with 6,085 additions and 2,674 deletions.
diff --git a/.buildbot/Jenkinsfile b/.buildbot/Jenkinsfile
@@ -26,6 +26,7 @@ def call() {
       skipDefaultCheckout()
       buildDiscarder(logRotator(numToKeepStr:'1000'))
       ansiColor('xterm')
+      timeout(time: 6, unit: 'HOURS')
     }
 
     agent {

diff --git a/docker/Dockerfile.onnx-mlir b/docker/Dockerfile.onnx-mlir
@@ -43,6 +43,7 @@ RUN LLVM_PROJECT_ROOT=${WORK_DIR}/llvm-project \
     && CC=clang CXX=clang++ \
        cmake -DMLIR_DIR=${LLVM_PROJECT_ROOT}/build/lib/cmake/mlir \
              -DCMAKE_BUILD_TYPE=Release \
+             -DLLVM_ENABLE_ASSERTIONS=ON \
              -DCMAKE_INSTALL_MESSAGE=NEVER \
              -DONNX_MLIR_ACCELERATORS=${ACCEL} .. \
     && make -j${NPROC} \

diff --git a/docs/BuildOnLinuxOSX.md b/docs/BuildOnLinuxOSX.md
@@ -15,25 +15,38 @@ Firstly, install MLIR (as a part of LLVM-Project):
 ``` bash
 git clone -n https://github.com/llvm/llvm-project.git
 # Check out a specific branch that is known to work with ONNX-MLIR.
-cd llvm-project && git checkout 6461b921fd06b1c812f1172685b8b7edc0608af7 && cd ..
+cd llvm-project && git checkout 60a7d33106d3cd645d3100a8a935a1e3837f885d && cd ..
 ```
 
 [same-as-file]: <> (utils/build-mlir.sh)
 ``` bash
 mkdir llvm-project/build
 cd llvm-project/build
+
 cmake -G Ninja ../llvm \
-   -DLLVM_ENABLE_PROJECTS=mlir \
+   -DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" \
    -DLLVM_TARGETS_TO_BUILD="host" \
    -DCMAKE_BUILD_TYPE=Release \
    -DLLVM_ENABLE_ASSERTIONS=ON \
    -DLLVM_ENABLE_RTTI=ON \
+   -DENABLE_LIBOMPTARGET=OFF \
    -DLLVM_ENABLE_LIBEDIT=OFF
 
 cmake --build . -- ${MAKEFLAGS}
 cmake --build . --target check-mlir
 ```
 
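+The configuration above also builds `clang` and `openmp`, which onnx-mlir relies on for parallelization. If parallelization is not needed, llvm-project can instead be configured with MLIR only: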
+```
+cmake -G Ninja ../llvm \
+   -DLLVM_ENABLE_PROJECTS=mlir \
+   -DLLVM_TARGETS_TO_BUILD="host" \
+   -DCMAKE_BUILD_TYPE=Release \
+   -DLLVM_ENABLE_ASSERTIONS=ON \
+   -DLLVM_ENABLE_RTTI=ON \
+   -DLLVM_ENABLE_LIBEDIT=OFF
+```
+
 ## ONNX-MLIR (this project)
 
 ### Build
@@ -54,11 +67,15 @@ mkdir onnx-mlir/build && cd onnx-mlir/build
 if [[ -z "$pythonLocation" ]]; then
   cmake -G Ninja \
         -DCMAKE_CXX_COMPILER=/usr/bin/c++ \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DLLVM_ENABLE_ASSERTIONS=ON \
         -DMLIR_DIR=${MLIR_DIR} \
         ..
 else
   cmake -G Ninja \
         -DCMAKE_CXX_COMPILER=/usr/bin/c++ \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DLLVM_ENABLE_ASSERTIONS=ON \
         -DPython3_ROOT_DIR=$pythonLocation \
         -DMLIR_DIR=${MLIR_DIR} \
         ..

diff --git a/docs/BuildOnWindows.md b/docs/BuildOnWindows.md
@@ -52,7 +52,7 @@ Install MLIR (as a part of LLVM-Project):
 ```shell
 git clone -n https://github.com/llvm/llvm-project.git
 # Check out a specific branch that is known to work with ONNX-MLIR.
-cd llvm-project && git checkout 6461b921fd06b1c812f1172685b8b7edc0608af7 && cd ..
+cd llvm-project && git checkout 60a7d33106d3cd645d3100a8a935a1e3837f885d && cd ..
 ```
 
 [same-as-file]: <> (utils/build-mlir.cmd)
@@ -62,13 +62,14 @@ md llvm-project\build
 cd llvm-project\build
 call cmake %root_dir%\llvm-project\llvm -G "Ninja" ^
    -DCMAKE_INSTALL_PREFIX="%root_dir%\llvm-project\build\install" ^
-   -DLLVM_ENABLE_PROJECTS=mlir ^
+   -DLLVM_ENABLE_PROJECTS="mlir;clang;openmp" ^
    -DLLVM_TARGETS_TO_BUILD="host" ^
    -DCMAKE_BUILD_TYPE=Release ^
    -DLLVM_ENABLE_ASSERTIONS=ON ^
    -DLLVM_ENABLE_RTTI=ON ^
    -DLLVM_ENABLE_ZLIB=OFF ^
    -DLLVM_INSTALL_UTILS=ON ^
+   -DENABLE_LIBOMPTARGET=OFF ^
    -DLLVM_ENABLE_LIBEDIT=OFF
 
 call cmake --build . --config Release

diff --git a/src/Accelerators/Accelerator.hpp b/src/Accelerators/Accelerator.hpp
@@ -4,15 +4,16 @@
 
 //===-------------------------- Accelerator.hpp ---------------------------===//
 //
-// Copyright 2022 The IBM Research Authors.
+// Copyright 2022-2024 The IBM Research Authors.
 //
 // =============================================================================
 //
 // Accelerator base class
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_ACCELERATOR_H
+#define ONNX_MLIR_ACCELERATOR_H
 
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -32,14 +33,14 @@
 #define CREATE_ACCEL_ENUM(name) name,
 #define DECLARE_ACCEL_INIT_FUNCTION(name) extern Accelerator *create##name();
 #define INVOKE_ACCEL_INIT_FUNCTION(name, kinds)                                \
-  if (!kinds.empty() &&                                                        \
+  if (!(kinds).empty() &&                                                      \
       llvm::is_contained(kinds, accel::Accelerator::Kind::name))               \
     create##name()->setName(#name);
 #define CREATE_ACCEL_CL_ENUM(name)                                             \
   clEnumValN(accel::Accelerator::Kind::name, #name, #name " accelerator"),
 #define ACCEL_CL_ENUM_FROM_STRING(name, var, str)                              \
-  if (str.compare(std::string(#name)) == 0) {                                  \
-    var = accel::Accelerator::Kind::name;                                      \
+  if ((str).compare(std::string(#name)) == 0) {                                \
+    (var) = accel::Accelerator::Kind::name;                                    \
     return true;                                                               \
   }
 #define ACCEL_CL_ENUM_TO_STRING(name, map)                                     \
@@ -164,3 +165,4 @@ extern void initAccelerators(llvm::ArrayRef<Accelerator::Kind> kinds);
 
 } // namespace accel
 } // namespace onnx_mlir
+#endif
diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
@@ -29,7 +29,8 @@ llvm::cl::opt<NNPAEmissionTargetType> nnpaEmissionTarget(
 llvm::cl::opt<bool> nnpaClipToDLFloatRange("nnpa-clip-to-dlfloat-range",
     llvm::cl::desc("Clip CPU tensors to dlfloat range before stickification to "
                    "avoid out-of-range. Only clip Softmax inputs at this "
-                   "moment. Default is true."),
+                   "moment. Default is true. This option will be removed and "
+                   "replaced by --nnpa-saturation in the future."),
     llvm::cl::init(true), llvm::cl::cat(OnnxMlirOptions));
 
 llvm::cl::opt<bool> nnpaEnableZHighToOnnx("enable-zhigh-to-onnx",
@@ -49,11 +50,13 @@ llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick(
         "Default is false."),
     llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));
 
+// Enabled by default now. It could instead be enabled only when parallel is
+// on, as parallel stick/unstick is quite a bit faster than sequential.
 llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick(
     "enable-compiler-stick-unstick",
     llvm::cl::desc("[Experimental feature] Enable the compiler generate some "
-                   "stick/unstick code. Default is false."),
-    llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));
+                   "stick/unstick code. Default is true."),
+    llvm::cl::init(true), llvm::cl::cat(OnnxMlirCommonOptions));
 
 llvm::cl::opt<bool> nnpaEnableScalarBcastBinary(
     "nnpa-enable-scalar-bcast-binary",
@@ -93,6 +96,7 @@ llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic{
 
 llvm::cl::opt<bool> nnpaEnableSaturation("nnpa-saturation",
     llvm::cl::desc("Enable saturating f32 values before stickify them."
+                   " This option turns enable-compiler-stick-unstick on. "
                    "Default is false."),
     llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));
 

diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp
@@ -4,14 +4,15 @@
 
 //===------------------------ NNPACompilerOptions.hpp ---------------------===//
 //
-// Copyright 2022 The IBM Research Authors.
+// Copyright 2022-2024 The IBM Research Authors.
 //
 // =============================================================================
 //
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_NNPA_COMPILER_OPTIONS_H
+#define ONNX_MLIR_NNPA_COMPILER_OPTIONS_H
 
 #include "llvm/Support/CommandLine.h"
 
@@ -69,3 +70,4 @@ extern llvm::cl::opt<std::string> nnpaSaveDevicePlacementFile;
 extern llvm::cl::opt<bool> nnpaEnableSaturation;
 
 } // namespace onnx_mlir
+#endif
diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
@@ -48,6 +48,15 @@ namespace onnx_mlir {
 
 void configurePassesNNPA() {
   configureOnnxToZHighLoweringPass(optReport == OptReport::NNPAUnsupportedOps);
+  // Compiler-generated stick code supports saturation, so force its usage.
+  // TODO: remove this if zDNN adds support for saturation.
+  if (nnpaEnableSaturation)
+    nnpaEnableCompilerStickUnstick = true;
+  // Currently nnpaEnableCompilerStickUnstick is not supported on zOS.
+  // TODO: enable on zOS (this check runs last, so it wins over saturation).
+  if (mtriple == "s390x-ibm-zos") {
+    nnpaEnableCompilerStickUnstick = false;
+  }
 }
 
 void addONNXToZHighPasses(mlir::PassManager &pm) {
@@ -94,7 +103,8 @@ void addONNXToZHighPasses(mlir::PassManager &pm) {
   // Clip zhigh.Stick inputs if required. This is to avoid out-of-range of
   // dlfloat. Do constant propagation after clipping to remove ONNX ops used for
   // clipping such as ONNXMax if applicable.
-  if (nnpaClipToDLFloatRange) {
+  // This pass will be removed and replaced by nnpa-saturation in the future.
+  if (!nnpaEnableSaturation && nnpaClipToDLFloatRange) {
     pm.addNestedPass<func::FuncOp>(
         onnx_mlir::zhigh::createZHighClipToDLFloatPass());
     pm.addNestedPass<func::FuncOp>(onnx_mlir::createConstPropONNXToONNXPass());
@@ -214,8 +224,8 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
       else if (optStr == "-O3")
         optLevel = OptLevel::O3;
       // Lower ONNX to Krnl, ZHigh to ZLow.
-      addONNXToKrnlPasses(pm, optLevel, /*enableCSE*/ true,
-          instrumentONNXSignature, ONNXOpStats);
+      addONNXToKrnlPasses(
+          pm, optLevel, /*enableCSE*/ true, instrumentSignatures, ONNXOpStats);
 
       if (nnpaEmissionTarget >= EmitZLowIR)
         emissionTarget = EmitMLIR;

diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.hpp b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.hpp
@@ -4,14 +4,15 @@
 
 //===------------------------- NNPACompilerUtils.hpp ----------------------===//
 //
-// Copyright 2022 The IBM Research Authors.
+// Copyright 2022-2024 The IBM Research Authors.
 //
 // =============================================================================
 //
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_NNPA_COMPILER_UTILS_H
+#define ONNX_MLIR_NNPA_COMPILER_UTILS_H
 
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/PassManager.h"
@@ -37,3 +38,4 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,
 void configurePassesNNPA();
 
 } // namespace onnx_mlir
+#endif
diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacementHeuristic.hpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/DevicePlacementHeuristic.hpp
@@ -4,15 +4,16 @@
 
 //===-------- DevicePlacementHeuristic.hpp - Place ops using model  -------===//
 //
-// Copyright 2023 The IBM Research Authors.
+// Copyright 2023-2024 The IBM Research Authors.
 //
 // =============================================================================
 //
 // This file contains heuristics to place operations on CPU or NNPA.
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_HEURISTICS_H
+#define ONNX_MLIR_HEURISTICS_H
 
 #include "mlir/IR/BuiltinOps.h"
 
@@ -85,3 +86,4 @@ void PlaceBeneficialOpsOnNNPAWithStickUnstick(mlir::MLIRContext *context,
     double significantNNPAFactor = 3.0);
 
 } // namespace onnx_mlir
+#endif
diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.hpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.hpp
@@ -4,7 +4,7 @@
 
 //===---------- ONNXLegalityCheck.hpp - Check legality for ONNX ops -------===//
 //
-// Copyright 2019-2023 The IBM Research Authors.
+// Copyright 2019-2024 The IBM Research Authors.
 //
 // =============================================================================
 //
@@ -14,7 +14,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_LEGALITY_H
+#define ONNX_MLIR_LEGALITY_H
 
 #include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp"
 #include "src/Dialect/ONNX/ONNXDimAnalysis.hpp"
@@ -53,3 +54,5 @@ bool onnxToZHighUnsupportedReport(
 
 bool onnxToZHighInCompatibilityReport(
     mlir::Operation *op, std::string inputNNPALevel);
+
+#endif
diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.hpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHigh.hpp
@@ -4,7 +4,7 @@
 
 //====------ ONNXToZHigh.hpp - ONNX dialect to ZHigh lowering -------------===//
 //
-// Copyright 2019-2022 The IBM Research Authors.
+// Copyright 2019-2024 The IBM Research Authors.
 //
 // =============================================================================
 //
@@ -13,7 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_ONNX_TO_ZHIGH_H
+#define ONNX_MLIR_ONNX_TO_ZHIGH_H
 
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/DialectConversion.h"
@@ -30,3 +31,4 @@ void getONNXToZHighOneOpDynamicallyLegal(
     mlir::ConversionTarget *target, const DimAnalysis *dimAnalysis);
 
 } // namespace onnx_mlir
+#endif
diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp
@@ -2,7 +2,8 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-//===---------- ONNXToZHigh.hpp - Common functions in ONNXToZHigh ---------===//
+//===------ ONNXToZHighCommon.hpp - Common functions in ONNXToZHigh -------===//
 //
 // Copyright 2019-2024 The IBM Research Authors.
 //
@@ -12,7 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_ZHIGH_COMMON_H
+#define ONNX_MLIR_ZHIGH_COMMON_H
 
 #include "llvm/ADT/STLExtras.h"
 
@@ -115,3 +117,4 @@ mlir::Value getDynShape(
     mlir::Location loc, mlir::PatternRewriter &rewriter, mlir::Value x);
 
 } // namespace onnx_mlir
+#endif
diff --git a/src/Accelerators/NNPA/Conversion/ONNXToZHigh/PerfModel.hpp b/src/Accelerators/NNPA/Conversion/ONNXToZHigh/PerfModel.hpp
@@ -4,7 +4,7 @@
 
 //===-------- PerfModel.hpp - Estimate if CPU or NNPA is faster  ----------===//
 //
-// Copyright 2023 The IBM Research Authors.
+// Copyright 2023-2024 The IBM Research Authors.
 //
 // =============================================================================
 //
@@ -13,7 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#pragma once
+#ifndef ONNX_MLIR_PERF_H
+#define ONNX_MLIR_PERF_H
 
 #include "mlir/IR/BuiltinOps.h"
 
@@ -32,3 +33,4 @@ double estimateTimeForStickOp(mlir::Value oper);
 double estimateTimeForUnstickOp(mlir::Value oper);
 
 } // namespace onnx_mlir
+#endif