Merge branch 'main' into mem_reduction_stickified
imaihal committed Aug 26, 2024
2 parents 9c5dd88 + b861652 commit 2f23ff8
Showing 11 changed files with 72 additions and 32 deletions.
6 changes: 3 additions & 3 deletions docker/Dockerfile.llvm-project
@@ -47,11 +47,11 @@ RUN distro=$(cat /etc/os-release|grep -Po '(?<=^ID=").*(?=")|(?<=^ID=)[^"].*[^"]
autoconf automake ca-certificates clang cmake diffutils \
file java-11-openjdk-devel java-11-openjdk-headless \
gcc gcc-c++ git libtool make ncurses-devel ninja-build \
-python39 python39-devel python39-numpy python39-pip \
-python39-setuptools python39-wheel tzdata-java zlib-devel && \
+python39 python39-devel python39-pip python39-setuptools \
+python39-wheel tzdata-java zlib-devel && \
# Use same versions as those in ubuntu:jammy
pip3 install -q \
-Cython pytest==6.2.5 pytest-forked==1.4.0 \
+Cython pytest==6.2.5 numpy==1.21.5 pytest-forked==1.4.0 \
pytest-xdist==2.5.0 typing-extensions==3.10.0.2 && \
rm -rf /var/cache/dnf/* && \
echo -e "/usr/local/lib" > /etc/ld.so.conf.d/local.conf; \
14 changes: 8 additions & 6 deletions docs/Dialects/onnx.md
@@ -529,7 +529,7 @@ AveragePool consumes an input tensor X and applies average pooling across
```
output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
```
-if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`.
+if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`. Sliding windows that would start in the right padded region are ignored.

`auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following when ceil_mode is enabled:
```
@@ -4201,7 +4201,9 @@ This is layer normalization defined in ONNX as function.
Let `d[i]` indicate the i-th dimension of `X`.
If `X`'s shape is `[d[0], ..., d[axis-1], d[axis], ..., d[rank-1]]`,
the shape of `Mean` and `InvStdDev` is `[d[0], ..., d[axis-1], 1, ..., 1]`.
-`Y` and `X` have the same shape.
+`Y` and `X` have the same shape. This operator supports unidirectional broadcasting
+(tensors `Scale` and `B` should be unidirectional broadcastable to tensor `X`);
+for more details please check [the doc](Broadcasting.md).

Traits: `AlwaysSpeculatableImplTrait`
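The two-stage description above is dense, so here is a small numpy sketch of the shapes it specifies (an illustration only, not the ONNX-MLIR lowering): `Mean` and `InvStdDev` keep the leading dimensions and collapse the normalized ones to 1, while `Scale` and `B` broadcast unidirectionally onto `X`.

```python
import numpy as np

def layer_norm(X, Scale, B, axis=-1, epsilon=1e-5):
    axis = axis % X.ndim
    red = tuple(range(axis, X.ndim))           # normalize over d[axis]..d[rank-1]
    Mean = X.mean(axis=red, keepdims=True)     # [d[0], ..., d[axis-1], 1, ..., 1]
    InvStdDev = 1.0 / np.sqrt(X.var(axis=red, keepdims=True) + epsilon)
    # Scale and B broadcast unidirectionally onto X's trailing dimensions.
    Y = (X - Mean) * InvStdDev * Scale + B     # Y has the same shape as X
    return Y, Mean, InvStdDev

X = np.random.rand(2, 3, 4).astype(np.float32)
Y, Mean, InvStdDev = layer_norm(X, np.ones(4, np.float32), np.zeros(4, np.float32))
print(Y.shape, Mean.shape, InvStdDev.shape)    # (2, 3, 4) (2, 3, 1) (2, 3, 1)
```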

@@ -4851,7 +4853,7 @@ MaxPool consumes an input tensor X and applies max pooling across
```
output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
```
-if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`.
+if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`. Sliding windows that would start in the right padded region are ignored.

`auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following when ceil_mode is enabled:
```
@@ -10294,11 +10296,11 @@ Effects: `MemoryEffects::Effect{}`
_ONNX TopK operation_

Retrieve the top-K largest or smallest elements along a specified axis. Given an input tensor of
-shape [a_1, a_2, ..., a_n, r] and integer argument k, return two outputs:
+shape [a_0, a_1, ..., a_{n-1\}\] and integer argument k, return two outputs:

-* Value tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n]
+* Value tensor of shape [a_0, a_1, ..., a_{axis-1}, k, a_{axis+1}, ... a_{n-1\}\]
which contains the values of the top k elements along the specified axis
-* Index tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n] which
+* Index tensor of shape [a_0, a_1, ..., a_{axis-1}, k, a_{axis+1}, ... a_{n-1\}\] which
contains the indices of the top k elements (original indices from the input
tensor).

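The sentence added to the AveragePool/MaxPool docs above encodes the usual ceil_mode correction: a final window that would begin in the right padding contributes nothing, so it is dropped. A minimal per-axis sketch (`pool_output_size` is an assumed helper for illustration, not onnx-mlir's shape-inference code):

```python
import math

def pool_output_size(input_size, kernel, stride, pad_begin, pad_end,
                     dilation=1, ceil_mode=True):
    """One spatial axis; pad_shape[i] in the doc text is pad_begin + pad_end."""
    effective_kernel = dilation * (kernel - 1) + 1
    numer = input_size + pad_begin + pad_end - effective_kernel
    if ceil_mode:
        out = math.ceil(numer / stride) + 1
        # Drop a last window that would start inside the right padded region
        # (the sentence added in this diff).
        if (out - 1) * stride >= input_size + pad_begin:
            out -= 1
    else:
        out = numer // stride + 1
    return out

# Mirrors test_maxpool_2d_ceil_output_size_reduce_by_one_cpu from the test list:
# 2-wide input, 1-wide kernel, stride 2 -> naive ceil gives 2, corrected to 1.
print(pool_output_size(2, 1, 2, 0, 0))  # -> 1
```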
14 changes: 8 additions & 6 deletions src/Dialect/ONNX/ONNXOps.td.inc
@@ -426,7 +426,7 @@ def ONNXAveragePoolOp:ONNX_Op<"AveragePool",
```
output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
```
-if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`.
+if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`. Sliding windows that would start in the right padded region are ignored.

`auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following when ceil_mode is enabled:
```
@@ -3672,7 +3672,9 @@ def ONNXLayerNormalizationOp:ONNX_Op<"LayerNormalization",
Let `d[i]` indicate the i-th dimension of `X`.
If `X`'s shape is `[d[0], ..., d[axis-1], d[axis], ..., d[rank-1]]`,
the shape of `Mean` and `InvStdDev` is `[d[0], ..., d[axis-1], 1, ..., 1]`.
-`Y` and `X` have the same shape.
+`Y` and `X` have the same shape. This operator supports unidirectional broadcasting
+(tensors `Scale` and `B` should be unidirectional broadcastable to tensor `X`);
+for more details please check [the doc](Broadcasting.md).
}];
let arguments = (ins AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[BF16]>]>:$X,
AnyTypeOf<[TensorOf<[F16]>, TensorOf<[F32]>, TensorOf<[F64]>, TensorOf<[BF16]>]>:$Scale,
@@ -4293,7 +4295,7 @@ def ONNXMaxPoolOp:ONNX_Op<"MaxPool",
```
output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
```
-if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`.
+if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`. Sliding windows that would start in the right padded region are ignored.

`auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following when ceil_mode is enabled:
```
@@ -9609,11 +9611,11 @@ def ONNXTopKOp:ONNX_Op<"TopK",
let summary = "ONNX TopK operation";
let description = [{
Retrieve the top-K largest or smallest elements along a specified axis. Given an input tensor of
-shape [a_1, a_2, ..., a_n, r] and integer argument k, return two outputs:
+shape [a_0, a_1, ..., a_{n-1\}\] and integer argument k, return two outputs:

-* Value tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n]
+* Value tensor of shape [a_0, a_1, ..., a_{axis-1}, k, a_{axis+1}, ... a_{n-1\}\]
which contains the values of the top k elements along the specified axis
-* Index tensor of shape [a_1, a_2, ..., a_{axis-1}, k, a_{axis+1}, ... a_n] which
+* Index tensor of shape [a_0, a_1, ..., a_{axis-1}, k, a_{axis+1}, ... a_{n-1\}\] which
contains the indices of the top k elements (original indices from the input
tensor).

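The TopK edits above only switch the shape notation from 1-based `a_1..a_n` to 0-based `a_0..a_{n-1}`; the operator itself is unchanged. A small numpy illustration of the two outputs (assuming largest-first, sorted behavior; this `topk` is a stand-in for the spec, not the ONNX-MLIR kernel):

```python
import numpy as np

def topk(x, k, axis=-1, largest=True):
    order = np.argsort(-x if largest else x, axis=axis, kind="stable")
    idx = np.take(order, np.arange(k), axis=axis)   # original input indices
    values = np.take_along_axis(x, idx, axis=axis)
    return values, idx

x = np.random.rand(3, 5).astype(np.float32)         # shape [a_0, a_1] = [3, 5]
values, indices = topk(x, k=2, axis=1)
print(values.shape, indices.shape)                  # (3, 2) (3, 2): [a_0, k]
```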
2 changes: 2 additions & 0 deletions src/Dialect/ONNX/ONNXOps/OpHelper.cpp
@@ -625,6 +625,8 @@ Type convertONNXTypeToMLIRType(

case onnx::TensorProto_DataType::TensorProto_DataType_COMPLEX64:
case onnx::TensorProto_DataType::TensorProto_DataType_COMPLEX128:
+case onnx::TensorProto_DataType::TensorProto_DataType_INT4:
+case onnx::TensorProto_DataType::TensorProto_DataType_UINT4:
case onnx::TensorProto_DataType::TensorProto_DataType_UNDEFINED:
llvm_unreachable("Unsupported data type encountered.");
return nullptr;
1 change: 1 addition & 0 deletions test/backend/CMakeLists.txt
@@ -226,6 +226,7 @@ add_dependencies(check-onnx-backend-model onnx-mlir)
add_dependencies(check-onnx-backend-model PyRuntimeC)
add_dependencies(check-onnx-backend-signature onnx-mlir)
add_dependencies(check-onnx-backend-signature PyRuntimeC)
+add_dependencies(check-onnx-backend-case PyRuntimeC)
add_dependencies(check-onnx-backend-input-verification onnx-mlir)
add_dependencies(check-onnx-backend-input-verification PyRuntimeC)
add_dependencies(check-onnx-backend-compilerlib CompilerLibTest)
36 changes: 33 additions & 3 deletions test/backend/all_test_names.txt
@@ -1,5 +1,5 @@
# This file is automatically generated by "make check-onnx-backend-case"
-# From onnx 1.15.0
+# From onnx 1.16.2
# All test cases for cpu target
test_bvlc_alexnet_cpu
test_densenet121_cpu
@@ -36,6 +36,8 @@ test_ai_onnx_ml_label_encoder_string_int_cpu
test_ai_onnx_ml_label_encoder_string_int_no_default_cpu
test_ai_onnx_ml_label_encoder_tensor_mapping_cpu
test_ai_onnx_ml_label_encoder_tensor_value_only_mapping_cpu
+test_ai_onnx_ml_tree_ensemble_set_membership_cpu
+test_ai_onnx_ml_tree_ensemble_single_tree_cpu
test_and2d_cpu
test_and3d_cpu
test_and4d_cpu
@@ -153,6 +155,8 @@ test_cast_FLOAT16_to_FLOAT8E4M3FN_cpu
test_cast_FLOAT16_to_FLOAT8E5M2FNUZ_cpu
test_cast_FLOAT16_to_FLOAT8E5M2_cpu
test_cast_FLOAT16_to_FLOAT_cpu
+test_cast_FLOAT16_to_INT4_cpu
+test_cast_FLOAT16_to_UINT4_cpu
test_cast_FLOAT8E4M3FNUZ_to_FLOAT16_cpu
test_cast_FLOAT8E4M3FNUZ_to_FLOAT_cpu
test_cast_FLOAT8E4M3FN_to_FLOAT16_cpu
@@ -168,8 +172,16 @@ test_cast_FLOAT_to_FLOAT8E4M3FNUZ_cpu
test_cast_FLOAT_to_FLOAT8E4M3FN_cpu
test_cast_FLOAT_to_FLOAT8E5M2FNUZ_cpu
test_cast_FLOAT_to_FLOAT8E5M2_cpu
+test_cast_FLOAT_to_INT4_cpu
test_cast_FLOAT_to_STRING_cpu
+test_cast_FLOAT_to_UINT4_cpu
+test_cast_INT4_to_FLOAT16_cpu
+test_cast_INT4_to_FLOAT_cpu
+test_cast_INT4_to_INT8_cpu
test_cast_STRING_to_FLOAT_cpu
+test_cast_UINT4_to_FLOAT16_cpu
+test_cast_UINT4_to_FLOAT_cpu
+test_cast_UINT4_to_UINT8_cpu
test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FNUZ_cpu
test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FN_cpu
test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2FNUZ_cpu
@@ -311,11 +323,16 @@ test_deform_conv_with_multiple_offset_groups_cpu
test_depthtospace_crd_mode_example_cpu
test_depthtospace_example_cpu
test_dequantizelinear_axis_cpu
+test_dequantizelinear_blocked_cpu
test_dequantizelinear_cpu
test_dequantizelinear_e4m3fn_cpu
test_dequantizelinear_e4m3fn_float16_cpu
test_dequantizelinear_e4m3fn_zero_point_cpu
test_dequantizelinear_e5m2_cpu
+test_dequantizelinear_int16_cpu
+test_dequantizelinear_int4_cpu
+test_dequantizelinear_uint16_cpu
+test_dequantizelinear_uint4_cpu
test_det_2d_cpu
test_det_nd_cpu
test_dft_axis_cpu
@@ -615,6 +632,7 @@ test_max_uint64_cpu
test_max_uint8_cpu
test_maxpool_1d_default_cpu
test_maxpool_2d_ceil_cpu
+test_maxpool_2d_ceil_output_size_reduce_by_one_cpu
test_maxpool_2d_default_cpu
test_maxpool_2d_dilations_cpu
test_maxpool_2d_pads_cpu
@@ -769,12 +787,24 @@ test_prelu_broadcast_expanded_cpu
test_prelu_example_cpu
test_prelu_example_expanded_cpu
test_qlinearconv_cpu
-test_qlinearmatmul_2D_cpu
-test_qlinearmatmul_3D_cpu
+test_qlinearmatmul_2D_int8_float16_cpu
+test_qlinearmatmul_2D_int8_float32_cpu
+test_qlinearmatmul_2D_uint8_float16_cpu
+test_qlinearmatmul_2D_uint8_float32_cpu
+test_qlinearmatmul_3D_int8_float16_cpu
+test_qlinearmatmul_3D_int8_float32_cpu
+test_qlinearmatmul_3D_uint8_float16_cpu
+test_qlinearmatmul_3D_uint8_float32_cpu
test_quantizelinear_axis_cpu
+test_quantizelinear_blocked_asymmetric_cpu
+test_quantizelinear_blocked_symmetric_cpu
test_quantizelinear_cpu
test_quantizelinear_e4m3fn_cpu
test_quantizelinear_e5m2_cpu
+test_quantizelinear_int16_cpu
+test_quantizelinear_int4_cpu
+test_quantizelinear_uint16_cpu
+test_quantizelinear_uint4_cpu
test_range_float_type_positive_delta_cpu
test_range_float_type_positive_delta_expanded_cpu
test_range_int32_type_negative_delta_cpu
24 changes: 13 additions & 11 deletions test/backend/inference_backend.py
@@ -6,6 +6,7 @@
#
################################################################################
from __future__ import absolute_import
+from __future__ import annotations
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
@@ -1257,21 +1258,21 @@ def get_test_models():
},
# ==OP== GroupNormalization
# ==MIN== 18
"test_group_normalization_epsilon_cpu": {
STATIC_SHAPE: {},
DYNAMIC_SHAPE: {-1: {-1}},
CONSTANT_INPUT: {-1},
},
# "test_group_normalization_epsilon_cpu": {
# STATIC_SHAPE: {},
# DYNAMIC_SHAPE: {-1: {-1}},
# CONSTANT_INPUT: {-1},
# },
"test_group_normalization_epsilon_expanded_cpu": {
STATIC_SHAPE: {},
DYNAMIC_SHAPE: {-1: {-1}},
CONSTANT_INPUT: {-1},
},
"test_group_normalization_example_cpu": {
STATIC_SHAPE: {},
DYNAMIC_SHAPE: {-1: {-1}},
CONSTANT_INPUT: {-1},
},
# "test_group_normalization_example_cpu": {
# STATIC_SHAPE: {},
# DYNAMIC_SHAPE: {-1: {-1}},
# CONSTANT_INPUT: {-1},
# },
"test_group_normalization_example_expanded_cpu": {
STATIC_SHAPE: {},
DYNAMIC_SHAPE: {-1: {-1}},
@@ -3619,11 +3620,12 @@ def assert_similar_outputs(
outputs: Sequence[Any],
rtol: float,
atol: float,
+model_dir: str | None = None,
) -> None:
rtol = float(os.getenv("TEST_RTOL", rtol))
atol = float(os.getenv("TEST_ATOL", atol))
super(InferenceBackendTest, cls).assert_similar_outputs(
-ref_outputs, outputs, rtol, atol
+ref_outputs, outputs, rtol, atol, model_dir
)

def _add_onnxmlir_model_test(
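Two details in this hunk are worth spelling out: the new `model_dir` parameter presumably tracks the `assert_similar_outputs` signature in the updated onnx 1.16 test runner, and the `from __future__ import annotations` added at the top of the file is what lets the PEP 604 `str | None` annotation parse on the Python 3.9 interpreters the docker images install. A tiny standalone sketch of that second point:

```python
# Without the __future__ import, "str | None" in an annotation raises
# "TypeError: unsupported operand type(s) for |" on Python < 3.10, because
# type.__or__ only exists from 3.10 onward. With it, annotations stay strings.
from __future__ import annotations

def check(model_dir: str | None = None) -> None:
    print(model_dir if model_dir is not None else "<no model dir>")

check()                 # -> <no model dir>
check("/tmp/model")     # -> /tmp/model  (hypothetical path)
```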
1 change: 1 addition & 0 deletions test/multiple-models/CMakeLists.txt
@@ -30,3 +30,4 @@ add_custom_target(check-multiple-models

add_dependencies(check-onnx-backend onnx-mlir)
add_dependencies(check-multiple-models PyRuntimeC)
+add_dependencies(check-multiple-models PyCompileAndRuntimeC)
2 changes: 1 addition & 1 deletion third_party/onnx
Submodule onnx updated 633 files
2 changes: 1 addition & 1 deletion utils/gen_onnx_mlir.py
@@ -66,7 +66,7 @@

# ==UPDATE_ONNX_VERSION_OPSET==
# Look for tag above and update all references when upgrading the ONNX support within ONNX-MLIR.
current_onnx_version = "1.15.0"
current_onnx_version = "1.16.2"

# Check the version of onnx package being used.
if (
2 changes: 1 addition & 1 deletion utils/pre-onnx-mlir.py
@@ -39,7 +39,7 @@

# ==UPDATE_ONNX_VERSION_OPSET==
# Look for tag above and update all references when upgrading the ONNX support within ONNX-MLIR.
-current_onnx_opset = 19
+current_onnx_opset = 21

converted_model = version_converter.convert_version(original_model, current_onnx_opset)

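For context, `pre-onnx-mlir.py` upgrades older models to the supported opset via the standard onnx version converter, so the constant above is the only knob to bump. A minimal standalone sketch of that call (the file names are placeholders):

```python
import onnx
from onnx import version_converter

# Load an older model, convert it to opset 21, and save the result.
original_model = onnx.load("model.onnx")            # placeholder input path
converted_model = version_converter.convert_version(original_model, 21)
onnx.save(converted_model, "model-opset21.onnx")    # placeholder output path
```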
