Llama2 model Operator/Layer level instance extraction
hayden-brown committed Aug 1, 2024
1 parent 01f83f5 commit 3033e0f
Showing 13 changed files with 79 additions and 16 deletions.
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-fc.mlir
@@ -58,6 +58,11 @@ func.func @kernel_fc_layer(%arg0 : tensor<1x40x4096xf32>, %arg1 : tensor<4096x40

%tensor_unranked = tensor.cast %51 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [49152{{(, 49152)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

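A note on the FileCheck idiom the added directives in this and the following files rely on (the value 42 below is purely illustrative and appears nowhere in this commit): printMemrefF32 prints each innermost row as a bracketed, comma-separated list, so the directive matches the first element literally and lets the regex {{(, <value>)*}} absorb any number of further repetitions of the same value. One CHECK-SAME line can therefore cover an arbitrarily wide row of identical results.

// Printed row:  [42, 42, 42, ..., 42]
// CHECK: [42{{(, 42)*}}]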
18 changes: 12 additions & 6 deletions examples/BuddyNext/next-ffn.mlir
@@ -77,22 +77,28 @@ func.func @kernel_ffn(%arg0: tensor<1x40x4096xf32>, %arg9: tensor<4096xf32>, %ar

%tensor_unranked = tensor.cast %158 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [461655{{(, 461655)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

return
}

func.func @main() {
- %input_tensor = arith.constant dense<3.0> : tensor<1x40x4096xf32>
- %weight1 = arith.constant dense<1.0> : tensor<4096xf32>
- %weight2 = arith.constant dense<1.0> : tensor<11008x4096xf32>
- %weight3 = arith.constant dense<2.0> : tensor<11008x4096xf32>
- %weight4 = arith.constant dense<1.0> : tensor<4096x11008xf32>
+ %input_tensor = arith.constant dense<0.5> : tensor<1x40x4096xf32>
+ %weight1 = arith.constant dense<0.1> : tensor<4096xf32>
+ %weight2 = arith.constant dense<0.1> : tensor<11008x4096xf32>
+ %weight3 = arith.constant dense<0.1> : tensor<11008x4096xf32>
+ %weight4 = arith.constant dense<0.1> : tensor<4096x11008xf32>

// Print timings.
call @kernel_ffn(%input_tensor, %weight1, %weight2, %weight3, %weight4) : (tensor<1x40x4096xf32>, tensor<4096xf32>, tensor<11008x4096xf32>, tensor<11008x4096xf32>, tensor<4096x11008xf32>) -> ()

return
}

func.func private @printMemrefF32(%ptr : tensor<*xf32>)
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-fpowi.mlir
@@ -52,6 +52,11 @@ module {

%tensor_unranked = tensor.cast %result : tensor<1x32x40x64xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 64] strides = [81920, 2560, 64, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [25{{(, 25)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

4 changes: 4 additions & 0 deletions examples/BuddyNext/next-matmul.mlir
@@ -44,6 +44,10 @@ func.func @kernel_matmul(%arg0 : tensor<40x4096xf32>, %arg1 : tensor<4096x4096xf

%tensor_unranked = tensor.cast %matmul_result : tensor<40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [40, 4096] strides = [4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [24576{{(, 24576)*}}]

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

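As a quick consistency check on the expected value above (a sketch; the constant operands of kernel_matmul sit outside this hunk and are assumed to be uniform): every element of the 40x4096 times 4096x4096 product is a sum over the shared dimension of length 4096, so a uniform output of 24576 implies each elementwise product equals 24576 / 4096 = 6.

c_{ij} = \sum_{k=1}^{4096} a_{ik} \, b_{kj} = 4096 \times 6 = 24576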
4 changes: 4 additions & 0 deletions examples/BuddyNext/next-mul.mlir
@@ -46,6 +46,10 @@ module {

%tensor_unranked = tensor.cast %mul_result : tensor<1x40x1xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 1] strides = [40, 1, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [6{{(, 6)*}}]

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

7 changes: 6 additions & 1 deletion examples/BuddyNext/next-negate.mlir
@@ -30,7 +30,7 @@
// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
// RUN: | FileCheck %s

module {
func.func private @rtclock() -> f64
@@ -46,6 +46,11 @@ module {

%tensor_unranked = tensor.cast %negated : tensor<1x32x40x64xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 64] strides = [81920, 2560, 64, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [-1{{(, -1)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

6 changes: 5 additions & 1 deletion examples/BuddyNext/next-reciprocal.mlir
@@ -46,14 +46,18 @@ module {

%tensor_unranked = tensor.cast %result : tensor<1x10xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [1, 10] strides = [10, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [0.5{{(, 0.5)*}}]

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

return
}

func.func @main() {
%input_tensor = "tosa.const"() {value = dense<[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]> : tensor<1x10xf32>} : () -> tensor<1x10xf32>
%input_tensor = "tosa.const"() {value = dense<2.0> : tensor<1x10xf32>} : () -> tensor<1x10xf32>

call @kernel_reciprocal(%input_tensor) : (tensor<1x10xf32>) -> ()

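The expected row of 0.5 follows directly from the new constant input: tosa.reciprocal maps the all-2.0 tensor elementwise to its reciprocal.

\mathrm{reciprocal}(2.0) = \frac{1}{2.0} = 0.5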
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-reducesum.mlir
@@ -46,6 +46,11 @@ module {

%tensor_unranked = tensor.cast %result : tensor<1x40x1xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 1] strides = [40, 1, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [4096{{(, 4096)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

5 changes: 5 additions & 0 deletions examples/BuddyNext/next-rmsnorm.mlir
@@ -68,6 +68,11 @@ func.func @kernel_rmsnorm(%arg0: tensor<1x40x4096xf32>) {

%tensor_unranked = tensor.cast %39 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [0.999999{{(, 0.999999)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

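The expected 0.999999 is consistent with the standard RMSNorm computation on a constant input (a sketch; the constant input value and a unit gamma are assumptions, since the surrounding code is outside this hunk): for a constant tensor the root mean square equals the value itself, so normalization gives a result pulled just below 1 by the epsilon term.

\frac{x}{\sqrt{\tfrac{1}{n}\sum_{j} x_j^2 + \epsilon}} = \frac{x}{\sqrt{x^2 + \epsilon}} \approx 0.999999 \quad \text{for constant } x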
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-rsqrt.mlir
@@ -45,6 +45,11 @@ func.func @kernel_rsqrt(%arg0 : tensor<1x40x1xf32>) {

%tensor_unranked = tensor.cast %rsqrt_result : tensor<1x40x1xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 1] strides = [40, 1, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [0.57735{{(, 0.57735)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

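The expected 0.57735 matches the reciprocal square root of 3 (an all-3.0 input is an assumption here, since the main() constants are outside this hunk):

\frac{1}{\sqrt{3.0}} \approx 0.57735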
21 changes: 13 additions & 8 deletions examples/BuddyNext/next-selfattention.mlir
@@ -200,22 +200,27 @@ func.func @kernel_self_attention(%arg0 : tensor<1x1x4096xf32>, %arg1 : tensor<1x

%tensor_unranked = tensor.cast %127 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [83883.8{{(, 83883.8)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

return
}

func.func @main() {
- %input_tensor_0 = arith.constant dense<3.0> : tensor<1x1x4096xf32>
- %input_tensor_1 = arith.constant dense<1.0> : tensor<1x40x4096xf32>
- %input_tensor_2 = arith.constant dense<2> : tensor<40xi64>
- %input_tensor_3 = arith.constant dense<1.0> : tensor<4096x4096xf32>
- %input_tensor_4 = arith.constant dense<1.0> : tensor<4096x4096xf32>
- %input_tensor_5 = arith.constant dense<1.0> : tensor<4096x4096xf32>
+ %input_tensor_0 = arith.constant dense<1.0> : tensor<1x1x4096xf32>
+ %input_tensor_1 = arith.constant dense<0.1> : tensor<1x40x4096xf32>
+ %input_tensor_2 = arith.constant dense<1> : tensor<40xi64>
+ %input_tensor_3 = arith.constant dense<0.5> : tensor<4096x4096xf32>
+ %input_tensor_4 = arith.constant dense<0.1> : tensor<4096x4096xf32>
+ %input_tensor_5 = arith.constant dense<0.1> : tensor<4096x4096xf32>
%input_tensor_6 = arith.constant dense<1.0> : tensor<1x1x2048x128xf32>
- %input_tensor_7 = arith.constant dense<1.0> : tensor<1x1x2048x128xf32>
- %input_tensor_8 = arith.constant dense<2.0> : tensor<4096x4096xf32>
+ %input_tensor_7 = arith.constant dense<0.1> : tensor<1x1x2048x128xf32>
+ %input_tensor_8 = arith.constant dense<0.5> : tensor<4096x4096xf32>
%input_tensor_9 = arith.constant dense<0.0> : tensor<1x1x40x40xf32>

call @kernel_self_attention(%input_tensor_0, %input_tensor_1, %input_tensor_2, %input_tensor_3, %input_tensor_4, %input_tensor_5, %input_tensor_6, %input_tensor_7, %input_tensor_8, %input_tensor_9) : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>, tensor<40xi64>, tensor<4096x4096xf32>, tensor<4096x4096xf32>, tensor<4096x4096xf32>, tensor<1x1x2048x128xf32>, tensor<1x1x2048x128xf32>, tensor<4096x4096xf32>, tensor<1x1x40x40xf32>) -> ()
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-softmax.mlir
@@ -54,6 +54,11 @@ func.func @kernel_softmax(%arg0 : tensor<1x32x40x40xf32>, %arg1 : tensor<1x1x40x

%tensor_unranked = tensor.cast %109 : tensor<1x32x40x40xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 40] strides = [51200, 1600, 40, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [0.025{{(, 0.025)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

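The expected 0.025 is what a softmax over the 40-wide trailing axis produces when all logits along that axis are equal (the reduction axis and the equal logits are assumptions based on the 1x32x40x40 shape): each of the 40 entries receives probability 1/40.

\mathrm{softmax}(x)_i = \frac{e^{x_i}}{\sum_{j=1}^{40} e^{x_j}} = \frac{1}{40} = 0.025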
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-transpose.mlir
@@ -47,6 +47,11 @@ module {

%tensor_unranked = tensor.cast %transposed : tensor<1x32x40x128xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 128] strides = [163840, 5120, 128, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [1{{(, 1)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

