Llama2 model Operator/Layer level instance extraction
hayden-brown committed Aug 1, 2024
1 parent 01f83f5 commit 3033e0f
Showing 13 changed files with 79 additions and 16 deletions.
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-fc.mlir
@@ -58,6 +58,11 @@ func.func @kernel_fc_layer(%arg0 : tensor<1x40x4096xf32>, %arg1 : tensor<4096x40

%tensor_unranked = tensor.cast %51 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [49152{{(, 49152)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

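A note on the FileCheck idiom the added directives in this and the following files rely on (the value 42 below is purely illustrative and appears nowhere in this commit): printMemrefF32 prints each innermost row as a bracketed, comma-separated list, so the directive matches the first element literally and lets the regex {{(, <value>)*}} absorb any number of further repetitions of the same value. One CHECK-SAME line can therefore cover an arbitrarily wide row of identical results.

// Printed row:  [42, 42, 42, ..., 42]
// CHECK: [42{{(, 42)*}}]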
18 changes: 12 additions & 6 deletions examples/BuddyNext/next-ffn.mlir
@@ -77,22 +77,28 @@ func.func @kernel_ffn(%arg0: tensor<1x40x4096xf32>, %arg9: tensor<4096xf32>, %ar

%tensor_unranked = tensor.cast %158 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [461655{{(, 461655)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

return
}

func.func @main() {
- %input_tensor = arith.constant dense<3.0> : tensor<1x40x4096xf32>
- %weight1 = arith.constant dense<1.0> : tensor<4096xf32>
- %weight2 = arith.constant dense<1.0> : tensor<11008x4096xf32>
- %weight3 = arith.constant dense<2.0> : tensor<11008x4096xf32>
- %weight4 = arith.constant dense<1.0> : tensor<4096x11008xf32>
+ %input_tensor = arith.constant dense<0.5> : tensor<1x40x4096xf32>
+ %weight1 = arith.constant dense<0.1> : tensor<4096xf32>
+ %weight2 = arith.constant dense<0.1> : tensor<11008x4096xf32>
+ %weight3 = arith.constant dense<0.1> : tensor<11008x4096xf32>
+ %weight4 = arith.constant dense<0.1> : tensor<4096x11008xf32>

// Print timings.
call @kernel_ffn(%input_tensor, %weight1, %weight2, %weight3, %weight4) : (tensor<1x40x4096xf32>, tensor<4096xf32>, tensor<11008x4096xf32>, tensor<11008x4096xf32>, tensor<4096x11008xf32>) -> ()

return
}

func.func private @printMemrefF32(%ptr : tensor<*xf32>)
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-fpowi.mlir
@@ -52,6 +52,11 @@ module {

%tensor_unranked = tensor.cast %result : tensor<1x32x40x64xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 64] strides = [81920, 2560, 64, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [25{{(, 25)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

4 changes: 4 additions & 0 deletions examples/BuddyNext/next-matmul.mlir
@@ -44,6 +44,10 @@ func.func @kernel_matmul(%arg0 : tensor<40x4096xf32>, %arg1 : tensor<4096x4096xf

%tensor_unranked = tensor.cast %matmul_result : tensor<40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [40, 4096] strides = [4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [24576{{(, 24576)*}}]

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

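As a quick consistency check on the expected value above (a sketch; the constant operands of kernel_matmul sit outside this hunk and are assumed to be uniform): every element of the 40x4096 times 4096x4096 product is a sum over the shared dimension of length 4096, so a uniform output of 24576 implies each elementwise product equals 24576 / 4096 = 6.

c_{ij} = \sum_{k=1}^{4096} a_{ik} \, b_{kj} = 4096 \times 6 = 24576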
4 changes: 4 additions & 0 deletions examples/BuddyNext/next-mul.mlir
@@ -46,6 +46,10 @@ module {

%tensor_unranked = tensor.cast %mul_result : tensor<1x40x1xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 1] strides = [40, 1, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [6{{(, 6)*}}]

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

7 changes: 6 additions & 1 deletion examples/BuddyNext/next-negate.mlir
@@ -30,7 +30,7 @@
// RUN: | mlir-cpu-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
// RUN: | FileCheck %s

module {
func.func private @rtclock() -> f64
@@ -46,6 +46,11 @@ module {

%tensor_unranked = tensor.cast %negated : tensor<1x32x40x64xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 64] strides = [81920, 2560, 64, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [-1{{(, -1)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

6 changes: 5 additions & 1 deletion examples/BuddyNext/next-reciprocal.mlir
@@ -46,14 +46,18 @@ module {

%tensor_unranked = tensor.cast %result : tensor<1x10xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 2 offset = 0 sizes = [1, 10] strides = [10, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [0.5{{(, 0.5)*}}]

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

return
}

func.func @main() {
%input_tensor = "tosa.const"() {value = dense<[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0]]> : tensor<1x10xf32>} : () -> tensor<1x10xf32>
%input_tensor = "tosa.const"() {value = dense<2.0> : tensor<1x10xf32>} : () -> tensor<1x10xf32>

call @kernel_reciprocal(%input_tensor) : (tensor<1x10xf32>) -> ()

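The expected row of 0.5 follows directly from the new constant input: tosa.reciprocal maps the all-2.0 tensor elementwise to its reciprocal.

\mathrm{reciprocal}(2.0) = \frac{1}{2.0} = 0.5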
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-reducesum.mlir
@@ -46,6 +46,11 @@ module {

%tensor_unranked = tensor.cast %result : tensor<1x40x1xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 1] strides = [40, 1, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [4096{{(, 4096)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

5 changes: 5 additions & 0 deletions examples/BuddyNext/next-rmsnorm.mlir
@@ -68,6 +68,11 @@ func.func @kernel_rmsnorm(%arg0: tensor<1x40x4096xf32>) {

%tensor_unranked = tensor.cast %39 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [0.999999{{(, 0.999999)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

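The expected 0.999999 is consistent with the standard RMSNorm computation on a constant input (a sketch; the constant input value and a unit gamma are assumptions, since the surrounding code is outside this hunk): for a constant tensor the root mean square equals the value itself, so normalization gives a result pulled just below 1 by the epsilon term.

\frac{x}{\sqrt{\tfrac{1}{n}\sum_{j} x_j^2 + \epsilon}} = \frac{x}{\sqrt{x^2 + \epsilon}} \approx 0.999999 \quad \text{for constant } x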
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-rsqrt.mlir
@@ -45,6 +45,11 @@ func.func @kernel_rsqrt(%arg0 : tensor<1x40x1xf32>) {

%tensor_unranked = tensor.cast %rsqrt_result : tensor<1x40x1xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 1] strides = [40, 1, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [0.57735{{(, 0.57735)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

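The expected 0.57735 matches the reciprocal square root of 3 (an all-3.0 input is an assumption here, since the main() constants are outside this hunk):

\frac{1}{\sqrt{3.0}} \approx 0.57735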
21 changes: 13 additions & 8 deletions examples/BuddyNext/next-selfattention.mlir
@@ -200,22 +200,27 @@ func.func @kernel_self_attention(%arg0 : tensor<1x1x4096xf32>, %arg1 : tensor<1x

%tensor_unranked = tensor.cast %127 : tensor<1x40x4096xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 3 offset = 0 sizes = [1, 40, 4096] strides = [163840, 4096, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [83883.8{{(, 83883.8)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

return
}

func.func @main() {
- %input_tensor_0 = arith.constant dense<3.0> : tensor<1x1x4096xf32>
- %input_tensor_1 = arith.constant dense<1.0> : tensor<1x40x4096xf32>
- %input_tensor_2 = arith.constant dense<2> : tensor<40xi64>
- %input_tensor_3 = arith.constant dense<1.0> : tensor<4096x4096xf32>
- %input_tensor_4 = arith.constant dense<1.0> : tensor<4096x4096xf32>
- %input_tensor_5 = arith.constant dense<1.0> : tensor<4096x4096xf32>
+ %input_tensor_0 = arith.constant dense<1.0> : tensor<1x1x4096xf32>
+ %input_tensor_1 = arith.constant dense<0.1> : tensor<1x40x4096xf32>
+ %input_tensor_2 = arith.constant dense<1> : tensor<40xi64>
+ %input_tensor_3 = arith.constant dense<0.5> : tensor<4096x4096xf32>
+ %input_tensor_4 = arith.constant dense<0.1> : tensor<4096x4096xf32>
+ %input_tensor_5 = arith.constant dense<0.1> : tensor<4096x4096xf32>
%input_tensor_6 = arith.constant dense<1.0> : tensor<1x1x2048x128xf32>
- %input_tensor_7 = arith.constant dense<1.0> : tensor<1x1x2048x128xf32>
- %input_tensor_8 = arith.constant dense<2.0> : tensor<4096x4096xf32>
+ %input_tensor_7 = arith.constant dense<0.1> : tensor<1x1x2048x128xf32>
+ %input_tensor_8 = arith.constant dense<0.5> : tensor<4096x4096xf32>
%input_tensor_9 = arith.constant dense<0.0> : tensor<1x1x40x40xf32>

call @kernel_self_attention(%input_tensor_0, %input_tensor_1, %input_tensor_2, %input_tensor_3, %input_tensor_4, %input_tensor_5, %input_tensor_6, %input_tensor_7, %input_tensor_8, %input_tensor_9) : (tensor<1x1x4096xf32>, tensor<1x40x4096xf32>, tensor<40xi64>, tensor<4096x4096xf32>, tensor<4096x4096xf32>, tensor<4096x4096xf32>, tensor<1x1x2048x128xf32>, tensor<1x1x2048x128xf32>, tensor<4096x4096xf32>, tensor<1x1x40x40xf32>) -> ()
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-softmax.mlir
@@ -54,6 +54,11 @@ func.func @kernel_softmax(%arg0 : tensor<1x32x40x40xf32>, %arg1 : tensor<1x1x40x

%tensor_unranked = tensor.cast %109 : tensor<1x32x40x40xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 40] strides = [51200, 1600, 40, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [0.025{{(, 0.025)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

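The expected 0.025 is what a softmax over the 40-wide trailing axis produces when all logits along that axis are equal (the reduction axis and the equal logits are assumptions based on the 1x32x40x40 shape): each of the 40 entries receives probability 1/40.

\mathrm{softmax}(x)_i = \frac{e^{x_i}}{\sum_{j=1}^{40} e^{x_j}} = \frac{1}{40} = 0.025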
5 changes: 5 additions & 0 deletions examples/BuddyNext/next-transpose.mlir
@@ -47,6 +47,11 @@ module {

%tensor_unranked = tensor.cast %transposed : tensor<1x32x40x128xf32> to tensor<*xf32>

// CHECK: Unranked Memref base@ = {{.*}} rank = 4 offset = 0 sizes = [1, 32, 40, 128] strides = [163840, 5120, 128, 1] data =
// CHECK-NEXT: [
// CHECK-SAME: [
// CHECK-SAME: [1{{(, 1)*}}],

call @printMemrefF32(%tensor_unranked) : (tensor<*xf32>) -> ()
vector.print %time : f64

