Skip to content

Commit

Permalink
Fix instruction order issue in scalarizer (microsoft#5001)
Browse files Browse the repository at this point in the history
When scalarizing a shuffle instruction that has an extractelement as an input element,
create the clone before the shuffle instead of after it.

This is because later, when the shuffle is replaced, the new vector is inserted before the shuffle.
If the cloned element were inserted after the shuffle, as it was previously, its definition would come after its use.
For example:
%[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
%[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
%[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
%[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
%[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
%[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
%[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
%[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1
  • Loading branch information
python3kgae authored Feb 9, 2023
1 parent 40d5e1f commit b3dedc9
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
2 changes: 1 addition & 1 deletion lib/Transforms/Scalar/Scalarizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// instruction is processed, it will be replaced without updating our
// Gather entry. This dead instruction will be accessed by finish(),
// causing assert or crash.
Res[I] = IRBuilder<>(SVI.getNextNode()).Insert(EA->clone());
Res[I] = IRBuilder<>(&SVI).Insert(EA->clone());
}
// HLSL Change Ends
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
; RUN: opt -S -scalarizer -dce %s | FileCheck %s

; CHECK: %[[B:.+]] = load <2 x float>, <2 x float>* %b, align 4
; CHECK: %[[BX0:.+]] = extractelement <2 x float> %[[B]], i32 0
; CHECK: %[[BY0:.+]] = extractelement <2 x float> %[[B]], i32 1
; CHECK: %[[BX1:.+]] = extractelement <2 x float> %[[B]], i32 0
; CHECK: %[[BY1:.+]] = extractelement <2 x float> %[[B]], i32 1

; CHECK: %[[X:.+]] = insertelement <4 x float> undef, float %[[BX0]], i32 0
; CHECK: %[[Y:.+]] = insertelement <4 x float> %[[X]], float %[[BY0]], i32 1
; CHECK: %[[Z:.+]] = insertelement <4 x float> %[[Y]], float %[[BX1]], i32 2
; CHECK: %[[W:.+]] = insertelement <4 x float> %[[Z]], float %[[BY1]], i32 3
; CHECK: ret <4 x float> %[[W]]

; External declaration. Its only reference in this file is the disabled call
; inside @bar below.
declare void @foo(<2 x float>, <2 x float>* dereferenceable(8))

; Test input: @bar reloads a <2 x float> from %b and widens it to <4 x float>
; with a shufflevector whose mask repeats lanes 0 and 1. The expected output
; patterns at the top of this file require the four extractelement
; instructions to appear before the insertelement chain that uses them.
; Function Attrs: noinline nounwind
define internal <4 x float> @bar(<3 x float> %v) #0 {
entry:
%0 = alloca <2 x float>
%b = alloca <2 x float>, align 4
; %b is zero-initialized here and reloaded as %3 further down.
store <2 x float> zeroinitializer, <2 x float>* %b, align 4
; Overwrite lane 0 of %v with 1.0, then keep lanes 0 and 1 and store them to %0.
%1 = insertelement <3 x float> %v, float 1.000000e+00, i32 0
%2 = shufflevector <3 x float> %1, <3 x float> undef, <2 x i32> <i32 0, i32 1>
store <2 x float> %2, <2 x float>* %0
; Disabled call. NOTE(review): its first argument is a pointer, whereas the
; @foo declaration above takes a <2 x float> by value -- confirm before re-enabling.
;call void @foo(<2 x float>* dereferenceable(8) %0, <2 x float>* dereferenceable(8) %b)
; Shuffle under test: mask <0, 1, 0, 1> reads each of the two loaded lanes twice.
%3 = load <2 x float>, <2 x float>* %b, align 4
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x float> %4
}

; Attribute group referenced by @bar as #0.
attributes #0 = { noinline nounwind }

0 comments on commit b3dedc9

Please sign in to comment.