Skip to content

Commit

Permalink
[SelectionDAG] Use unaligned store/load to move AVX registers onto st…
Browse files Browse the repository at this point in the history
…ack for `insertelement` (llvm#82130)

Prior to this patch, SelectionDAG generated aligned moves onto the stack for
AVX registers when the function was marked as a no-realign-stack
function. This led to a mismatch between the actual stack alignment and the
alignment assumed by the generated instruction. This patch fixes the issue. There was a similar
issue reported for `extractelement` which was fixed in
a6614ec

Co-authored-by: Manish Kausik H <hmamishkausik@gmail.com>
  • Loading branch information
2 people authored and bwendling committed Aug 15, 2024
1 parent fc1eddb commit 7a31e39
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 4 deletions.
19 changes: 15 additions & 4 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1474,11 +1474,17 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

// First store the whole vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
Align BaseVecAlignment =
DAG.getMachineFunction().getFrameInfo().getObjectAlign(FI);
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
BaseVecAlignment);

// Freeze the index so we don't poison the clamping code we're about to emit.
Idx = DAG.getFreeze(Idx);

Type *PartTy = PartVT.getTypeForEVT(*DAG.getContext());
Align PartAlignment = DAG.getDataLayout().getPrefTypeAlign(PartTy);

// Then store the inserted part.
if (PartVT.isVector()) {
SDValue SubStackPtr =
Expand All @@ -1487,7 +1493,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Store the subvector.
Ch = DAG.getStore(
Ch, dl, Part, SubStackPtr,
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
PartAlignment);
} else {
SDValue SubStackPtr =
TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Expand All @@ -1496,11 +1503,15 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
Ch = DAG.getTruncStore(
Ch, dl, Part, SubStackPtr,
MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
VecVT.getVectorElementType());
VecVT.getVectorElementType(), PartAlignment);
}

assert(cast<StoreSDNode>(Ch)->getAlign() == PartAlignment &&
"ElementAlignment does not match!");

// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo);
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
BaseVecAlignment);
}

SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s

; Variable-index insertelement into <8 x i32> with no alignstack attribute.
; Attribute group #0 marks the function "no-realign-stack", and the checks
; expect the ymm store/reload through the stack to use the unaligned vmovups.
define <8 x i32> @foo(<8 x i32> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
ret <8 x i32> %a
}

; Variable-index insertelement into <8 x i32> with alignstack(8) and
; "no-realign-stack" (attribute group #0). The checks expect the ymm
; store/reload through the stack to use the unaligned vmovups.
define <8 x i32> @foo2(<8 x i32> %arg1, i32 %n) alignstack(8) #0 {
; CHECK-LABEL: foo2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -32(%rsp,%rdi,4)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
ret <8 x i32> %a
}

; Variable-index insertelement into <8 x i32> with alignstack(16) and
; "no-realign-stack" (attribute group #0). The checks still expect the
; unaligned vmovups for the ymm store/reload through the stack.
define <8 x i32> @foo3(<8 x i32> %arg1, i32 %n) alignstack(16) #0 {
; CHECK-LABEL: foo3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -40(%rsp,%rdi,4)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
ret <8 x i32> %a
}

; Variable-index insertelement into <8 x i32> with alignstack(64) and
; "no-realign-stack" (attribute group #0). Here the checks expect the
; aligned vmovaps for the ymm store/reload through the stack.
define <8 x i32> @foo4(<8 x i32> %arg1, i32 %n) alignstack(64) #0 {
; CHECK-LABEL: foo4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, -56(%rsp,%rdi,4)
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
ret <8 x i32> %a
}

; Variable-index insertelement into <8 x i32> with alignstack(256) and
; "no-realign-stack" (attribute group #0). The checks expect the aligned
; vmovaps for the ymm store/reload, plus an explicit subq/addq $120
; adjustment of %rsp around the body.
define <8 x i32> @foo5(<8 x i32> %arg1, i32 %n) alignstack(256) #0 {
; CHECK-LABEL: foo5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $120, %rsp
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movl $42, 64(%rsp,%rdi,4)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: addq $120, %rsp
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i32> %arg1, i32 42, i32 %n
ret <8 x i32> %a
}

; Variable-index insertelement into <8 x i16> (an xmm-sized vector) with
; "no-realign-stack" (attribute group #0) and no alignstack attribute.
; The checks expect the aligned vmovaps for the xmm store/reload.
; NOTE(review): presumably the 16-byte slot is already satisfied by the
; default stack alignment of this target -- confirm.
define <8 x i16> @foo6(<8 x i16> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo6:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movw $42, -24(%rsp,%rdi,2)
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i16> %arg1, i16 42, i32 %n
ret <8 x i16> %a
}

; Variable-index insertelement into <8 x i8> with "no-realign-stack"
; (attribute group #0) and no alignstack attribute. The checks expect the
; aligned vmovaps for the xmm store/reload.
; NOTE(review): the index is masked with 15 rather than 7, which suggests
; the vector is widened to 16 x i8 during legalization -- confirm.
define <8 x i8> @foo7(<8 x i8> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo7:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $15, %edi
; CHECK-NEXT: movb $42, -24(%rsp,%rdi)
; CHECK-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i8> %arg1, i8 42, i32 %n
ret <8 x i8> %a
}

; Variable-index insertelement into <8 x i64> with "no-realign-stack"
; (attribute group #0) and no alignstack attribute. The vector occupies two
; ymm registers (ymm0 and ymm1); the checks expect both halves to be stored
; and reloaded with the unaligned vmovups.
define <8 x i64> @foo8(<8 x i64> %arg1, i32 %n) #0 {
; CHECK-LABEL: foo8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: andl $7, %edi
; CHECK-NEXT: movq $42, -72(%rsp,%rdi,8)
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm0
; CHECK-NEXT: vmovups -{{[0-9]+}}(%rsp), %ymm1
; CHECK-NEXT: retq
entry:
%a = insertelement <8 x i64> %arg1, i64 42, i32 %n
ret <8 x i64> %a
}

attributes #0 = { "no-realign-stack" nounwind }

0 comments on commit 7a31e39

Please sign in to comment.