Skip to content

Commit

Permalink
[AArch64][AMDGPU][GlobalISel] Remove vector handling from unmerge_dead_to_trunc (#82224)
Browse files Browse the repository at this point in the history

This combine transforms an unmerge where only the first element is used
into a truncate. That works OK for scalars, but for vectors it needs to
insert a bitcast to integers, perform the truncate, and then bitcast back
to vectors. This generates more awkward code than using an unmerge.
  • Loading branch information
davemgreen authored Feb 20, 2024
1 parent 39fd3fc commit 1b12974
Show file tree
Hide file tree
Showing 23 changed files with 265 additions and 433 deletions.
18 changes: 4 additions & 14 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2078,6 +2078,9 @@ bool CombinerHelper::matchCombineUnmergeUndef(
bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
return false;
// Check that all the lanes are dead except the first one.
for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
Expand All @@ -2089,21 +2092,8 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
Builder.setInstrAndDebugLoc(MI);
Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
// Truncating a vector is going to truncate every single lane,
// whereas we want the full lowbits.
// Do the operation on a scalar instead.
LLT SrcTy = MRI.getType(SrcReg);
if (SrcTy.isVector())
SrcReg =
Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);

Register Dst0Reg = MI.getOperand(0).getReg();
LLT Dst0Ty = MRI.getType(Dst0Reg);
if (Dst0Ty.isVector()) {
auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
Builder.buildCast(Dst0Reg, MIB);
} else
Builder.buildTrunc(Dst0Reg, SrcReg);
Builder.buildTrunc(Dst0Reg, SrcReg);
MI.eraseFromParent();
}

Expand Down
11 changes: 4 additions & 7 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
Original file line number Diff line number Diff line change
Expand Up @@ -326,10 +326,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
; CHECK-NEXT: $w0 = COPY [[BITCAST1]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: $w0 = COPY [[UV]](<2 x s16>)
%0:_(<4 x s16>) = COPY $x0
%1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>)
$w0 = COPY %1(<2 x s16>)
Expand All @@ -343,9 +341,8 @@ body: |
bb.1:
; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: $h0 = COPY [[UV]](s16)
%0:_(<2 x s32>) = COPY $x0
%1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>)
$h0 = COPY %1(s16)
Expand Down
36 changes: 21 additions & 15 deletions llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ define <1 x i8> @test_bitf_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
;
; CHECK-GI-LABEL: test_bitf_v1i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: bic w9, w9, w10
; CHECK-GI-NEXT: and w8, w10, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.b[0]
; CHECK-GI-NEXT: umov w9, v1.b[0]
; CHECK-GI-NEXT: umov w10, v0.b[0]
; CHECK-GI-NEXT: bic w9, w9, w8
; CHECK-GI-NEXT: and w8, w8, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -39,11 +42,14 @@ define <1 x i16> @test_bitf_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
;
; CHECK-GI-LABEL: test_bitf_v1i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: bic w9, w9, w10
; CHECK-GI-NEXT: and w8, w10, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.h[0]
; CHECK-GI-NEXT: umov w9, v1.h[0]
; CHECK-GI-NEXT: umov w10, v0.h[0]
; CHECK-GI-NEXT: bic w9, w9, w8
; CHECK-GI-NEXT: and w8, w8, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -64,11 +70,11 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
;
; CHECK-GI-LABEL: test_bitf_v1i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: bic w9, w9, w10
; CHECK-GI-NEXT: and w8, w10, w8
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: fmov w10, s0
; CHECK-GI-NEXT: bic w9, w9, w8
; CHECK-GI-NEXT: and w8, w8, w10
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v0.s[1], w8
Expand Down
36 changes: 21 additions & 15 deletions llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
;
; CHECK-GI-LABEL: test_bit_v1i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: and w9, w10, w9
; CHECK-GI-NEXT: bic w8, w8, w10
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.b[0]
; CHECK-GI-NEXT: umov w9, v1.b[0]
; CHECK-GI-NEXT: umov w10, v0.b[0]
; CHECK-GI-NEXT: and w9, w8, w9
; CHECK-GI-NEXT: bic w8, w10, w8
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -41,11 +44,14 @@ define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
;
; CHECK-GI-LABEL: test_bit_v1i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: and w9, w10, w9
; CHECK-GI-NEXT: bic w8, w8, w10
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-GI-NEXT: umov w8, v2.h[0]
; CHECK-GI-NEXT: umov w9, v1.h[0]
; CHECK-GI-NEXT: umov w10, v0.h[0]
; CHECK-GI-NEXT: and w9, w8, w9
; CHECK-GI-NEXT: bic w8, w10, w8
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: ret
Expand All @@ -66,11 +72,11 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
;
; CHECK-GI-LABEL: test_bit_v1i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov x9, d1
; CHECK-GI-NEXT: fmov x10, d2
; CHECK-GI-NEXT: and w9, w10, w9
; CHECK-GI-NEXT: bic w8, w8, w10
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: fmov w10, s0
; CHECK-GI-NEXT: and w9, w8, w9
; CHECK-GI-NEXT: bic w8, w10, w8
; CHECK-GI-NEXT: orr w8, w9, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v0.s[1], w8
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
;
; CHECK-GI-LABEL: abs_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: asr w9, w8, #31
; CHECK-GI-NEXT: add w8, w8, w9
; CHECK-GI-NEXT: eor w8, w8, w9
Expand Down
86 changes: 28 additions & 58 deletions llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1394,7 +1394,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
;
; CHECK-GI-LABEL: testDUP.v1i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.8b, w8
; CHECK-GI-NEXT: ret
%b = extractelement <1 x i8> %a, i32 0
Expand All @@ -1410,17 +1410,11 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
}

define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
; CHECK-SD-LABEL: testDUP.v1i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.8h, v0.h[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testDUP.v1i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.8h, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: testDUP.v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
%b = extractelement <1 x i16> %a, i32 0
%c = insertelement <8 x i16> undef, i16 %b, i32 0
%d = insertelement <8 x i16> %c, i16 %b, i32 1
Expand All @@ -1434,17 +1428,11 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
}

define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
; CHECK-SD-LABEL: testDUP.v1i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: testDUP.v1i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: testDUP.v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
%b = extractelement <1 x i32> %a, i32 0
%c = insertelement <4 x i32> undef, i32 %b, i32 0
%d = insertelement <4 x i32> %c, i32 %b, i32 1
Expand Down Expand Up @@ -2448,33 +2436,21 @@ define <16 x i8> @concat_vector_v16i8_const() {
}

define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
; CHECK-SD-LABEL: concat_vector_v4i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_vector_v4i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.4h, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_vector_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4h, v0.h[0]
; CHECK-NEXT: ret
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
ret <4 x i16> %r
}

define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
; CHECK-SD-LABEL: concat_vector_v4i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_vector_v4i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.4s, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_vector_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.4s, v0.s[0]
; CHECK-NEXT: ret
%r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %r
}
Expand All @@ -2488,25 +2464,19 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
;
; CHECK-GI-LABEL: concat_vector_v8i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.8b, w8
; CHECK-GI-NEXT: ret
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
ret <8 x i8> %r
}

define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
; CHECK-SD-LABEL: concat_vector_v8i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: dup v0.8h, v0.h[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: concat_vector_v8i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: dup v0.8h, w8
; CHECK-GI-NEXT: ret
; CHECK-LABEL: concat_vector_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: dup v0.8h, v0.h[0]
; CHECK-NEXT: ret
%r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %r
}
Expand All @@ -2520,7 +2490,7 @@ define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
;
; CHECK-GI-LABEL: concat_vector_v16i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: dup v0.16b, w8
; CHECK-GI-NEXT: ret
%r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,6 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
;
; GISEL-LABEL: test_vcvt_f16_f32:
; GISEL: // %bb.0:
; GISEL-NEXT: fmov x8, d0
; GISEL-NEXT: fmov s0, w8
; GISEL-NEXT: fcvt h0, s0
; GISEL-NEXT: ret
%tmp = fptrunc <1 x float> %x to <1 x half>
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/bswap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
;
; CHECK-GI-LABEL: bswap_v1i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: rev w8, w8
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov v0.s[1], w8
Expand Down
29 changes: 9 additions & 20 deletions llvm/test/CodeGen/AArch64/fpext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,14 @@ entry:
}

define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) {
; CHECK-SD-LABEL: fpext_v2f16_v2f64:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: mov h1, v0.h[1]
; CHECK-SD-NEXT: fcvt d0, h0
; CHECK-SD-NEXT: fcvt d1, h1
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fpext_v2f16_v2f64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: fcvt d0, h0
; CHECK-GI-NEXT: fcvt d1, h1
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
; CHECK-LABEL: fpext_v2f16_v2f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: fcvt d0, h0
; CHECK-NEXT: fcvt d1, h1
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
entry:
%c = fpext <2 x half> %a to <2 x double>
ret <2 x double> %c
Expand Down Expand Up @@ -165,8 +155,7 @@ define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
;
; CHECK-GI-LABEL: fpext_v2f16_v2f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov x8, d0
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]
Expand Down
Loading

0 comments on commit 1b12974

Please sign in to comment.