diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index f7f4395a4cd8b8..779ec49f4d13a7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2078,6 +2078,9 @@ bool CombinerHelper::matchCombineUnmergeUndef(
 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
          "Expected an unmerge");
+  if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
+      MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
+    return false;
   // Check that all the lanes are dead except the first one.
   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
     if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
@@ -2089,21 +2092,8 @@ bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
 void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
   Builder.setInstrAndDebugLoc(MI);
   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
-  // Truncating a vector is going to truncate every single lane,
-  // whereas we want the full lowbits.
-  // Do the operation on a scalar instead.
-  LLT SrcTy = MRI.getType(SrcReg);
-  if (SrcTy.isVector())
-    SrcReg =
-        Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
-
   Register Dst0Reg = MI.getOperand(0).getReg();
-  LLT Dst0Ty = MRI.getType(Dst0Reg);
-  if (Dst0Ty.isVector()) {
-    auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
-    Builder.buildCast(Dst0Reg, MIB);
-  } else
-    Builder.buildTrunc(Dst0Reg, SrcReg);
+  Builder.buildTrunc(Dst0Reg, SrcReg);
   MI.eraseFromParent();
 }
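With the new guard, the combine bails out as soon as either the first destination or the source of the G_UNMERGE_VALUES is a vector, so the apply side only ever sees scalars and can emit a plain G_TRUNC. As a reference for the scalar pattern that still folds, here is a minimal sketch in the style of the existing tests in combine-unmerge.mir below (the test name and CHECK lines are illustrative only and are not part of this patch): only the first destination is live, so the whole unmerge collapses to one truncate of the source.

name:            unmerge_dead_high_half_to_trunc_example
body:             |
  bb.1:
    ; Only %1 (the low 32 bits) has uses, so the combiner is expected to
    ; rewrite the unmerge into a single G_TRUNC and drop the dead lane %2.
    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
    ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
    %0:_(s64) = COPY $x0
    %1:_(s32),%2:_(s32) = G_UNMERGE_VALUES %0(s64)
    $w0 = COPY %1(s32)
...

The vector cases that used to go through the G_BITCAST/G_TRUNC/G_BITCAST dance are now simply left as unmerges, as the regenerated tests below show.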
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
index e2874bc28e1eea..c2c6e04d2d0ce5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir
@@ -326,10 +326,8 @@ body: |
   bb.1:
     ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in_n_out
     ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $x0
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
-    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[TRUNC]](s32)
-    ; CHECK-NEXT: $w0 = COPY [[BITCAST1]](<2 x s16>)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+    ; CHECK-NEXT: $w0 = COPY [[UV]](<2 x s16>)
     %0:_(<4 x s16>) = COPY $x0
     %1:_(<2 x s16>),%2:_(<2 x s16>) = G_UNMERGE_VALUES %0(<4 x s16>)
     $w0 = COPY %1(<2 x s16>)
@@ -343,9 +341,8 @@ body: |
   bb.1:
     ; CHECK-LABEL: name: test_combine_unmerge_dead_to_trunc_vec_in
     ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
-    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>)
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s64)
-    ; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16)
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK-NEXT: $h0 = COPY [[UV]](s16)
     %0:_(<2 x s32>) = COPY $x0
     %1:_(s16),%2:_(s16),%3:_(s16),%4:_(s16) = G_UNMERGE_VALUES %0(<2 x s32>)
     $h0 = COPY %1(s16)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
index cd93332210eda3..273bf559554c9d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
@@ -14,11 +14,14 @@ define <1 x i8> @test_bitf_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
 ;
 ; CHECK-GI-LABEL: test_bitf_v1i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    fmov x10, d2
-; CHECK-GI-NEXT:    bic w9, w9, w10
-; CHECK-GI-NEXT:    and w8, w10, w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    umov w8, v2.b[0]
+; CHECK-GI-NEXT:    umov w9, v1.b[0]
+; CHECK-GI-NEXT:    umov w10, v0.b[0]
+; CHECK-GI-NEXT:    bic w9, w9, w8
+; CHECK-GI-NEXT:    and w8, w8, w10
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    ret
@@ -39,11 +42,14 @@ define <1 x i16> @test_bitf_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
 ;
 ; CHECK-GI-LABEL: test_bitf_v1i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    fmov x10, d2
-; CHECK-GI-NEXT:    bic w9, w9, w10
-; CHECK-GI-NEXT:    and w8, w10, w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    umov w8, v2.h[0]
+; CHECK-GI-NEXT:    umov w9, v1.h[0]
+; CHECK-GI-NEXT:    umov w10, v0.h[0]
+; CHECK-GI-NEXT:    bic w9, w9, w8
+; CHECK-GI-NEXT:    and w8, w8, w10
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    ret
@@ -64,11 +70,11 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
 ;
 ; CHECK-GI-LABEL: test_bitf_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    fmov x10, d2
-; CHECK-GI-NEXT:    bic w9, w9, w10
-; CHECK-GI-NEXT:    and w8, w10, w8
+; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov w10, s0
+; CHECK-GI-NEXT:    bic w9, w9, w8
+; CHECK-GI-NEXT:    and w8, w8, w10
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index b4ddff76f25b87..a92ae39c69724d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -16,11 +16,14 @@ define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
 ;
 ; CHECK-GI-LABEL: test_bit_v1i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    fmov x10, d2
-; CHECK-GI-NEXT:    and w9, w10, w9
-; CHECK-GI-NEXT:    bic w8, w8, w10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    umov w8, v2.b[0]
+; CHECK-GI-NEXT:    umov w9, v1.b[0]
+; CHECK-GI-NEXT:    umov w10, v0.b[0]
+; CHECK-GI-NEXT:    and w9, w8, w9
+; CHECK-GI-NEXT:    bic w8, w10, w8
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    ret
@@ -41,11 +44,14 @@ define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
 ;
 ; CHECK-GI-LABEL: test_bit_v1i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    fmov x10, d2
-; CHECK-GI-NEXT:    and w9, w10, w9
-; CHECK-GI-NEXT:    bic w8, w8, w10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    umov w8, v2.h[0]
+; CHECK-GI-NEXT:    umov w9, v1.h[0]
+; CHECK-GI-NEXT:    umov w10, v0.h[0]
+; CHECK-GI-NEXT:    and w9, w8, w9
+; CHECK-GI-NEXT:    bic w8, w10, w8
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    ret
@@ -66,11 +72,11 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
 ;
 ; CHECK-GI-LABEL: test_bit_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
-; CHECK-GI-NEXT:    fmov x10, d2
-; CHECK-GI-NEXT:    and w9, w10, w9
-; CHECK-GI-NEXT:    bic w8, w8, w10
+; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    fmov w9, s1
+; CHECK-GI-NEXT:    fmov w10, s0
+; CHECK-GI-NEXT:    and w9, w8, w9
+; CHECK-GI-NEXT:    bic w8, w10, w8
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
diff --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index 934aae9ec74c03..40ba2c12fa15f4 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -250,7 +250,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
 ;
 ; CHECK-GI-LABEL: abs_v1i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    asr w9, w8, #31
 ; CHECK-GI-NEXT:    add w8, w8, w9
 ; CHECK-GI-NEXT:    eor w8, w8, w9
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index 328b782c14956c..cc3d80008143cd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1394,7 +1394,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: testDUP.v1i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    dup v0.8b, w8
 ; CHECK-GI-NEXT:    ret
   %b = extractelement <1 x i8> %a, i32 0
@@ -1410,17 +1410,11 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 }
 
 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
-; CHECK-SD-LABEL: testDUP.v1i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: testDUP.v1i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    dup v0.8h, w8
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: testDUP.v1i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    ret
   %b = extractelement <1 x i16> %a, i32 0
   %c = insertelement <8 x i16> undef, i16 %b, i32 0
   %d = insertelement <8 x i16> %c, i16 %b, i32 1
@@ -1434,17 +1428,11 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 }
 
 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
-; CHECK-SD-LABEL: testDUP.v1i32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    dup v0.4s, v0.s[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: testDUP.v1i32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    dup v0.4s, w8
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: testDUP.v1i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
   %b = extractelement <1 x i32> %a, i32 0
   %c = insertelement <4 x i32> undef, i32 %b, i32 0
   %d = insertelement <4 x i32> %c, i32 %b, i32 1
@@ -2448,33 +2436,21 @@ define <16 x i8> @concat_vector_v16i8_const() {
 }
 
 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
-; CHECK-SD-LABEL: concat_vector_v4i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: concat_vector_v4i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    dup v0.4h, w8
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: concat_vector_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ret
   %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
   ret <4 x i16> %r
 }
 
 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
-; CHECK-SD-LABEL: concat_vector_v4i32:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    dup v0.4s, v0.s[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: concat_vector_v4i32:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    dup v0.4s, w8
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: concat_vector_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    ret
   %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %r
 }
@@ -2488,7 +2464,7 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: concat_vector_v8i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    dup v0.8b, w8
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
@@ -2496,17 +2472,11 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 }
 
 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
-; CHECK-SD-LABEL: concat_vector_v8i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: concat_vector_v8i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    dup v0.8h, w8
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: concat_vector_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    dup v0.8h, v0.h[0]
+; CHECK-NEXT:    ret
   %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
   ret <8 x i16> %r
 }
@@ -2520,7 +2490,7 @@ define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: concat_vector_v16i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    dup v0.16b, w8
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index 269ffed98a844e..aa6b7cb495f189 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -217,8 +217,6 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
 ;
 ; GISEL-LABEL: test_vcvt_f16_f32:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    fmov x8, d0
-; GISEL-NEXT:    fmov s0, w8
 ; GISEL-NEXT:    fcvt h0, s0
 ; GISEL-NEXT:    ret
   %tmp = fptrunc <1 x float> %x to <1 x half>
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index 6df62a00a8f8fb..9b065accce9146 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -134,7 +134,7 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
 ;
 ; CHECK-GI-LABEL: bswap_v1i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    rev w8, w8
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll
index db1105d613cdb8..eca3389bcd88b5 100644
--- a/llvm/test/CodeGen/AArch64/fpext.ll
+++ b/llvm/test/CodeGen/AArch64/fpext.ll
@@ -85,24 +85,14 @@ entry:
 }
 
 define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) {
-; CHECK-SD-LABEL: fpext_v2f16_v2f64:
-; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    mov h1, v0.h[1]
-; CHECK-SD-NEXT:    fcvt d0, h0
-; CHECK-SD-NEXT:    fcvt d1, h1
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: fpext_v2f16_v2f64:
-; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NEXT:    fcvt d0, h0
-; CHECK-GI-NEXT:    fcvt d1, h1
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: fpext_v2f16_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    fcvt d0, h0
+; CHECK-NEXT:    fcvt d1, h1
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ret
 entry:
   %c = fpext <2 x half> %a to <2 x double>
   ret <2 x double> %c
@@ -165,8 +155,7 @@ define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
 ;
 ; CHECK-GI-LABEL: fpext_v2f16_v2f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NEXT:    mov v0.h[2], v0.h[0]
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 251719c1e3b430..facb89671056f6 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -3240,8 +3240,7 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
-; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
@@ -3253,8 +3252,7 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fmov x8, d0
-; CHECK-GI-FP16-NEXT:    fmov s0, w8
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h1
@@ -3291,8 +3289,7 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
-; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
@@ -3304,8 +3301,7 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    fmov x8, d0
-; CHECK-GI-FP16-NEXT:    fmov s0, w8
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h1
@@ -4997,8 +4993,7 @@ define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) {
 ;
 ; CHECK-GI-LABEL: fptos_v2f16_v2i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NEXT:    mov v0.h[2], v0.h[0]
@@ -5021,8 +5016,7 @@ define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) {
 ;
 ; CHECK-GI-LABEL: fptou_v2f16_v2i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NEXT:    mov v0.h[2], v0.h[0]
@@ -5279,8 +5273,7 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
-; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
@@ -5310,8 +5303,7 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
-; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
@@ -5761,8 +5753,7 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
-; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
@@ -5792,8 +5783,7 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i8:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    fmov x8, d0
-; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[2], v0.h[0]
diff --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
index 8e6b15c8ba8d47..70df88ba9f8985 100644
--- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
@@ -100,8 +100,7 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; GISEL-NEXT:    .cfi_def_cfa_offset 32
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset b8, -16
-; GISEL-NEXT:    fmov x8, d0
-; GISEL-NEXT:    fmov s0, w8
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov h8, v0.h[1]
 ; GISEL-NEXT:    fcvt s0, h0
 ; GISEL-NEXT:    bl exp10f
@@ -323,8 +322,7 @@ define <1 x float> @exp10_v1f32(<1 x float> %x) {
 ; GISEL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; GISEL-NEXT:    .cfi_def_cfa_offset 16
 ; GISEL-NEXT:    .cfi_offset w30, -16
-; GISEL-NEXT:    fmov x8, d0
-; GISEL-NEXT:    fmov s0, w8
+; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    // kill: def $s0 killed $s0 def $d0
 ; GISEL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index 8b7438a42b711e..62ad45b212967a 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -206,8 +206,8 @@ define i8 @test_redand_v1i8(<1 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redand_v1i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    fmov x0, d0
-; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %and_result = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
   ret i8 %and_result
diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
index c4ac01f32e3651..20c498d36fdea4 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -206,8 +206,8 @@ define i8 @test_redor_v1i8(<1 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redor_v1i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    fmov x0, d0
-; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %or_result = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> %a)
   ret i8 %or_result
diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll
index 5c2a808ef2e88c..b8ca99e003b627 100644
--- a/llvm/test/CodeGen/AArch64/reduce-xor.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll
@@ -195,8 +195,8 @@ define i8 @test_redxor_v1i8(<1 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v1i8:
 ; GISEL:       // %bb.0:
-; GISEL-NEXT:    fmov x0, d0
-; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %xor_result = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> %a)
   ret i8 %xor_result
diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
index 15c8e1792f3d31..ccc06f2e1058d5 100644
--- a/llvm/test/CodeGen/AArch64/shift.ll
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -590,8 +590,8 @@ define <1 x i32> @shl_v1i32(<1 x i32> %0, <1 x i32> %1){
 ;
 ; CHECK-GI-LABEL: shl_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    lsl w8, w8, w9
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
@@ -693,8 +693,8 @@ define <1 x i32> @ashr_v1i32(<1 x i32> %0, <1 x i32> %1){
 ;
 ; CHECK-GI-LABEL: ashr_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    asr w8, w8, w9
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
@@ -786,8 +786,8 @@ define <1 x i32> @lshr_v1i32(<1 x i32> %0, <1 x i32> %1){
 ;
 ; CHECK-GI-LABEL: lshr_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    fmov w8, s0
+; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    lsr w8, w8, w9
 ; CHECK-GI-NEXT:    fmov s0, w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 4354fcd465dac8..1d295a30a994b0 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -58,14 +58,12 @@ define float @test_v1f32(<1 x float> %a) nounwind {
 ;
 ; CHECK-NOFP-GI-LABEL: test_v1f32:
 ; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP-GI-NEXT:    fmov s0, w8
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-NOFP-GI-NEXT:    ret
 ;
 ; CHECK-FP-GI-LABEL: test_v1f32:
 ; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    fmov x8, d0
-; CHECK-FP-GI-NEXT:    fmov s0, w8
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-FP-GI-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 16b34cce93293e..4c02a5240ba6af 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -58,14 +58,12 @@ define float @test_v1f32(<1 x float> %a) nounwind {
 ;
 ; CHECK-NOFP-GI-LABEL: test_v1f32:
 ; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP-GI-NEXT:    fmov s0, w8
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-NOFP-GI-NEXT:    ret
 ;
 ; CHECK-FP-GI-LABEL: test_v1f32:
 ; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    fmov x8, d0
-; CHECK-FP-GI-NEXT:    fmov s0, w8
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-FP-GI-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
index 635ed3e1977ce1..be61f9b5217952 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
@@ -54,14 +54,12 @@ define float @test_v1f32(<1 x float> %a) nounwind {
 ;
 ; CHECK-NOFP-GI-LABEL: test_v1f32:
 ; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP-GI-NEXT:    fmov s0, w8
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-NOFP-GI-NEXT:    ret
 ;
 ; CHECK-FP-GI-LABEL: test_v1f32:
 ; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    fmov x8, d0
-; CHECK-FP-GI-NEXT:    fmov s0, w8
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-FP-GI-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
   ret float %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index 497109dfeaf09e..18d40cb18ba609 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -58,14 +58,12 @@ define float @test_v1f32(<1 x float> %a) nounwind {
 ;
 ; CHECK-NOFP-GI-LABEL: test_v1f32:
 ; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP-GI-NEXT:    fmov s0, w8
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-NOFP-GI-NEXT:    ret
 ;
 ; CHECK-FP-GI-LABEL: test_v1f32:
 ; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    fmov x8, d0
-; CHECK-FP-GI-NEXT:    fmov s0, w8
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-FP-GI-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
   ret float %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
index 8123ca6d1b54ff..e735f670ced0cf 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
@@ -54,14 +54,12 @@ define float @test_v1f32(<1 x float> %a) nounwind {
 ;
 ; CHECK-NOFP-GI-LABEL: test_v1f32:
 ; CHECK-NOFP-GI:       // %bb.0:
-; CHECK-NOFP-GI-NEXT:    fmov x8, d0
-; CHECK-NOFP-GI-NEXT:    fmov s0, w8
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-NOFP-GI-NEXT:    ret
 ;
 ; CHECK-FP-GI-LABEL: test_v1f32:
 ; CHECK-FP-GI:       // %bb.0:
-; CHECK-FP-GI-NEXT:    fmov x8, d0
-; CHECK-FP-GI-NEXT:    fmov s0, w8
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-FP-GI-NEXT:    ret
   %b = call float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a)
   ret float %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 7f804fe48fd854..8988481708cfb6 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -29,33 +29,21 @@ define i1 @test_v1i1(<1 x i1> %a) nounwind {
 }
 
 define i8 @test_v1i8(<1 x i8> %a) nounwind {
-; CHECK-SD-LABEL: test_v1i8:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    umov w0, v0.b[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_v1i8:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_v1i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.b[0]
+; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)
   ret i8 %b
 }
 
 define i16 @test_v1i16(<1 x i16> %a) nounwind {
-; CHECK-SD-LABEL: test_v1i16:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT:    umov w0, v0.h[0]
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: test_v1i16:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: test_v1i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w0, v0.h[0]
+; CHECK-NEXT:    ret
   %b = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a)
   ret i16 %b
 }
@@ -77,8 +65,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: test_v1i32:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov x0, d0
-; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
   %b = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
   ret i32 %b
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir
index dbc0794d3e35b0..789385dcbae829 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir
@@ -1630,29 +1630,24 @@ body: |
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
     ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-    ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
     ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
     ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-    ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-    ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
     ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
     ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-    ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-    ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-    ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-    ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+    ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
     ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
     ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-    ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-    ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+    ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+    ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
     ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    ;
     ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
     ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; GFX9-CONTRACT-NEXT: {{ $}}
@@ -1660,29 +1655,24 @@ body: |
     ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
     ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-    ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+    ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
     ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
     ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-    ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-    ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+    ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
     ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
     ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-CONTRACT-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-    ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-    ; GFX9-CONTRACT-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-    ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-    ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+    ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+    ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
     ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
     ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-    ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-    ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-    ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+    ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+    ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+    ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
     ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    ;
     ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
     ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; GFX9-DENORM-NEXT: {{ $}}
@@ -1690,29 +1680,24 @@ body: |
     ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
     ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
     ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-    ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-    ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+    ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
     ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
     ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
     ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-    ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-    ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+    ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
     ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
     ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
     ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-    ; GFX9-DENORM-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-    ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-    ; GFX9-DENORM-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-    ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-    ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+    ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+    ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
     ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
     ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-    ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-    ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-    ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+    ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+    ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+    ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
     ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    ;
     ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
     ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; GFX9-UNSAFE-NEXT: {{ $}}
@@ -1720,29 +1705,24 @@ body: |
    ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX9-UNSAFE-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-   ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+   ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+   ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
    ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+   ;
    ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
@@ -1750,29 +1730,24 @@ body: |
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-   ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+   ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+   ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
    ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+   ;
    ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
    ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-CONTRACT-NEXT: {{ $}}
@@ -1780,29 +1755,24 @@ body: |
    ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-CONTRACT-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX10-CONTRACT-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-   ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+   ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+   ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
    ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+   ;
    ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
    ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-DENORM-NEXT: {{ $}}
@@ -1810,29 +1780,24 @@ body: |
    ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-DENORM-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX10-DENORM-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-   ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+   ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+   ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
    ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+   ;
    ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
    ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-UNSAFE-NEXT: {{ $}}
@@ -1840,28 +1805,22 @@ body: |
    ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX10-UNSAFE-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX10-UNSAFE-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]]
-   ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]]
+   ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
+   ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
    ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
    %4:_(<2 x s16>) = COPY $vgpr0
    %5:_(<2 x s16>) = COPY $vgpr1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
index e486be5238b324..42e53bedb8d857 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
@@ -1443,28 +1443,22 @@ body: |
    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]]
-   ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]]
+   ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]]
+   ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]]
    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
    ;
    ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
@@ -1474,27 +1468,21 @@ body: |
    ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX9-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX9-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-CONTRACT-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX9-CONTRACT-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]]
+   ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
    ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
    ;
    ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
@@ -1504,28 +1492,22 @@ body: |
    ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>)
-   ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96)
-   ; GFX9-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48)
+   ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
    ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
    ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>)
-   ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96)
-   ; GFX9-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48)
+   ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
    ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
    ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-DENORM-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>)
-   ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96)
-   ; GFX9-DENORM-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48)
-   ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]]
-   ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]]
+   ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+   ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]]
+   ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]]
    ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
-   ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
-   ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
-   ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+   ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+   ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+   ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
    ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
    ;
    ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
@@ -1535,27 +1517,21 @@ body: |
    ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
    ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
    ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
-   ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST
[[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) - ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) - ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) - ; GFX9-UNSAFE-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs @@ -1565,28 +1541,22 @@ body: | ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) - ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), 
[[DEF]](<2 x s16>) - ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) - ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) - ; GFX10-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] - ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]] ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs @@ -1596,27 +1566,21 @@ body: | ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) - ; GFX10-CONTRACT-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) - ; GFX10-CONTRACT-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GFX10-CONTRACT-NEXT: 
[[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) - ; GFX10-CONTRACT-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs @@ -1626,28 +1590,22 @@ body: | ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) - ; GFX10-DENORM-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) - ; GFX10-DENORM-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-DENORM-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) - ; 
GFX10-DENORM-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] - ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[UV]], [[UV2]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[UV4]], [[FMUL]] ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs @@ -1657,27 +1615,21 @@ body: | ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST]](s96) - ; GFX10-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC]](s48) + ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST2]](s96) - ; GFX10-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]](s48) + ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) - ; GFX10-UNSAFE-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] + ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS2]](<6 x s16>) + ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) + ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1