From cb2d8b30ad0dc08bb151b922f5a137014ef9fc87 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Thu, 4 Jun 2020 11:07:47 -0700 Subject: [PATCH 01/25] [AArch64][GlobalISel] Select trn1 and trn2 Same idea as for zip, uzp, etc. Teach the post-legalizer combiner to recognize G_SHUFFLE_VECTORs that are trn1/trn2 instructions. - Add G_TRN1 and G_TRN2 - Port mask matching code from AArch64ISelLowering - Produce G_TRN1 and G_TRN2 in the post-legalizer combiner - Select via importer Add select-trn.mir to test selection. Add postlegalizer-combiner-trn.mir to test the combine. This is similar to the existing arm64-trn test. Note that both of these tests contain things we currently don't legalize. I figured it would be easier to test these now rather than later, since once we legalize the G_SHUFFLE_VECTORs, it's not guaranteed that someone will update the tests. Differential Revision: https://reviews.llvm.org/D81182 --- llvm/lib/Target/AArch64/AArch64Combine.td | 9 +- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 15 + .../GISel/AArch64PostLegalizerCombiner.cpp | 34 ++ .../GlobalISel/postlegalizer-combiner-trn.mir | 234 ++++++++++++++ .../CodeGen/AArch64/GlobalISel/select-trn.mir | 300 ++++++++++++++++++ 5 files changed, 591 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 26d5dde4679e33..f45a3b560cf449 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -56,9 +56,16 @@ def dup: GICombineRule < (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) >; +def trn : GICombineRule< + (defs root:$root, shuffle_matchdata:$matchinfo), + (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, + [{ return matchTRN(*${root}, MRI, ${matchinfo}); }]), + (apply [{ 
applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) +>; + // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo // instruction. -def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp]>; +def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp, trn]>; def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 8c95515fa390ee..0bd8a206705d30 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -80,6 +80,19 @@ def G_DUP: AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$lane); } +// Represents a trn1 instruction. Produced post-legalization from +// G_SHUFFLE_VECTORs with appropriate masks. +def G_TRN1 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$v1, type0:$v2); +} + +// Represents a trn2 instruction. Produced post-legalization from +// G_SHUFFLE_VECTORs with appropriate masks. +def G_TRN2 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$v1, type0:$v2); +} def : GINodeEquiv; def : GINodeEquiv; @@ -89,3 +102,5 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index bee187202a5756..1ce69a8900eba4 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -95,6 +95,22 @@ static bool isREVMask(ArrayRef M, unsigned EltSize, unsigned NumElts, return true; } +/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. 
+/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. +static bool isTRNMask(ArrayRef M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && static_cast(M[i]) != i + WhichResult) || + (M[i + 1] >= 0 && + static_cast(M[i + 1]) != i + NumElts + WhichResult)) + return false; + } + return true; +} + /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. static bool isUZPMask(ArrayRef M, unsigned NumElts, @@ -158,6 +174,24 @@ static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, return false; } +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_TRN1 or G_TRN2 instruction. +static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with /// a G_UZP1 or G_UZP2 instruction. 
/// diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir new file mode 100644 index 00000000000000..037177a78c5df5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir @@ -0,0 +1,234 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +# +# Check that we produce G_TRN1 or G_TRN2 when we have an appropriate shuffle +# mask. +# + +... +--- +name: trn1_v8s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; CHECK-LABEL: name: trn1_v8s8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, 2, 10, 4, 12, 6, 14) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v8s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; CHECK-LABEL: name: trn2_v8s8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, 9, 3, 11, 5, 13, 7, 15) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: trn1_v16s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn1_v16s8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<16 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN1_]](<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<16 x s8>) = COPY $q0 + %1:_(<16 x s8>) = COPY $q1 + %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30) + $q0 = COPY %2(<16 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v16s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn2_v16s8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<16 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN2_]](<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<16 x s8>) = COPY $q0 + %1:_(<16 x s8>) = COPY $q1 + %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31) + $q0 = COPY %2(<16 x s8>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: trn1_v4s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn1_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<4 x s32>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN1_]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4, 2, 6) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v4s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn2_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<4 x s32>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN2_]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 5, 3, 7) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: redundant_with_zip1 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP. 
+ ; + ; CHECK-LABEL: name: redundant_with_zip1 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[ZIP1_:%[0-9]+]]:_(<2 x s32>) = G_ZIP1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ZIP1_]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 2) + $d0 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: redundant_with_zip2 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP. + ; + ; CHECK-LABEL: name: redundant_with_zip2 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3) + $d0 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: trn1_undef +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching to G_TRN1. + ; + ; CHECK-LABEL: name: trn1_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, -1, -1, 4, 12, 6, 14) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $d0 + +... 
+--- +name: trn2_undef +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching to G_TRN2. + ; + ; CHECK-LABEL: name: trn2_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, -1, 3, 11, 5, 13, -1, -1) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir new file mode 100644 index 00000000000000..738aacf2c372cd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir @@ -0,0 +1,300 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that we can select G_TRN1 and G_TRN2. +# +# Each testcase is named based off of the instruction which should be selected. + +... +--- +name: TRN1v2i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v2i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v2i32_:%[0-9]+]]:fpr64 = TRN1v2i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN1v2i64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v2i64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v2i64_:%[0-9]+]]:fpr128 = TRN1v2i64 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v4i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v4i16 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v4i16_:%[0-9]+]]:fpr64 = TRN1v4i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v4i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v4i32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v4i32_:%[0-9]+]]:fpr128 = TRN1v4i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN1v8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v8i8_:%[0-9]+]]:fpr64 = TRN1v8i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v8i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v8i16 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v8i16_:%[0-9]+]]:fpr128 = TRN1v8i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v16i8_:%[0-9]+]]:fpr128 = TRN1v16i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<16 x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v2i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v2i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v2i32_:%[0-9]+]]:fpr64 = TRN2v2i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v2i64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v2i64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v2i64_:%[0-9]+]]:fpr128 = TRN2v2i64 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v4i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v4i16 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v4i16_:%[0-9]+]]:fpr64 = TRN2v4i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = G_TRN2 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v4i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v4i32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v4i32_:%[0-9]+]]:fpr128 = TRN2v4i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v8i8_:%[0-9]+]]:fpr64 = TRN2v8i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v8i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v8i16 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v8i16_:%[0-9]+]]:fpr128 = TRN2v8i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = G_TRN2 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v16i8_:%[0-9]+]]:fpr128 = TRN2v16i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<16 x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = G_TRN2 %0, %1 + RET_ReallyLR From 7a38618a20596e419abbbbb249300e812763a028 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 9 Jun 2020 19:56:30 +0200 Subject: [PATCH 02/25] [AArch64] Allow BTI mnemonics in the HINT space with BTI disabled Summary: It is important to emit HINT instructions instead of BTI ones when BTI is disabled. This allows compatibility with other assemblers (e.g. GAS). Still, developers of assembly code will want to write code that is compatible with both pre- and post-BTI CPUs. They could use HINT mnemonics, but the new mnemonics are a lot more readable (e.g. bti c instead of hint #34), and they will result in the same encodings. So, while LLVM should not *emit* the new mnemonics when BTI is disabled, this patch will at least make LLVM *accept* assembly code that uses them. 
Reviewers: pbarrio, tamas.petz, ostannard Reviewed By: pbarrio, ostannard Subscribers: ostannard, kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D81257 --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 ++++++ llvm/test/MC/AArch64/armv8.5a-bti.s | 18 +++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d642640c37520e..5aa73760f77049 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -726,6 +726,12 @@ def : InstAlias<"sevl", (HINT 0b101)>; def : InstAlias<"dgh", (HINT 0b110)>; def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; def : InstAlias<"csdb", (HINT 20)>; +// In order to be able to write readable assembly, LLVM should accept assembly +// inputs that use Branch Target Identification mnemonics, even with BTI disabled. +// However, in order to be compatible with other assemblers (e.g. GAS), LLVM +// should not emit these mnemonics unless BTI is enabled. 
+def : InstAlias<"bti", (HINT 32), 0>; +def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; diff --git a/llvm/test/MC/AArch64/armv8.5a-bti.s b/llvm/test/MC/AArch64/armv8.5a-bti.s index ca55516890c42c..e0585f7613fcc8 100644 --- a/llvm/test/MC/AArch64/armv8.5a-bti.s +++ b/llvm/test/MC/AArch64/armv8.5a-bti.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+bti < %s | FileCheck %s -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.5a < %s | FileCheck %s -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-bti < %s 2>&1 | FileCheck %s --check-prefix=NOBTI +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+bti < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.5a < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-bti < %s | FileCheck %s --check-prefix=NOBTI bti bti c @@ -12,14 +12,10 @@ bti jc // CHECK: bti j // encoding: [0x9f,0x24,0x03,0xd5] // CHECK: bti jc // encoding: [0xdf,0x24,0x03,0xd5] -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti +// NOBTI: hint #32 // encoding: [0x1f,0x24,0x03,0xd5] +// NOBTI: hint #34 // encoding: [0x5f,0x24,0x03,0xd5] +// NOBTI: hint #36 // encoding: [0x9f,0x24,0x03,0xd5] +// NOBTI: hint #38 // encoding: [0xdf,0x24,0x03,0xd5] hint #32 hint #34 From bc38793852c0552337bae54961eb14fb0bacf356 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 9 Jun 2020 09:55:25 -0700 Subject: [PATCH 03/25] Change debuginfo check for addHeapAllocSiteMetadata Summary: Move check inside of addHeapAllocSiteMetadata(). Change check to DebugInfo <= DebugLineTablesOnly. 
Reviewers: akhuang Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D81481 --- clang/lib/CodeGen/CGDebugInfo.cpp | 3 +++ clang/lib/CodeGen/CGExprCXX.cpp | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 1737154d179a68..65d513c8cf0581 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2149,6 +2149,9 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI, QualType AllocatedTy, SourceLocation Loc) { + if (CGM.getCodeGenOpts().getDebugInfo() <= + codegenoptions::DebugLineTablesOnly) + return; llvm::MDNode *node; if (AllocatedTy->isVoidType()) node = llvm::MDNode::get(CGM.getLLVMContext(), None); diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index d018443858bd5b..d59aa6ce0fb94f 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1639,8 +1639,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs); // Set !heapallocsite metadata on the call to operator new. 
- if (CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && - getDebugInfo()) + if (getDebugInfo()) if (auto *newCall = dyn_cast(RV.getScalarVal())) getDebugInfo()->addHeapAllocSiteMetadata(newCall, allocType, E->getExprLoc()); From 5dc4e7c2b95fc665c1dc86c6b40cf02171f8801d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 9 Jun 2020 18:36:14 +0100 Subject: [PATCH 04/25] [VectorCombine] scalarizeBinop - support an all-constant src vector operand scalarizeBinop currently folds vec_bo((inselt VecC0, V0, Index), (inselt VecC1, V1, Index)) -> inselt(vec_bo(VecC0, VecC1), scl_bo(V0,V1), Index) This patch extends this to account for cases where one of the vec_bo operands is already all-constant and performs similar cost checks to determine if the scalar binop with a constant still makes sense: vec_bo((inselt VecC0, V0, Index), VecC1) -> inselt(vec_bo(VecC0, VecC1), scl_bo(V0,extractelt(V1,Index)), Index) Fixes PR42174 Differential Revision: https://reviews.llvm.org/D80885 --- .../Transforms/Vectorize/VectorCombine.cpp | 59 +++- .../PhaseOrdering/X86/scalarization.ll | 31 +- .../X86/insert-binop-with-constant.ll | 272 +++++++++--------- 3 files changed, 202 insertions(+), 160 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index c03e70fdcef55d..b68182e6098dba 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -313,23 +313,48 @@ static bool foldBitcastShuf(Instruction &I, const TargetTransformInfo &TTI) { /// Match a vector binop instruction with inserted scalar operands and convert /// to scalar binop followed by insertelement. 
static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { - Instruction *Ins0, *Ins1; - if (!match(&I, m_BinOp(m_Instruction(Ins0), m_Instruction(Ins1)))) + Value *Ins0, *Ins1; + if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1)))) return false; + // Match against one or both scalar values being inserted into constant + // vectors: + // vec_bo VecC0, (inselt VecC1, V1, Index) + // vec_bo (inselt VecC0, V0, Index), VecC1 + // vec_bo (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) // TODO: Deal with mismatched index constants and variable indexes? - Constant *VecC0, *VecC1; - Value *V0, *V1; - uint64_t Index; + Constant *VecC0 = nullptr, *VecC1 = nullptr; + Value *V0 = nullptr, *V1 = nullptr; + uint64_t Index0 = 0, Index1 = 0; if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0), - m_ConstantInt(Index))) || - !match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1), - m_SpecificInt(Index)))) + m_ConstantInt(Index0))) && + !match(Ins0, m_Constant(VecC0))) + return false; + if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1), + m_ConstantInt(Index1))) && + !match(Ins1, m_Constant(VecC1))) + return false; + + bool IsConst0 = !V0; + bool IsConst1 = !V1; + if (IsConst0 && IsConst1) + return false; + if (!IsConst0 && !IsConst1 && Index0 != Index1) return false; - Type *ScalarTy = V0->getType(); + // Bail for single insertion if it is a load. + // TODO: Handle this once getVectorInstrCost can cost for load/stores. + auto *I0 = dyn_cast_or_null(V0); + auto *I1 = dyn_cast_or_null(V1); + if ((IsConst0 && I1 && I1->mayReadFromMemory()) || + (IsConst1 && I0 && I0->mayReadFromMemory())) + return false; + + uint64_t Index = IsConst0 ? Index1 : Index0; + Type *ScalarTy = IsConst0 ? 
V1->getType() : V0->getType(); Type *VecTy = I.getType(); - assert(VecTy->isVectorTy() && ScalarTy == V1->getType() && + assert(VecTy->isVectorTy() && + (IsConst0 || IsConst1 || V0->getType() == V1->getType()) && (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy()) && "Unexpected types for insert into binop"); @@ -341,10 +366,11 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { // both sequences. int InsertCost = TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index); - int OldCost = InsertCost + InsertCost + VectorOpCost; + int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + + VectorOpCost; int NewCost = ScalarOpCost + InsertCost + - !Ins0->hasOneUse() * InsertCost + - !Ins1->hasOneUse() * InsertCost; + (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) + + (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost); // We want to scalarize unless the vector variant actually has lower cost. if (OldCost < NewCost) @@ -354,6 +380,13 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { // inselt NewVecC, (scalar_bo V0, V1), Index ++NumScalarBO; IRBuilder<> Builder(&I); + + // For constant cases, extract the scalar element, this should constant fold. + if (IsConst0) + V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index)); + if (IsConst1) + V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index)); + Value *Scalar = Builder.CreateBinOp(Opcode, V0, V1, I.getName() + ".scalar"); // All IR flags are safe to back-propagate. 
There is no potential for extra diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll index 3b341f6a5b7a55..0d99654be52893 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll @@ -12,31 +12,24 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) { ; CHECK-LABEL: @square( ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2 -; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[DIV]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234 -; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL]], i32 0 ; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75 -; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[MUL5]], i32 0 ; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452 -; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[DIV9]], i32 0 ; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53 -; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> undef, i32 [[MUL13]], i32 0 ; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820 -; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[DIV17]], i32 0 ; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2 -; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[MUL21]], i32 0 -; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SPLATINSERT25]], -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SPLATINSERT18]] -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SPLATINSERT6]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]] -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]] -; CHECK-NEXT: 
[[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]] -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]] -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP10]], [[NUM:%.*]] +; CHECK-NEXT: [[DOTSCALAR:%.*]] = add i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[DOTSCALAR1:%.*]] = add i32 [[DOTSCALAR]], [[DIV17]] +; CHECK-NEXT: [[DOTSCALAR2:%.*]] = add i32 [[DOTSCALAR1]], [[MUL5]] +; CHECK-NEXT: [[DOTSCALAR3:%.*]] = add i32 [[DOTSCALAR2]], [[DIV]] +; CHECK-NEXT: [[DOTSCALAR4:%.*]] = add i32 [[DOTSCALAR3]], [[MUL13]] +; CHECK-NEXT: [[DOTSCALAR5:%.*]] = add i32 [[DOTSCALAR4]], [[MUL]] +; CHECK-NEXT: [[DOTSCALAR6:%.*]] = add i32 [[DOTSCALAR5]], [[DIV9]] +; CHECK-NEXT: [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]] +; CHECK-NEXT: [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[DOTSCALAR8]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]] ; CHECK-NEXT: ret <4 x i32> [[ADD29]] ; %add = add <4 x i32> %num, diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index e0d4623f505f70..a400e8f42907cd 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -4,8 +4,8 @@ define <2 x i64> @add_constant(i64 %x) { ; CHECK-LABEL: @add_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42 +; 
CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -15,8 +15,8 @@ define <2 x i64> @add_constant(i64 %x) { define <2 x i64> @add_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @add_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -41,8 +41,8 @@ define <2 x i64> @add_constant_load(i64* %p) { define <4 x i32> @sub_constant_op0(i32 %x) { ; CHECK-LABEL: @sub_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sub nuw nsw <4 x i32> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw nsw i32 -42, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <4 x i32> undef, i32 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i32> [[BO]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -52,8 +52,8 @@ define <4 x i32> @sub_constant_op0(i32 %x) { define <4 x i32> @sub_constant_op0_not_undef_lane(i32 %x) { ; CHECK-LABEL: @sub_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sub nuw <4 x i32> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i32 42, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <4 x i32> undef, i32 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i32> [[BO]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -63,8 +63,8 @@ define <4 x i32> @sub_constant_op0_not_undef_lane(i32 %x) { define <8 x i16> @sub_constant_op1(i16 %x) { ; CHECK-LABEL: @sub_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 -; 
CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i16 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <8 x i16> undef, i16 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[BO]] ; %ins = insertelement <8 x i16> undef, i16 %x, i32 0 @@ -74,8 +74,8 @@ define <8 x i16> @sub_constant_op1(i16 %x) { define <8 x i16> @sub_constant_op1_not_undef_lane(i16 %x) { ; CHECK-LABEL: @sub_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i16 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <8 x i16> undef, i16 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[BO]] ; %ins = insertelement <8 x i16> undef, i16 %x, i32 0 @@ -85,8 +85,8 @@ define <8 x i16> @sub_constant_op1_not_undef_lane(i16 %x) { define <16 x i8> @mul_constant(i8 %x) { ; CHECK-LABEL: @mul_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 2 -; CHECK-NEXT: [[BO:%.*]] = mul <16 x i8> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = mul i8 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <16 x i8> , i8 [[BO_SCALAR]], i64 2 ; CHECK-NEXT: ret <16 x i8> [[BO]] ; %ins = insertelement <16 x i8> undef, i8 %x, i32 2 @@ -96,8 +96,8 @@ define <16 x i8> @mul_constant(i8 %x) { define <3 x i64> @mul_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @mul_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i64> undef, i64 [[X:%.*]], i32 2 -; CHECK-NEXT: [[BO:%.*]] = mul <3 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = mul i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <3 x i64> , i64 [[BO_SCALAR]], i64 2 ; CHECK-NEXT: ret <3 x i64> [[BO]] ; %ins = insertelement <3 x i64> undef, i64 %x, i32 2 @@ -106,12 +106,20 @@ define <3 x i64> @mul_constant_not_undef_lane(i64 %x) { } define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { -; 
CHECK-LABEL: @mul_constant_multiuse( -; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[MUL:%.*]] = mul <16 x i8> [[INS]], -; CHECK-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] -; CHECK-NEXT: ret <16 x i8> [[XOR]] +; SSE-LABEL: @mul_constant_multiuse( +; SSE-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 +; SSE-NEXT: [[MUL:%.*]] = mul <16 x i8> [[INS]], +; SSE-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] +; SSE-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] +; SSE-NEXT: ret <16 x i8> [[XOR]] +; +; AVX-LABEL: @mul_constant_multiuse( +; AVX-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 +; AVX-NEXT: [[MUL_SCALAR:%.*]] = mul i8 [[A0]], 3 +; AVX-NEXT: [[MUL:%.*]] = insertelement <16 x i8> , i8 [[MUL_SCALAR]], i64 0 +; AVX-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] +; AVX-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] +; AVX-NEXT: ret <16 x i8> [[XOR]] ; %ins = insertelement <16 x i8> , i8 %a0, i32 0 %mul = mul <16 x i8> %ins, @@ -122,8 +130,8 @@ define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { define <2 x i64> @shl_constant_op0(i64 %x) { ; CHECK-LABEL: @shl_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -133,8 +141,8 @@ define <2 x i64> @shl_constant_op0(i64 %x) { define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, 
[[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -156,12 +164,20 @@ define <2 x i64> @shl_constant_op0_load(i64* %p) { } define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { -; CHECK-LABEL: @shl_constant_op0_multiuse( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INS]], -; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] -; CHECK-NEXT: ret <4 x i32> [[XOR]] +; SSE-LABEL: @shl_constant_op0_multiuse( +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 +; SSE-NEXT: [[MUL_SCALAR:%.*]] = shl i32 [[A0]], 3 +; SSE-NEXT: [[MUL:%.*]] = insertelement <4 x i32> , i32 [[MUL_SCALAR]], i64 0 +; SSE-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] +; SSE-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] +; SSE-NEXT: ret <4 x i32> [[XOR]] +; +; AVX-LABEL: @shl_constant_op0_multiuse( +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 +; AVX-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INS]], +; AVX-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] +; AVX-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] +; AVX-NEXT: ret <4 x i32> [[XOR]] ; %ins = insertelement <4 x i32> , i32 %a0, i32 0 %mul = shl <4 x i32> %ins, @@ -172,8 +188,8 @@ define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { define <2 x i64> @shl_constant_op1(i64 %x) { ; CHECK-LABEL: @shl_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ 
-183,8 +199,8 @@ define <2 x i64> @shl_constant_op1(i64 %x) { define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -207,8 +223,8 @@ define <2 x i64> @shl_constant_op1_load(i64* %p) { define <2 x i64> @ashr_constant_op0(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -218,8 +234,8 @@ define <2 x i64> @ashr_constant_op0(i64 %x) { define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -229,8 +245,8 @@ define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @ashr_constant_op1(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 +; CHECK-NEXT: 
[[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -240,8 +256,8 @@ define <2 x i64> @ashr_constant_op1(i64 %x) { define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -251,8 +267,8 @@ define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op0(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -262,8 +278,8 @@ define <2 x i64> @lshr_constant_op0(i64 %x) { define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -273,8 +289,8 @@ define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op1(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 
[[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -284,8 +300,8 @@ define <2 x i64> @lshr_constant_op1(i64 %x) { define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -295,8 +311,8 @@ define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op0(i64 %x) { ; CHECK-LABEL: @urem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -306,8 +322,8 @@ define <2 x i64> @urem_constant_op0(i64 %x) { define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -317,8 +333,8 @@ define <2 x i64> 
@urem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op1(i64 %x) { ; CHECK-LABEL: @urem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -328,8 +344,8 @@ define <2 x i64> @urem_constant_op1(i64 %x) { define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -339,8 +355,8 @@ define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op0(i64 %x) { ; CHECK-LABEL: @srem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -350,8 +366,8 @@ define <2 x i64> @srem_constant_op0(i64 %x) { define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 
[[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -361,8 +377,8 @@ define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op1(i64 %x) { ; CHECK-LABEL: @srem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -372,8 +388,8 @@ define <2 x i64> @srem_constant_op1(i64 %x) { define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -383,8 +399,8 @@ define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op0(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -394,8 +410,8 @@ define <2 x i64> @udiv_constant_op0(i64 %x) { define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: 
[[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -405,8 +421,8 @@ define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op1(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -416,8 +432,8 @@ define <2 x i64> @udiv_constant_op1(i64 %x) { define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -427,8 +443,8 @@ define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op0(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -438,8 +454,8 @@ define <2 x i64> @sdiv_constant_op0(i64 %x) { define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 
%x) { ; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -449,8 +465,8 @@ define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op1(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -460,8 +476,8 @@ define <2 x i64> @sdiv_constant_op1(i64 %x) { define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -471,8 +487,8 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @and_constant(i64 %x) { ; CHECK-LABEL: @and_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x 
i64> undef, i64 %x, i32 0 @@ -482,8 +498,8 @@ define <2 x i64> @and_constant(i64 %x) { define <2 x i64> @and_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @and_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -493,8 +509,8 @@ define <2 x i64> @and_constant_not_undef_lane(i64 %x) { define <2 x i64> @or_constant(i64 %x) { ; CHECK-LABEL: @or_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -504,8 +520,8 @@ define <2 x i64> @or_constant(i64 %x) { define <2 x i64> @or_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @or_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -515,8 +531,8 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) { define <2 x i64> @xor_constant(i64 %x) { ; CHECK-LABEL: @xor_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; 
CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -526,8 +542,8 @@ define <2 x i64> @xor_constant(i64 %x) { define <2 x i64> @xor_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @xor_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -537,8 +553,8 @@ define <2 x i64> @xor_constant_not_undef_lane(i64 %x) { define <2 x double> @fadd_constant(double %x) { ; CHECK-LABEL: @fadd_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -548,8 +564,8 @@ define <2 x double> @fadd_constant(double %x) { define <2 x double> @fadd_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fadd_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], -4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -559,8 +575,8 @@ define <2 x double> @fadd_constant_not_undef_lane(double %x) { define <2 x double> @fsub_constant_op0(double %x) { ; CHECK-LABEL: @fsub_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; 
CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub fast double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -570,8 +586,8 @@ define <2 x double> @fsub_constant_op0(double %x) { define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fsub nsz <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub nsz double -4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -581,8 +597,8 @@ define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { define <2 x double> @fsub_constant_op1(double %x) { ; CHECK-LABEL: @fsub_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -592,8 +608,8 @@ define <2 x double> @fsub_constant_op1(double %x) { define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> 
[[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -603,8 +619,8 @@ define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { define <2 x double> @fmul_constant(double %x) { ; CHECK-LABEL: @fmul_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -614,8 +630,8 @@ define <2 x double> @fmul_constant(double %x) { define <2 x double> @fmul_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fmul_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fmul <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul double [[X:%.*]], -4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -625,8 +641,8 @@ define <2 x double> @fmul_constant_not_undef_lane(double %x) { define <2 x double> @fdiv_constant_op0(double %x) { ; CHECK-LABEL: @fdiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fdiv nnan <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv nnan double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -636,8 +652,8 @@ define <2 x double> @fdiv_constant_op0(double %x) { define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane( -; CHECK-NEXT: 
[[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -647,8 +663,8 @@ define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { define <2 x double> @fdiv_constant_op1(double %x) { ; CHECK-LABEL: @fdiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -658,8 +674,8 @@ define <2 x double> @fdiv_constant_op1(double %x) { define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -669,8 +685,8 @@ define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { define <2 x double> @frem_constant_op0(double %x) { ; CHECK-LABEL: @frem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = frem fast <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem fast double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> 
, double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -680,8 +696,8 @@ define <2 x double> @frem_constant_op0(double %x) { define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = frem <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem double -4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -691,8 +707,8 @@ define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { define <2 x double> @frem_constant_op1(double %x) { ; CHECK-LABEL: @frem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = frem ninf <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem ninf double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -702,8 +718,8 @@ define <2 x double> @frem_constant_op1(double %x) { define <2 x double> @frem_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = frem nnan <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem nnan double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 From 6bb93e3dd0e28dafe6d3ddb700d2036d00b323aa Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 9 Jun 
2020 11:07:25 -0700 Subject: [PATCH 05/25] [gcov][test] Add mkdir -p %t && cd %t This allows an alternative lit runner (which does not chdir to %T) to run within a read-only source tree. --- llvm/test/Transforms/GCOVProfiling/global-ctor.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/Transforms/GCOVProfiling/global-ctor.ll b/llvm/test/Transforms/GCOVProfiling/global-ctor.ll index 596a2ad77635d1..e90385c7c42e00 100644 --- a/llvm/test/Transforms/GCOVProfiling/global-ctor.ll +++ b/llvm/test/Transforms/GCOVProfiling/global-ctor.ll @@ -1,5 +1,6 @@ ;; For a global constructor, _GLOBAL__sub_I_ only has artificial lines. ;; Test that we don't instrument those functions. +; RUN: mkdir -p %t && cd %t ; RUN: opt -S -insert-gcov-profiling < %s | FileCheck %s ; RUN: opt -S -passes=insert-gcov-profiling < %s | FileCheck %s From 2fea3fe41c5a177d019dd99fb1b43d767eccde24 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 9 Jun 2020 17:35:45 +0100 Subject: [PATCH 06/25] [MachineScheduler] Update available queue on the first mop of a new cycle If a resource can be held for multiple cycles in the schedule model then an instruction can be placed into the available queue, another instruction can be scheduled, but the first will not be taken back out if the two instructions hazard. To fix this make sure that we update the available queue even on the first MOp of a cycle, pushing available instructions back into the pending queue if they now conflict. This happens with some downstream schedules we have around MVE instruction scheduling where we use ResourceCycles=[2] to show the instruction executing over two beats. Apparently the test changes here are OK too. 
Differential Revision: https://reviews.llvm.org/D76909 --- llvm/lib/CodeGen/MachineScheduler.cpp | 16 +- .../CodeGen/AArch64/misched-fusion-aes.ll | 4 +- .../CodeGen/PowerPC/2007-01-15-AsmDialect.ll | 2 +- .../CodeGen/PowerPC/2008-10-28-f128-i32.ll | 126 +++---- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 6 +- .../CodeGen/PowerPC/aix32-cc-abi-vaarg.ll | 18 +- .../PowerPC/fp128-bitcast-after-operation.ll | 4 +- llvm/test/CodeGen/PowerPC/inc-of-add.ll | 332 +++++++++--------- llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll | 2 +- .../ppcf128-constrained-fp-intrinsics.ll | 14 +- llvm/test/CodeGen/PowerPC/pr43976.ll | 6 +- llvm/test/CodeGen/PowerPC/spe.ll | 6 +- llvm/test/CodeGen/PowerPC/sub-of-not.ll | 332 +++++++++--------- .../umulo-128-legalisation-lowering.ll | 60 ++-- llvm/test/CodeGen/PowerPC/vec_splat.ll | 156 ++++---- 15 files changed, 542 insertions(+), 542 deletions(-) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index a6889919137495..0f21c97a30f685 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2424,16 +2424,14 @@ SUnit *SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); - if (CurrMOps > 0) { - // Defer any ready instrs that now have a hazard. - for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { - if (checkHazard(*I)) { - Pending.push(*I); - I = Available.remove(I); - continue; - } - ++I; + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; } + ++I; } for (unsigned i = 0; Available.empty(); ++i) { // FIXME: Re-enable assert once PR20057 is resolved. 
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll index 70038e934c9f74..95a419bd7398a0 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -79,7 +79,7 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesea: ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesmc [[VA]], [[VA]] +; CHECK: aesmc [[VA]], [[VA]] ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesmc [[VB]], [[VB]] ; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} @@ -163,7 +163,7 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesda: ; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesimc [[VA]], [[VA]] +; CHECK: aesimc [[VA]], [[VA]] ; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesimc [[VB]], [[VB]] ; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} diff --git a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll index d216cf59bde263..9af68e7d801279 100644 --- a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll +++ b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll @@ -2,7 +2,7 @@ define i32 @foo() nounwind { entry: -; CHECK: cntlzw 3, 4 +; CHECK: cntlzw 3, 3 %retval = alloca i32, align 4 ; [#uses=2] %temp = alloca i32, align 4 ; [#uses=2] %ctz_x = alloca i32, align 4 ; [#uses=3] diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll index 7897d1c6b8a5af..028904fc3200a9 100644 --- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -9,29 +9,29 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stwu 1, -464(1) ; CHECK-NEXT: mfcr 12 ; CHECK-NEXT: stw 29, 412(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; 
CHECK-NEXT: lis 3, .LCPI0_0@ha +; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 12, 408(1) ; CHECK-NEXT: stfd 2, 376(1) -; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 1, 384(1) -; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill ; CHECK-NEXT: lwz 4, 380(1) -; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) -; CHECK-NEXT: lwz 3, 384(1) +; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill ; CHECK-NEXT: stw 4, 396(1) -; CHECK-NEXT: fcmpu 0, 2, 27 ; CHECK-NEXT: lwz 4, 376(1) +; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) +; CHECK-NEXT: stfd 1, 384(1) +; CHECK-NEXT: stw 4, 392(1) +; CHECK-NEXT: fcmpu 0, 2, 27 +; CHECK-NEXT: lwz 4, 388(1) ; CHECK-NEXT: fcmpu 1, 1, 27 +; CHECK-NEXT: lwz 3, 384(1) ; CHECK-NEXT: crand 20, 6, 0 ; CHECK-NEXT: cror 20, 4, 20 -; CHECK-NEXT: stw 4, 392(1) -; CHECK-NEXT: stw 3, 400(1) -; CHECK-NEXT: lwz 4, 388(1) +; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill ; CHECK-NEXT: stw 4, 404(1) +; CHECK-NEXT: stw 3, 400(1) ; CHECK-NEXT: bc 4, 20, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb5 ; CHECK-NEXT: li 3, 0 @@ -41,54 +41,53 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 0, 400(1) ; CHECK-NEXT: lis 3, 15856 ; CHECK-NEXT: stw 3, 336(1) -; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: li 29, 0 ; CHECK-NEXT: stfd 0, 304(1) -; CHECK-NEXT: stw 29, 340(1) -; CHECK-NEXT: stw 29, 332(1) -; CHECK-NEXT: stw 29, 328(1) ; CHECK-NEXT: lwz 3, 308(1) -; CHECK-NEXT: stfd 1, 296(1) -; CHECK-NEXT: lfd 3, 336(1) -; CHECK-NEXT: lfd 4, 328(1) +; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: stw 3, 324(1) ; CHECK-NEXT: lwz 3, 304(1) +; CHECK-NEXT: stfd 1, 296(1) ; CHECK-NEXT: stw 3, 320(1) ; CHECK-NEXT: lwz 3, 300(1) 
-; CHECK-NEXT: lfd 31, 320(1) +; CHECK-NEXT: stw 29, 340(1) ; CHECK-NEXT: stw 3, 316(1) -; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 296(1) +; CHECK-NEXT: stw 29, 332(1) ; CHECK-NEXT: stw 3, 312(1) +; CHECK-NEXT: stw 29, 328(1) +; CHECK-NEXT: lfd 31, 320(1) ; CHECK-NEXT: lfd 30, 312(1) +; CHECK-NEXT: lfd 3, 336(1) +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: lfd 4, 328(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: bl __gcc_qmul ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stfd 1, 280(1) -; CHECK-NEXT: stw 3, 368(1) -; CHECK-NEXT: stfd 2, 288(1) -; CHECK-NEXT: stw 29, 372(1) -; CHECK-NEXT: stw 29, 364(1) -; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stw 3, 368(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 368(1) -; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stfd 2, 288(1) ; CHECK-NEXT: stw 3, 356(1) ; CHECK-NEXT: lwz 3, 280(1) +; CHECK-NEXT: stw 29, 372(1) ; CHECK-NEXT: stw 3, 352(1) ; CHECK-NEXT: lwz 3, 292(1) -; CHECK-NEXT: lfd 1, 352(1) +; CHECK-NEXT: stw 29, 364(1) ; CHECK-NEXT: stw 3, 348(1) ; CHECK-NEXT: lwz 3, 288(1) +; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: stw 3, 344(1) +; CHECK-NEXT: lfd 3, 368(1) +; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lfd 1, 352(1) ; CHECK-NEXT: lfd 2, 344(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsb1 31 ; CHECK-NEXT: lis 3, .LCPI0_1@ha -; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: mtfsb0 30 ; CHECK-NEXT: fadd 1, 2, 1 ; CHECK-NEXT: mtfsf 1, 0 @@ -102,6 +101,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) ; CHECK-NEXT: fctiwz 0, 0 ; CHECK-NEXT: stfd 0, 152(1) +; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: lwz 3, 164(1) ; CHECK-NEXT: fcmpu 1, 29, 1 ; CHECK-NEXT: lwz 4, 156(1) @@ -120,25 +120,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: bl __floatditf ; CHECK-NEXT: lis 3, 17392 ; CHECK-NEXT: stfd 1, 208(1) -; CHECK-NEXT: stw 3, 240(1) -; CHECK-NEXT: stfd 2, 
200(1) -; CHECK-NEXT: stw 29, 244(1) -; CHECK-NEXT: stw 29, 236(1) -; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 212(1) +; CHECK-NEXT: stw 3, 240(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 240(1) -; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lwz 3, 212(1) ; CHECK-NEXT: cmpwi 2, 30, 0 +; CHECK-NEXT: stfd 2, 200(1) ; CHECK-NEXT: stw 3, 228(1) ; CHECK-NEXT: lwz 3, 208(1) +; CHECK-NEXT: stw 29, 244(1) ; CHECK-NEXT: stw 3, 224(1) ; CHECK-NEXT: lwz 3, 204(1) -; CHECK-NEXT: lfd 1, 224(1) +; CHECK-NEXT: stw 29, 236(1) ; CHECK-NEXT: stw 3, 220(1) ; CHECK-NEXT: lwz 3, 200(1) +; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: stw 3, 216(1) +; CHECK-NEXT: lfd 3, 240(1) +; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lfd 1, 224(1) ; CHECK-NEXT: lfd 2, 216(1) ; CHECK-NEXT: bl __gcc_qadd ; CHECK-NEXT: blt 2, .LBB0_7 @@ -150,9 +150,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 ; CHECK-NEXT: stfd 1, 184(1) -; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 188(1) +; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: stw 3, 260(1) ; CHECK-NEXT: lwz 3, 184(1) @@ -165,10 +165,10 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 4, 248(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: stfd 2, 176(1) -; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: fcmpu 1, 2, 27 ; CHECK-NEXT: lwz 3, 180(1) ; CHECK-NEXT: fcmpu 0, 1, 27 +; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: crandc 20, 2, 4 ; CHECK-NEXT: stw 3, 268(1) ; CHECK-NEXT: lwz 3, 176(1) @@ -184,27 +184,27 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: cror 20, 1, 3 ; CHECK-NEXT: bc 12, 20, .LBB0_14 ; CHECK-NEXT: # %bb.11: # %bb2 -; CHECK-NEXT: fneg 28, 31 -; CHECK-NEXT: stfd 28, 48(1) +; CHECK-NEXT: fneg 29, 31 +; CHECK-NEXT: stfd 29, 48(1) ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stw 3, 80(1) -; CHECK-NEXT: fneg 29, 30 +; CHECK-NEXT: fneg 28, 30 ; CHECK-NEXT: 
lwz 3, 52(1) -; CHECK-NEXT: stfd 29, 40(1) ; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: stw 29, 84(1) -; CHECK-NEXT: stw 29, 76(1) -; CHECK-NEXT: stw 29, 72(1) +; CHECK-NEXT: stfd 28, 40(1) ; CHECK-NEXT: stw 3, 68(1) -; CHECK-NEXT: lfd 3, 80(1) -; CHECK-NEXT: lfd 4, 72(1) ; CHECK-NEXT: lwz 3, 48(1) +; CHECK-NEXT: stw 29, 84(1) ; CHECK-NEXT: stw 3, 64(1) ; CHECK-NEXT: lwz 3, 44(1) -; CHECK-NEXT: lfd 1, 64(1) +; CHECK-NEXT: stw 29, 76(1) ; CHECK-NEXT: stw 3, 60(1) ; CHECK-NEXT: lwz 3, 40(1) +; CHECK-NEXT: stw 29, 72(1) ; CHECK-NEXT: stw 3, 56(1) +; CHECK-NEXT: lfd 3, 80(1) +; CHECK-NEXT: lfd 4, 72(1) +; CHECK-NEXT: lfd 1, 64(1) ; CHECK-NEXT: lfd 2, 56(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -220,12 +220,12 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 0, .LCPI0_2@l(3) ; CHECK-NEXT: lis 3, .LCPI0_3@ha ; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: fadd 2, 29, 28 +; CHECK-NEXT: fadd 2, 28, 29 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_3@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 24(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 36(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 28(1) @@ -244,22 +244,22 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stfd 31, 112(1) ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: stw 3, 148(1) +; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: stw 3, 140(1) ; CHECK-NEXT: stw 3, 136(1) -; CHECK-NEXT: stfd 30, 104(1) -; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: lwz 3, 116(1) -; CHECK-NEXT: stw 4, 144(1) -; CHECK-NEXT: lfd 4, 136(1) +; CHECK-NEXT: stfd 30, 104(1) ; CHECK-NEXT: stw 3, 132(1) -; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: lwz 3, 112(1) +; CHECK-NEXT: stw 4, 144(1) ; CHECK-NEXT: stw 3, 128(1) ; CHECK-NEXT: lwz 3, 108(1) -; CHECK-NEXT: lfd 1, 128(1) +; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: stw 3, 124(1) ; CHECK-NEXT: lwz 3, 104(1) +; CHECK-NEXT: lfd 4, 136(1) ; CHECK-NEXT: stw 3, 120(1) +; CHECK-NEXT: lfd 1, 128(1) ; CHECK-NEXT: 
lfd 2, 120(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -278,9 +278,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fadd 2, 30, 31 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 88(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 100(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 92(1) @@ -300,8 +300,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 28, 432(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 12, 408(1) ; CHECK-NEXT: lfd 27, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: mtcrf 32, 12 # cr2 +; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 412(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 468(1) ; CHECK-NEXT: addi 1, 1, 464 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index d155a78812257a..52070aa9063d6c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -704,8 +704,8 @@ declare void @test_vararg(i32, ...) 
; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -773,8 +773,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -844,8 +844,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, 
%stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll index 9f521788a3fccf..c276d4ccc39523 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll @@ -68,15 +68,15 @@ ; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r5, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) -; 32BIT-DAG: renamable $r5 = ADDI %fixed-stack.0, 4 -; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.1) -; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.0) -; 32BIT-DAG: STW renamable $r5, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) -; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 +; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) +; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 4 +; 32BIT-DAG: STW killed renamable $r11, 0, %stack.1.arg2 :: 
(store 4 into %ir.1) +; 32BIT-DAG: renamable $r11 = ADDI %fixed-stack.0, 0 +; 32BIT-DAG: STW renamable $r11, 0, %stack.0.arg1 :: (store 4 into %ir.0) +; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) +; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) +; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) +; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3 ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index 54ceccd9c59ab1..fa57f50cb43df2 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -28,9 +28,9 @@ entry: ; PPC32-DAG: stfd 2, 16(1) ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) +; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) ; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) -; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]] ; PPC32-DAG: xor [[LO0]], [[LO0]], [[FLIP_BIT]] ; PPC32: blr @@ -68,9 +68,9 @@ entry: ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) -; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-NOT: BARRIER ; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768 +; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768 ; PPC32: blr %0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll index fa03379a3c3076..90004143326fbc 100644 --- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll +++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll @@ -65,88 +65,88 @@ define <16 x 
i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) +; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill ; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) ; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) ; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) -; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; PPC32-NEXT: lbz 21, 159(1) ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 
143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: 
addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 
191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; 
PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; 
PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; 
PPC64BE-NEXT: add 6, 11, 6 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 +; PPC64BE-NEXT: addi 11, 11, 1 ; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 ; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll index 42cbb30318bceb..5fae34f212cccc 100644 --- a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll +++ b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll @@ -17,9 +17,9 @@ entry: ; argument put on stack. ; CHECK-NOT: mr 8, 4 ; CHECK: stw 6, 16(1) +; CHECK: stw 7, 20(1) ; CHECK: stw 5, 12(1) ; CHECK: stw 4, 8(1) -; CHECK: stw 7, 20(1) declare i32 @printf(i8* nocapture readonly, ...) 
diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index c9d9cf870e49fc..b87f1a682e25aa 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1442,19 +1442,19 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: mr 29, 3 ; PC64-NEXT: li 3, 0 ; PC64-NEXT: stfd 31, 168(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: std 30, 128(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: mr 30, 4 ; PC64-NEXT: lfs 31, 0(29) ; PC64-NEXT: std 3, 8(4) ; PC64-NEXT: addis 3, 2, .LCPI32_0@toc@ha +; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: lfs 30, .LCPI32_0@toc@l(3) ; PC64-NEXT: fmr 1, 31 ; PC64-NEXT: fmr 3, 31 +; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill ; PC64-NEXT: fmr 2, 30 ; PC64-NEXT: fmr 4, 30 +; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: stfd 31, 0(4) ; PC64-NEXT: bl __gcc_qadd ; PC64-NEXT: nop @@ -1475,14 +1475,14 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: nop ; PC64-NEXT: frsp 0, 1 ; PC64-NEXT: stfs 0, 0(29) -; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: ld 29, 120(1) # 8-byte Folded Reload ; PC64-NEXT: stfd 1, -16(30) ; PC64-NEXT: stfd 2, -8(30) ; PC64-NEXT: ld 30, 128(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: addi 1, 1, 176 ; PC64-NEXT: ld 0, 
16(1) ; PC64-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/pr43976.ll b/llvm/test/CodeGen/PowerPC/pr43976.ll index 91722283f4ae01..9dc1a52c567f57 100644 --- a/llvm/test/CodeGen/PowerPC/pr43976.ll +++ b/llvm/test/CodeGen/PowerPC/pr43976.ll @@ -10,11 +10,11 @@ define dso_local signext i32 @b() local_unnamed_addr #0 { ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -144(r1) ; CHECK-NEXT: addis r3, r2, a@toc@ha -; CHECK-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-NEXT: lfd f0, a@toc@l(r3) -; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r4) ; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: lfd f0, a@toc@l(r3) +; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: sldi r4, r4, 63 +; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r3) ; CHECK-NEXT: fsub f2, f0, f1 ; CHECK-NEXT: fctidz f2, f2 ; CHECK-NEXT: stfd f2, 128(r1) diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index d2400be43cb490..1c4c7a33981770 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1297,6 +1297,8 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload ; CHECK-NEXT: li 5, 256 ; CHECK-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload +; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 +; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: evldd 29, 248(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 28, 240(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 27, 232(1) # 8-byte Folded Reload @@ -1313,8 +1315,6 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evldd 16, 144(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 15, 136(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: lwz 31, 348(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 30, 344(1) # 4-byte Folded Reload ; CHECK-NEXT: 
lwz 29, 340(1) # 4-byte Folded Reload @@ -1392,8 +1392,8 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: .LBB57_4: # %for.cond.cleanup ; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 52(1) diff --git a/llvm/test/CodeGen/PowerPC/sub-of-not.ll b/llvm/test/CodeGen/PowerPC/sub-of-not.ll index db92a3eb1bee95..d2b55aaf7ac839 100644 --- a/llvm/test/CodeGen/PowerPC/sub-of-not.ll +++ b/llvm/test/CodeGen/PowerPC/sub-of-not.ll @@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) +; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill ; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) ; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) ; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) -; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; 
PPC32-NEXT: lbz 21, 159(1) ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; 
PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; 
PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; 
PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; 
PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; 
PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; PPC64BE-NEXT: add 6, 11, 6 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 +; PPC64BE-NEXT: addi 11, 11, 1 ; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 ; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll index c0a8a76c7f1af2..815d5b7443e430 100644 --- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -5,23 +5,23 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; 
PPC64-LABEL: muloti_test: ; PPC64: # %bb.0: # %start +; PPC64-NEXT: mulhdu. 8, 3, 6 +; PPC64-NEXT: mcrf 1, 0 ; PPC64-NEXT: mulld 8, 5, 4 -; PPC64-NEXT: cmpdi 5, 3, 0 -; PPC64-NEXT: mulhdu. 9, 3, 6 +; PPC64-NEXT: cmpdi 3, 0 ; PPC64-NEXT: mulld 3, 3, 6 -; PPC64-NEXT: mcrf 1, 0 +; PPC64-NEXT: cmpdi 5, 5, 0 ; PPC64-NEXT: add 3, 3, 8 -; PPC64-NEXT: cmpdi 5, 0 -; PPC64-NEXT: crnor 20, 2, 22 -; PPC64-NEXT: cmpldi 3, 0 +; PPC64-NEXT: crnor 20, 22, 2 ; PPC64-NEXT: mulhdu 8, 4, 6 +; PPC64-NEXT: cmpldi 3, 0 ; PPC64-NEXT: add 3, 8, 3 ; PPC64-NEXT: cmpld 6, 3, 8 ; PPC64-NEXT: crandc 21, 24, 2 ; PPC64-NEXT: crorc 20, 20, 6 -; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: mulhdu. 5, 5, 4 ; PPC64-NEXT: crorc 20, 20, 2 +; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: crnor 20, 20, 21 ; PPC64-NEXT: mulld 4, 4, 6 ; PPC64-NEXT: bc 12, 20, .LBB0_2 @@ -38,13 +38,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 0, 4(1) ; PPC32-NEXT: stwu 1, -80(1) ; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill +; PPC32-NEXT: mfcr 12 ; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 27, 4 ; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 29, 7 ; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill -; PPC32-NEXT: mfcr 12 ; PPC32-NEXT: mr 30, 8 -; PPC32-NEXT: mr 29, 7 -; PPC32-NEXT: mr 27, 4 ; PPC32-NEXT: mr 26, 3 ; PPC32-NEXT: li 3, 0 ; PPC32-NEXT: li 4, 0 @@ -54,30 +54,36 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 23, 6 ; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 24, 5 ; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 25, 10 -; PPC32-NEXT: stw 12, 28(1) +; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 28, 9 -; PPC32-NEXT: mr 23, 6 -; PPC32-NEXT: mr 
24, 5 +; PPC32-NEXT: stw 12, 28(1) ; PPC32-NEXT: bl __multi3 ; PPC32-NEXT: mr 7, 4 ; PPC32-NEXT: mullw 4, 24, 30 +; PPC32-NEXT: cmpwi 5, 24, 0 +; PPC32-NEXT: cmpwi 6, 26, 0 +; PPC32-NEXT: cmpwi 7, 28, 0 +; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mullw 8, 29, 23 -; PPC32-NEXT: mullw 10, 28, 27 -; PPC32-NEXT: mullw 11, 26, 25 +; PPC32-NEXT: add 21, 8, 4 +; PPC32-NEXT: mullw 11, 28, 27 +; PPC32-NEXT: mullw 12, 26, 25 +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: cmplwi 7, 11, 0 ; PPC32-NEXT: mulhwu 9, 30, 23 -; PPC32-NEXT: mulhwu 12, 27, 25 +; PPC32-NEXT: add 12, 9, 21 +; PPC32-NEXT: cmplw 6, 12, 9 +; PPC32-NEXT: mulhwu 10, 27, 25 ; PPC32-NEXT: mullw 0, 30, 23 ; PPC32-NEXT: mullw 22, 27, 25 -; PPC32-NEXT: add 21, 8, 4 -; PPC32-NEXT: add 10, 11, 10 ; PPC32-NEXT: addc 4, 22, 0 -; PPC32-NEXT: add 11, 9, 21 -; PPC32-NEXT: add 0, 12, 10 -; PPC32-NEXT: adde 8, 0, 11 +; PPC32-NEXT: add 0, 10, 11 +; PPC32-NEXT: adde 8, 0, 12 ; PPC32-NEXT: addc 4, 7, 4 ; PPC32-NEXT: adde 8, 3, 8 ; PPC32-NEXT: xor 22, 4, 7 @@ -85,21 +91,15 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: or. 22, 22, 20 ; PPC32-NEXT: mcrf 1, 0 ; PPC32-NEXT: cmpwi 29, 0 -; PPC32-NEXT: cmpwi 5, 24, 0 -; PPC32-NEXT: cmpwi 6, 26, 0 -; PPC32-NEXT: cmpwi 7, 28, 0 ; PPC32-NEXT: crnor 8, 22, 2 ; PPC32-NEXT: mulhwu. 23, 29, 23 -; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mcrf 5, 0 ; PPC32-NEXT: cmplwi 21, 0 -; PPC32-NEXT: cmplw 6, 11, 9 -; PPC32-NEXT: cmplwi 7, 10, 0 ; PPC32-NEXT: crandc 10, 24, 2 -; PPC32-NEXT: cmplw 3, 0, 12 +; PPC32-NEXT: cmplw 3, 0, 10 +; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: mulhwu. 
9, 24, 30 ; PPC32-NEXT: mcrf 6, 0 -; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: cmplw 4, 7 ; PPC32-NEXT: cmplw 7, 8, 3 ; PPC32-NEXT: crand 12, 30, 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_splat.ll b/llvm/test/CodeGen/PowerPC/vec_splat.ll index 7c048ff3710858..0e6626bbce2315 100644 --- a/llvm/test/CodeGen/PowerPC/vec_splat.ll +++ b/llvm/test/CodeGen/PowerPC/vec_splat.ll @@ -10,17 +10,17 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { ; G3-LABEL: splat: ; G3: # %bb.0: -; G3-NEXT: lfs 0, 0(4) +; G3-NEXT: lfs 0, 12(4) ; G3-NEXT: lfs 2, 8(4) ; G3-NEXT: lfs 3, 4(4) -; G3-NEXT: lfs 4, 12(4) ; G3-NEXT: fadds 0, 0, 1 -; G3-NEXT: fadds 2, 2, 1 -; G3-NEXT: fadds 3, 3, 1 -; G3-NEXT: fadds 1, 4, 1 -; G3-NEXT: stfs 1, 12(3) -; G3-NEXT: stfs 2, 8(3) -; G3-NEXT: stfs 3, 4(3) +; G3-NEXT: lfs 4, 0(4) +; G3-NEXT: stfs 0, 12(3) +; G3-NEXT: fadds 0, 2, 1 +; G3-NEXT: stfs 0, 8(3) +; G3-NEXT: fadds 0, 3, 1 +; G3-NEXT: stfs 0, 4(3) +; G3-NEXT: fadds 0, 4, 1 ; G3-NEXT: stfs 0, 0(3) ; G3-NEXT: blr ; @@ -49,18 +49,18 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_i4: ; G3: # %bb.0: -; G3-NEXT: lwz 6, 0(4) +; G3-NEXT: lwz 6, 12(4) ; G3-NEXT: lwz 7, 8(4) ; G3-NEXT: lwz 8, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: add 6, 6, 5 -; G3-NEXT: add 8, 8, 5 -; G3-NEXT: add 7, 7, 5 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 6, 12(3) +; G3-NEXT: add 6, 7, 5 +; G3-NEXT: stw 6, 8(3) +; G3-NEXT: add 6, 8, 5 ; G3-NEXT: add 4, 4, 5 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 7, 8(3) -; G3-NEXT: stw 8, 4(3) -; G3-NEXT: stw 6, 0(3) +; G3-NEXT: stw 6, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_i4: @@ -88,18 +88,18 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i32: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) +; G3-NEXT: lwz 5, 12(4) ; G3-NEXT: lwz 6, 8(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 
12(4) ; G3-NEXT: addi 5, 5, -1 -; G3-NEXT: addi 7, 7, -1 -; G3-NEXT: addi 6, 6, -1 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 5, 12(3) +; G3-NEXT: addi 5, 6, -1 +; G3-NEXT: stw 5, 8(3) +; G3-NEXT: addi 5, 7, -1 ; G3-NEXT: addi 4, 4, -1 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: stw 5, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i32: @@ -118,22 +118,22 @@ define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i16: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) -; G3-NEXT: lwz 6, 8(4) +; G3-NEXT: lwz 5, 8(4) +; G3-NEXT: lwz 6, 0(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 5, 5, 1 -; G3-NEXT: addi 7, 7, 1 +; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 6, 6, 1 +; G3-NEXT: addi 7, 7, 1 ; G3-NEXT: addi 4, 4, 1 -; G3-NEXT: addis 5, 5, 1 -; G3-NEXT: addis 7, 7, 1 -; G3-NEXT: addis 6, 6, 1 ; G3-NEXT: addis 4, 4, 1 ; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: addis 4, 5, 1 +; G3-NEXT: stw 4, 8(3) +; G3-NEXT: addis 4, 7, 1 +; G3-NEXT: stw 4, 4(3) +; G3-NEXT: addis 4, 6, 1 +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i16: @@ -189,58 +189,60 @@ define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; G3-LABEL: spltish: ; G3: # %bb.0: ; G3-NEXT: stwu 1, -48(1) +; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; G3-NEXT: lbz 5, 0(4) +; G3-NEXT: lbz 30, 15(4) +; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; G3-NEXT: lbz 29, 13(4) +; G3-NEXT: stw 28, 32(1) # 4-byte Folded Spill +; G3-NEXT: lbz 28, 11(4) +; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill +; G3-NEXT: lbz 27, 9(4) +; G3-NEXT: stw 24, 16(1) # 4-byte Folded Spill ; G3-NEXT: stw 25, 20(1) # 4-byte Folded Spill ; G3-NEXT: stw 26, 24(1) # 4-byte Folded Spill -; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill -; G3-NEXT: stw 28, 32(1) # 4-byte Folded 
Spill -; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; G3-NEXT: lbz 5, 5(4) -; G3-NEXT: lbz 6, 3(4) -; G3-NEXT: lbz 7, 1(4) -; G3-NEXT: lbz 8, 0(4) -; G3-NEXT: lbz 9, 2(4) -; G3-NEXT: lbz 10, 4(4) -; G3-NEXT: lbz 11, 6(4) -; G3-NEXT: lbz 12, 8(4) -; G3-NEXT: lbz 0, 10(4) -; G3-NEXT: addi 7, 7, -15 -; G3-NEXT: lbz 30, 12(4) -; G3-NEXT: lbz 29, 14(4) -; G3-NEXT: lbz 28, 15(4) -; G3-NEXT: lbz 27, 13(4) -; G3-NEXT: lbz 26, 11(4) -; G3-NEXT: lbz 25, 9(4) -; G3-NEXT: addi 6, 6, -15 -; G3-NEXT: lbz 4, 7(4) -; G3-NEXT: addi 5, 5, -15 -; G3-NEXT: addi 25, 25, -15 -; G3-NEXT: addi 26, 26, -15 -; G3-NEXT: addi 4, 4, -15 -; G3-NEXT: addi 27, 27, -15 -; G3-NEXT: addi 28, 28, -15 -; G3-NEXT: stb 29, 14(3) -; G3-NEXT: stb 30, 12(3) -; G3-NEXT: stb 0, 10(3) -; G3-NEXT: stb 12, 8(3) -; G3-NEXT: stb 11, 6(3) -; G3-NEXT: stb 10, 4(3) -; G3-NEXT: stb 9, 2(3) -; G3-NEXT: stb 8, 0(3) -; G3-NEXT: stb 28, 15(3) -; G3-NEXT: stb 27, 13(3) -; G3-NEXT: stb 26, 11(3) -; G3-NEXT: stb 25, 9(3) -; G3-NEXT: stb 4, 7(3) +; G3-NEXT: lbz 6, 2(4) +; G3-NEXT: lbz 7, 4(4) +; G3-NEXT: lbz 8, 6(4) +; G3-NEXT: lbz 9, 8(4) +; G3-NEXT: lbz 10, 10(4) +; G3-NEXT: lbz 11, 12(4) +; G3-NEXT: lbz 12, 14(4) +; G3-NEXT: lbz 26, 7(4) +; G3-NEXT: lbz 25, 5(4) +; G3-NEXT: lbz 24, 3(4) +; G3-NEXT: lbz 4, 1(4) +; G3-NEXT: stb 5, 0(3) +; G3-NEXT: addi 5, 30, -15 +; G3-NEXT: stb 5, 15(3) +; G3-NEXT: addi 5, 29, -15 +; G3-NEXT: stb 5, 13(3) +; G3-NEXT: addi 5, 28, -15 +; G3-NEXT: stb 5, 11(3) +; G3-NEXT: addi 5, 27, -15 +; G3-NEXT: stb 5, 9(3) +; G3-NEXT: addi 5, 26, -15 +; G3-NEXT: stb 5, 7(3) +; G3-NEXT: addi 5, 25, -15 ; G3-NEXT: stb 5, 5(3) -; G3-NEXT: stb 6, 3(3) -; G3-NEXT: stb 7, 1(3) +; G3-NEXT: addi 5, 24, -15 +; G3-NEXT: addi 4, 4, -15 +; G3-NEXT: stb 12, 14(3) +; G3-NEXT: stb 11, 12(3) +; G3-NEXT: stb 10, 10(3) +; G3-NEXT: stb 9, 8(3) +; G3-NEXT: stb 8, 6(3) +; G3-NEXT: stb 7, 4(3) +; G3-NEXT: stb 6, 2(3) +; G3-NEXT: stb 5, 3(3) +; G3-NEXT: stb 4, 1(3) ; G3-NEXT: 
lwz 30, 40(1) # 4-byte Folded Reload ; G3-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; G3-NEXT: lwz 28, 32(1) # 4-byte Folded Reload ; G3-NEXT: lwz 27, 28(1) # 4-byte Folded Reload ; G3-NEXT: lwz 26, 24(1) # 4-byte Folded Reload ; G3-NEXT: lwz 25, 20(1) # 4-byte Folded Reload +; G3-NEXT: lwz 24, 16(1) # 4-byte Folded Reload ; G3-NEXT: addi 1, 1, 48 ; G3-NEXT: blr ; From ba890da2878299dc82b104c06f067e45162d880f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 9 Jun 2020 11:25:55 -0700 Subject: [PATCH 07/25] [ELF] Demote lazy symbols relative to a discarded section to Undefined Fixes PR45594. In `ObjFile::initializeSymbols()`, for a defined symbol relative to a discarded section (due to section group rules), it may have been inserted as a lazy symbol. We need to demote it to an Undefined to enable the `discarded section` error happened in a later pass. Add `LazyObjFile::fetched` (if true) and `ArchiveFile::parsed` (if false) to represent that there is an ongoing lazy symbol fetch and we should replace the current lazy symbol with an Undefined, instead of calling `Symbol::resolve` (`Symbol::resolve` should be called if the lazy symbol was added by an unrelated archive/lazy object). As a side result, one small issue in start-lib-comdat.s is now fixed. The hack motivating D51892 will be unsupported: if `.gnu.linkonce.t.__i686.get_pc_thunk.bx` in an archive is referenced by another section, this will likely be errored unless the function is also defined in a regular object file. (Bringing back rL330869 would error `undefined symbol` instead of the more relevant `discarded section`.) Note, glibc i386's crti.o still works (PR31215), because `.gnu.linkonce.t.__x86.get_pc_thunk.bx` is in crti.o (one of the first regular object files in a linker command line). 
Reviewed By: psmith Differential Revision: https://reviews.llvm.org/D79300 --- lld/ELF/InputFiles.cpp | 25 +++++++++--- lld/ELF/InputFiles.h | 4 ++ lld/test/ELF/comdat-discarded-lazy.s | 60 ++++++++++++++++++++++++++++ lld/test/ELF/i386-linkonce.s | 4 +- lld/test/ELF/start-lib-comdat.s | 2 +- 5 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 lld/test/ELF/comdat-discarded-lazy.s diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index c451aee1f921a9..5bbd6f0df7e9ab 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1117,8 +1117,20 @@ template void ObjFile::initializeSymbols() { // COMDAT member sections, and if a comdat group is discarded, some // defined symbol in a .eh_frame becomes dangling symbols. if (sec == &InputSection::discarded) { - this->symbols[i]->resolve( - Undefined{this, name, binding, stOther, type, secIdx}); + Undefined und{this, name, binding, stOther, type, secIdx}; + Symbol *sym = this->symbols[i]; + // !ArchiveFile::parsed or LazyObjFile::fetched means that the file + // containing this object has not finished processing, i.e. this symbol is + // a result of a lazy symbol fetch. We should demote the lazy symbol to an + // Undefined so that any relocations outside of the group to it will + // trigger a discarded section error. + if ((sym->symbolKind == Symbol::LazyArchiveKind && + !cast(sym->file)->parsed) || + (sym->symbolKind == Symbol::LazyObjectKind && + cast(sym->file)->fetched)) + sym->replace(und); + else + sym->resolve(und); continue; } @@ -1141,6 +1153,10 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&file) void ArchiveFile::parse() { for (const Archive::Symbol &sym : file->symbols()) symtab->addSymbol(LazyArchive{*this, sym}); + + // Inform a future invocation of ObjFile::initializeSymbols() that this + // archive has been processed. + parsed = true; } // Returns a buffer pointing to a member file containing a given symbol. 
@@ -1615,14 +1631,13 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } void LazyObjFile::fetch() { - if (mb.getBuffer().empty()) + if (fetched) return; + fetched = true; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; - mb = {}; - // Copy symbol vector so that the new InputFile doesn't have to // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 51882e0c964719..7af85e417ca583 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -307,6 +307,8 @@ class LazyObjFile : public InputFile { template void parse(); void fetch(); + bool fetched = false; + private: uint64_t offsetInArchive; }; @@ -327,6 +329,8 @@ class ArchiveFile : public InputFile { size_t getMemberCount() const; size_t getFetchedMemberCount() const { return seen.size(); } + bool parsed = false; + private: std::unique_ptr file; llvm::DenseSet seen; diff --git a/lld/test/ELF/comdat-discarded-lazy.s b/lld/test/ELF/comdat-discarded-lazy.s new file mode 100644 index 00000000000000..8ee15158f6b3ae --- /dev/null +++ b/lld/test/ELF/comdat-discarded-lazy.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 +## Test that lazy symbols in a section group can be demoted to Undefined, +## so that we can report a "discarded section" error. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: echo '.globl f1, foo; f1: call foo; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; foo:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o + +## Test the case when the symbol causing a "discarded section" is ordered +## *before* the symbol fetching the lazy object. +## The test relies on the symbol table order of llvm-mc (lexical), which will +## need adjustment if llvm-mc changes its behavior. 
+# RUN: echo '.globl f2, aa; f2: call aa; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; aa:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %taa.o +# RUN: llvm-nm -p %taa.o | FileCheck --check-prefix=AA-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %taa.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s +# RUN: rm -f %taa.a && llvm-ar rc %taa.a %taa.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %taa.a -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s + +# AA-NM: aa +# AA-NM: f2 + +# AA: error: relocation refers to a symbol in a discarded section: aa +# AA-NEXT: >>> defined in {{.*}}aa.o +# AA-NEXT: >>> section group signature: foo +# AA-NEXT: >>> prevailing definition is in {{.*}}1.o +# AA-NEXT: >>> referenced by {{.*}}aa.o:(.text+0x1) + +## Test the case when the symbol causing a "discarded section" is ordered +## *after* the symbol fetching the lazy object. +# RUN: echo '.globl f2, zz; f2: call zz; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; zz:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %tzz.o +# RUN: llvm-nm -p %tzz.o | FileCheck --check-prefix=ZZ-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s +# RUN: rm -f %tzz.a && llvm-ar rc %tzz.a %tzz.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %tzz.a -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s + +# ZZ-NM: f2 +# ZZ-NM: zz + +# ZZ: error: relocation refers to a symbol in a discarded section: zz +# ZZ-NEXT: >>> defined in {{.*}}zz.o +# ZZ-NEXT: >>> section group signature: foo +# ZZ-NEXT: >>> prevailing definition is in {{.*}}1.o +# ZZ-NEXT: >>> referenced by {{.*}}zz.o:(.text+0x1) + +## Don't error if the symbol which would cause "discarded section" +## was inserted before %tzz.o +# RUN: echo '.globl zz; zz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null +# RUN: rm -f %tdef.a && llvm-ar rc 
%tdef.a %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null + +.globl _start +_start: + call f1 + call f2 diff --git a/lld/test/ELF/i386-linkonce.s b/lld/test/ELF/i386-linkonce.s index c06b042c7638e4..f7da0aed4af585 100644 --- a/lld/test/ELF/i386-linkonce.s +++ b/lld/test/ELF/i386-linkonce.s @@ -2,7 +2,9 @@ // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %p/Inputs/i386-linkonce.s -o %t2.o // RUN: llvm-ar rcs %t2.a %t2.o -// RUN: ld.lld %t.o %t2.a -o %t +// RUN: not ld.lld %t.o %t2.a -o /dev/null 2>&1 | FileCheck %s + +// CHECK: error: relocation refers to a symbol in a discarded section: __i686.get_pc_thunk.bx .globl _start _start: diff --git a/lld/test/ELF/start-lib-comdat.s b/lld/test/ELF/start-lib-comdat.s index 34c9934803f092..996ddb485bab3c 100644 --- a/lld/test/ELF/start-lib-comdat.s +++ b/lld/test/ELF/start-lib-comdat.s @@ -6,7 +6,7 @@ // RUN: ld.lld -shared -o %t3 %t1.o --start-lib %t2.o --end-lib // RUN: llvm-readobj --symbols %t3 | FileCheck %s // RUN: ld.lld -shared -o %t3 --start-lib %t2.o --end-lib %t1.o -// RUN: llvm-readobj --symbols %t3 | FileCheck %s +// RUN: llvm-readobj --symbols %t3 | FileCheck /dev/null --implicit-check-not='Name: zed' // CHECK: Name: zed // CHECK-NEXT: Value: From e7c5412b3731b3b095567e6db85c2989133dd6de Mon Sep 17 00:00:00 2001 From: Anh Tuyen Tran Date: Tue, 9 Jun 2020 18:30:56 +0000 Subject: [PATCH 08/25] [NFC][LV][TEST]: extend pr45679-fold-tail-by-masking.ll with -force-vector-width=1 -force-vector-interleave=4 Summary: Add -force-vector-width=1 -force-vector-interleave=4 to pr45679-fold-tail-by-masking.ll Author: anhtuyen (Anh Tuyen Tran) Reviewers: Ayal (Ayal Zaks) Reviewed By: Ayal (Ayal Zaks) Subscribers: rkruppe (Hanna Kruppe), llvm-commits, LLVM Tag: LLVM Differential Revision: https://reviews.llvm.org/D80446 --- .../pr45679-fold-tail-by-masking.ll | 58 ++++++++++++++ .../tail-folding-vectorization-factor-1.ll | 78 
------------------- 2 files changed, 58 insertions(+), 78 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index 835a6dad32061f..d77abbd39f0d28 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s ; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s -check-prefix=VF2UF2 +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s -check-prefix=VF1UF4 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -132,6 +133,63 @@ define void @pr45679(i32* %A) optsize { ; VF2UF2: exit: ; VF2UF2-NEXT: ret void ; +; VF1UF4-LABEL: @pr45679( +; VF1UF4-NEXT: entry: +; VF1UF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF1UF4: vector.ph: +; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] +; VF1UF4: vector.body: +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] +; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 +; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13 +; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13 +; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13 +; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 +; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; VF1UF4: pred.store.if: +; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* 
[[A:%.*]], i32 [[INDUCTION]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP4]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] +; VF1UF4: pred.store.continue: +; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; VF1UF4: pred.store.if4: +; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP5]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE5]] +; VF1UF4: pred.store.continue5: +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; VF1UF4: pred.store.if6: +; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP6]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE7]] +; VF1UF4: pred.store.continue7: +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.if8: +; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP7]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.continue9: +; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] +; VF1UF4: middle.block: +; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; VF1UF4: scalar.ph: +; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF1UF4-NEXT: br label [[LOOP:%.*]] +; VF1UF4: loop: +; VF1UF4-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] +; VF1UF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] +; VF1UF4-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 +; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 +; VF1UF4-NEXT: 
[[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 +; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] +; VF1UF4: exit: +; VF1UF4-NEXT: ret void +; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 2973a4425a5d4f..973d7013837a6e 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -pass-remarks='loop-vectorize' -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS ; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s --check-prefix=CHECK-VF1 ; These tests are to check that fold-tail procedure produces correct scalar code when ; loop-vectorization is only unrolling but not vectorizing. @@ -110,80 +109,3 @@ for.body: %cond = icmp eq double* %ptr, %ptr2 br i1 %cond, label %for.cond.cleanup, label %for.body } - -; The following testcase is extended from the test of https://reviews.llvm.org/D80085 -; Similar to two tests above, it is to check that fold-tail procedure produces correct scalar code when -; loop-vectorization is only unrolling but not vectorizing. 
- -define void @pr45679(i32* %A) optsize { -; CHECK-VF1-LABEL: @pr45679 -; CHECK-VF1-NEXT: entry: -; CHECK-VF1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK-VF1: vector.ph: -; CHECK-VF1-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK-VF1: vector.body: -; CHECK-VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] -; CHECK-VF1-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 -; CHECK-VF1-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-VF1-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-VF1-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-VF1-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13 -; CHECK-VF1-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13 -; CHECK-VF1-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13 -; CHECK-VF1-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 -; CHECK-VF1-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK-VF1: pred.store.if: -; CHECK-VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP4]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK-VF1: pred.store.continue: -; CHECK-VF1-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; CHECK-VF1: pred.store.if4: -; CHECK-VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP5]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE5]] -; CHECK-VF1: pred.store.continue5: -; CHECK-VF1-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; CHECK-VF1: pred.store.if6: -; CHECK-VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP6]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE7]] -; CHECK-VF1: 
pred.store.continue7: -; CHECK-VF1-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; CHECK-VF1: pred.store.if8: -; CHECK-VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP7]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE9]] -; CHECK-VF1: pred.store.continue9: -; CHECK-VF1-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-VF1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-VF1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] -; CHECK-VF1: middle.block: -; CHECK-VF1-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK-VF1: scalar.ph: -; CHECK-VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-VF1-NEXT: br label [[LOOP:%.*]] -; CHECK-VF1: loop: -; CHECK-VF1-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] -; CHECK-VF1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] -; CHECK-VF1-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 -; CHECK-VF1-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; CHECK-VF1-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; CHECK-VF1-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] -; CHECK-VF1: exit: -; CHECK-VF1-NEXT: ret void -; -entry: - br label %loop - -loop: - %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv - store i32 13, i32* %arrayidx, align 1 - %rivPlus1 = add nuw nsw i32 %riv, 1 - %cond = icmp eq i32 %rivPlus1, 14 - br i1 %cond, label %exit, label %loop - -exit: - ret void -} From 7fb40e1569dd66292b647f4501b85517e9247953 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 9 Jun 2020 14:08:55 -0400 Subject: [PATCH 09/25] [libc++] Fix too stringent availability markup for bad_optional_access The availability markup for bad_optional_access marked it as being added in 
MacOS 10.14 and aligned releases, however it appears to have been added in Mac OS 10.13 and aligned releases. --- libcxx/include/__config | 8 ++++---- libcxx/utils/libcxx/test/config.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index cf596a7872abd5..26fadcff7ced42 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1392,10 +1392,10 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) # define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ - __attribute__((availability(macosx,strict,introduced=10.14))) \ - __attribute__((availability(ios,strict,introduced=12.0))) \ - __attribute__((availability(tvos,strict,introduced=12.0))) \ - __attribute__((availability(watchos,strict,introduced=5.0))) + __attribute__((availability(macosx,strict,introduced=10.13))) \ + __attribute__((availability(ios,strict,introduced=11.0))) \ + __attribute__((availability(tvos,strict,introduced=11.0))) \ + __attribute__((availability(watchos,strict,introduced=4.0))) # define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS \ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 22ec6c457e5694..35dac0df56824d 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -857,8 +857,8 @@ def configure_deployment(self): self.config.available_features.add('dylib-has-no-shared_mutex') self.lit_config.note("shared_mutex is not supported by the deployment target") # Throwing bad_optional_access, bad_variant_access and bad_any_cast is - # supported starting in macosx10.14. - if name == 'macosx' and version in ('10.%s' % v for v in range(9, 14)): + # supported starting in macosx10.13. 
+ if name == 'macosx' and version in ('10.%s' % v for v in range(9, 13)): self.config.available_features.add('dylib-has-no-bad_optional_access') self.lit_config.note("throwing bad_optional_access is not supported by the deployment target") From ce5fecb7d0a12c27763afe3f89d1d7e8a1893dc0 Mon Sep 17 00:00:00 2001 From: Tridacnid Date: Tue, 9 Jun 2020 19:43:48 +0100 Subject: [PATCH 10/25] Assignment and Inc/Dec operators wouldn't register as a mutation when Implicit Paren Casts were present Add ignoringParenImpCasts to assignment and inc/dec mutation checks in ExprMutationAnalyzer to fix clang-tidy bug PR45490. https://bugs.llvm.org/show_bug.cgi?id=45490 Reviewed By: njames93, aaron.ballman, gribozavr2 Differential Revision: https://reviews.llvm.org/D79912 --- .../checkers/bugprone-infinite-loop.cpp | 23 +++++++++++++++++++ clang/lib/Analysis/ExprMutationAnalyzer.cpp | 9 ++++---- .../Analysis/ExprMutationAnalyzerTest.cpp | 23 ++++++++++++++----- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp index 427b5f0272b94a..8bd4df7cd84450 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp @@ -70,11 +70,25 @@ void simple_not_infinite1() { i++; } + while ((Limit)--) { + // Not an error since 'Limit' is updated. + i++; + } + + while ((Limit) -= 1) { + // Not an error since 'Limit' is updated. + } + while (int k = Limit) { // Not an error since 'Limit' is updated. Limit--; } + while (int k = Limit) { + // Not an error since 'Limit' is updated + (Limit)--; + } + while (int k = Limit--) { // Not an error since 'Limit' is updated. 
i++; @@ -86,6 +100,15 @@ void simple_not_infinite1() { for (i = 0; i < Limit; Limit--) { } + + for (i = 0; i < Limit; (Limit) = Limit - 1) { + } + + for (i = 0; i < Limit; (Limit) -= 1) { + } + + for (i = 0; i < Limit; --(Limit)) { + } } void simple_not_infinite2() { diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index cb5cabfd3089c2..2f80285f17b4da 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -201,14 +201,15 @@ const Stmt *ExprMutationAnalyzer::findDeclPointeeMutation( const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // LHS of any assignment operators. - const auto AsAssignmentLhs = - binaryOperator(isAssignmentOperator(), - hasLHS(maybeEvalCommaExpr(equalsNode(Exp)))); + const auto AsAssignmentLhs = binaryOperator( + isAssignmentOperator(), + hasLHS(maybeEvalCommaExpr(ignoringParenImpCasts(equalsNode(Exp))))); // Operand of increment/decrement operators. const auto AsIncDecOperand = unaryOperator(anyOf(hasOperatorName("++"), hasOperatorName("--")), - hasUnaryOperand(maybeEvalCommaExpr(equalsNode(Exp)))); + hasUnaryOperand(maybeEvalCommaExpr( + ignoringParenImpCasts(equalsNode(Exp))))); // Invoking non-const member function. // A member function is assumed to be non-const when it is unresolved. 
diff --git a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp index 9b0a3dbda81e41..9d26eeb6af7347 100644 --- a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp +++ b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp @@ -112,11 +112,21 @@ TEST(ExprMutationAnalyzerTest, Trivial) { class AssignmentTest : public ::testing::TestWithParam {}; TEST_P(AssignmentTest, AssignmentModifies) { - const std::string ModExpr = "x " + GetParam() + " 10"; - const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); - const auto Results = - match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); - EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + { + const std::string ModExpr = "x " + GetParam() + " 10"; + const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); + const auto Results = + match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + } + + { + const std::string ModExpr = "(x) " + GetParam() + " 10"; + const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); + const auto Results = + match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + } } INSTANTIATE_TEST_CASE_P(AllAssignmentOperators, AssignmentTest, @@ -134,7 +144,8 @@ TEST_P(IncDecTest, IncDecModifies) { } INSTANTIATE_TEST_CASE_P(AllIncDecOperators, IncDecTest, - Values("++x", "--x", "x++", "x--"), ); + Values("++x", "--x", "x++", "x--", "++(x)", "--(x)", + "(x)++", "(x)--"), ); TEST(ExprMutationAnalyzerTest, NonConstMemberFunc) { const auto AST = buildASTFromCode( From d9dec697cbb7f825aa1b8e6336027675a01a0823 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 16 Apr 2019 17:12:54 -0400 Subject: [PATCH 11/25] [libc++][CMake] Add CMake caches for commonly supported configurations This commit adds CMake 
caches for the various configurations of libc++ that are tested by our build bots. Differential Revision: https://reviews.llvm.org/D81293 --- libcxx/cmake/caches/Generic-32bits.cmake | 1 + libcxx/cmake/caches/Generic-asan.cmake | 1 + libcxx/cmake/caches/Generic-cxx03.cmake | 1 + libcxx/cmake/caches/Generic-cxx11.cmake | 1 + libcxx/cmake/caches/Generic-cxx14.cmake | 1 + libcxx/cmake/caches/Generic-cxx17.cmake | 1 + libcxx/cmake/caches/Generic-cxx2a.cmake | 1 + libcxx/cmake/caches/Generic-msan.cmake | 1 + libcxx/cmake/caches/Generic-noexceptions.cmake | 2 ++ libcxx/cmake/caches/Generic-singlethreaded.cmake | 3 +++ libcxx/cmake/caches/Generic-tsan.cmake | 1 + libcxx/cmake/caches/Generic-ubsan.cmake | 2 ++ libcxx/cmake/caches/README.md | 13 +++++++++++++ 13 files changed, 29 insertions(+) create mode 100644 libcxx/cmake/caches/Generic-32bits.cmake create mode 100644 libcxx/cmake/caches/Generic-asan.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx03.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx11.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx14.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx17.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx2a.cmake create mode 100644 libcxx/cmake/caches/Generic-msan.cmake create mode 100644 libcxx/cmake/caches/Generic-noexceptions.cmake create mode 100644 libcxx/cmake/caches/Generic-singlethreaded.cmake create mode 100644 libcxx/cmake/caches/Generic-tsan.cmake create mode 100644 libcxx/cmake/caches/Generic-ubsan.cmake create mode 100644 libcxx/cmake/caches/README.md diff --git a/libcxx/cmake/caches/Generic-32bits.cmake b/libcxx/cmake/caches/Generic-32bits.cmake new file mode 100644 index 00000000000000..ae7b2ac3e8d83a --- /dev/null +++ b/libcxx/cmake/caches/Generic-32bits.cmake @@ -0,0 +1 @@ +set(LLVM_BUILD_32_BITS ON CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-asan.cmake b/libcxx/cmake/caches/Generic-asan.cmake new file mode 100644 index 00000000000000..cf919765c3a29c --- 
/dev/null +++ b/libcxx/cmake/caches/Generic-asan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "Address" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx03.cmake b/libcxx/cmake/caches/Generic-cxx03.cmake new file mode 100644 index 00000000000000..d1d67d86d74a9c --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx03.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++03" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx11.cmake b/libcxx/cmake/caches/Generic-cxx11.cmake new file mode 100644 index 00000000000000..e203c6aeaf29fc --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx11.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++11" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx14.cmake b/libcxx/cmake/caches/Generic-cxx14.cmake new file mode 100644 index 00000000000000..b1bf1244b510ab --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx14.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++14" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx17.cmake b/libcxx/cmake/caches/Generic-cxx17.cmake new file mode 100644 index 00000000000000..b23204729ced20 --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx17.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++17" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx2a.cmake b/libcxx/cmake/caches/Generic-cxx2a.cmake new file mode 100644 index 00000000000000..31f1b76ab91f99 --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx2a.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++2a" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-msan.cmake b/libcxx/cmake/caches/Generic-msan.cmake new file mode 100644 index 00000000000000..7c948f51642dd4 --- /dev/null +++ b/libcxx/cmake/caches/Generic-msan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "MemoryWithOrigins" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-noexceptions.cmake b/libcxx/cmake/caches/Generic-noexceptions.cmake new file mode 100644 index 00000000000000..f0dffef60dba08 --- 
/dev/null +++ b/libcxx/cmake/caches/Generic-noexceptions.cmake @@ -0,0 +1,2 @@ +set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") +set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-singlethreaded.cmake b/libcxx/cmake/caches/Generic-singlethreaded.cmake new file mode 100644 index 00000000000000..616baef1be7bef --- /dev/null +++ b/libcxx/cmake/caches/Generic-singlethreaded.cmake @@ -0,0 +1,3 @@ +set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "") +set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "") +set(LIBCXX_ENABLE_MONOTONIC_CLOCK OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-tsan.cmake b/libcxx/cmake/caches/Generic-tsan.cmake new file mode 100644 index 00000000000000..a4b599e3e5094b --- /dev/null +++ b/libcxx/cmake/caches/Generic-tsan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "Thread" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-ubsan.cmake b/libcxx/cmake/caches/Generic-ubsan.cmake new file mode 100644 index 00000000000000..7ad891e4aed9e8 --- /dev/null +++ b/libcxx/cmake/caches/Generic-ubsan.cmake @@ -0,0 +1,2 @@ +set(LLVM_USE_SANITIZER "Undefined" CACHE STRING "") +set(LIBCXX_ABI_UNSTABLE ON CACHE BOOL "") diff --git a/libcxx/cmake/caches/README.md b/libcxx/cmake/caches/README.md new file mode 100644 index 00000000000000..60837ee2940177 --- /dev/null +++ b/libcxx/cmake/caches/README.md @@ -0,0 +1,13 @@ +# libc++ / libc++abi configuration caches + +This directory contains CMake caches for the supported configurations of libc++. +Some of the configurations are specific to a vendor, others are generic and not +tied to any vendor. + +While we won't explicitly work to break configurations not listed here, any +configuration not listed here is not explicitly supported. If you use or ship +libc++ under a configuration not listed here, you should work with the libc++ +maintainers to make it into a supported configuration and add it here. 
+ +Similarly, adding any new configuration that's not already covered must be +discussed with the libc++ maintainers as it entails a maintenance burden. From d31c9e5a46ee692daf2430b52626afcea1db18ab Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 27 Mar 2020 23:58:06 +0000 Subject: [PATCH 12/25] Change filecheck default to dump input on failure Having the input dumped on failure seems like a better default: I debugged FileCheck tests for a while without knowing about this option, which really helps to understand failures. Remove `-dump-input-on-failure` and the environment variable FILECHECK_DUMP_INPUT_ON_FAILURE which are now obsolete. Differential Revision: https://reviews.llvm.org/D81422 --- clang/test/CodeGenObjC/externally-retained.m | 4 +- clang/test/Driver/rocm-device-libs.cl | 34 ++++++------- compiler-rt/test/fuzzer/fork.test | 2 +- .../llvm-prettyprinters/gdb/llvm-support.gdb | 2 +- llvm/docs/CommandGuide/FileCheck.rst | 9 +--- .../AArch64/speculation-hardening-dagisel.ll | 4 +- .../AArch64/speculation-hardening-loads.ll | 2 +- .../CodeGen/AArch64/speculation-hardening.ll | 12 ++--- .../CodeGen/AArch64/speculation-hardening.mir | 2 +- llvm/test/FileCheck/comment/after-words.txt | 2 +- .../test/FileCheck/comment/blank-comments.txt | 2 +- llvm/test/FileCheck/comment/suffixes.txt | 4 +- .../FileCheck/comment/suppresses-checks.txt | 6 +-- .../comment/unused-comment-prefixes.txt | 4 +- llvm/test/FileCheck/dump-input-enable.txt | 48 +++---------------- llvm/test/FileCheck/envvar-opts.txt | 6 +-- llvm/test/FileCheck/lit.local.cfg | 4 +- llvm/test/FileCheck/match-full-lines.txt | 4 +- llvm/test/FileCheck/verbose.txt | 6 +-- .../Transforms/InstCombine/fortify-folding.ll | 2 +- llvm/utils/FileCheck/FileCheck.cpp | 11 +---- llvm/utils/lit/lit/TestingConfig.py | 5 +- llvm/utils/lit/tests/lit.cfg | 2 +- mlir/test/Analysis/test-callgraph.mlir | 2 +- mlir/test/Analysis/test-dominance.mlir | 2 +- mlir/test/Analysis/test-liveness.mlir | 2 +- 
.../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 2 +- .../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 2 +- .../SCFToGPU/no_blocks_no_threads.mlir | 4 +- .../Conversion/SCFToGPU/parallel_loop.mlir | 2 +- .../ShapeToStandard/shape-to-standard.mlir | 2 +- mlir/test/Dialect/GPU/outlining.mlir | 2 +- mlir/test/Dialect/Linalg/fusion-tensor.mlir | 2 +- mlir/test/Dialect/Linalg/fusion.mlir | 2 +- .../Linalg/fusion_indexed_generic.mlir | 2 +- mlir/test/Dialect/Linalg/parallel_loops.mlir | 2 +- .../Dialect/Linalg/tensors-to-buffers.mlir | 2 +- .../Dialect/Linalg/tile_conv_padding.mlir | 4 +- mlir/test/Dialect/Linalg/tile_parallel.mlir | 8 ++-- mlir/test/Dialect/SCF/ops.mlir | 6 +-- .../Dialect/SCF/parallel-loop-fusion.mlir | 2 +- .../SCF/parallel-loop-specialization.mlir | 2 +- .../Dialect/SCF/parallel-loop-tiling.mlir | 2 +- mlir/test/Dialect/Shape/ops.mlir | 6 +-- mlir/test/Dialect/Shape/shape-to-shape.mlir | 2 +- mlir/test/Dialect/Standard/expand-atomic.mlir | 2 +- .../Vector/vector-contract-transforms.mlir | 6 +-- .../Vector/vector-flat-transforms.mlir | 2 +- mlir/test/EDSC/builder-api-test.cpp | 2 +- mlir/test/IR/print-op-local-scope.mlir | 2 +- ...nt-preparation-allowed-memref-results.mlir | 2 +- .../buffer-placement-preparation.mlir | 2 +- mlir/test/Transforms/buffer-placement.mlir | 2 +- mlir/test/Transforms/canonicalize.mlir | 2 +- mlir/test/Transforms/sccp-callgraph.mlir | 4 +- mlir/test/mlir-tblgen/op-attribute.td | 6 +-- mlir/test/mlir-tblgen/op-decl.td | 2 +- .../mlir-tblgen/op-derived-attribute.mlir | 2 +- mlir/test/mlir-tblgen/op-format-spec.td | 2 +- mlir/test/mlir-tblgen/op-interface.td | 4 +- mlir/test/mlir-tblgen/pattern.mlir | 2 +- mlir/test/mlir-tblgen/predicate.td | 2 +- mlir/test/mlir-tblgen/return-types.mlir | 2 +- 63 files changed, 119 insertions(+), 168 deletions(-) diff --git a/clang/test/CodeGenObjC/externally-retained.m b/clang/test/CodeGenObjC/externally-retained.m index f68696879768fc..b842b8c4c68ce8 100644 --- 
a/clang/test/CodeGenObjC/externally-retained.m +++ b/clang/test/CodeGenObjC/externally-retained.m @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 %s -S -emit-llvm -o - | FileCheck %s --dump-input-on-failure -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 -xobjective-c++ -std=c++11 %s -S -emit-llvm -o - | FileCheck %s --check-prefix CHECKXX --dump-input-on-failure +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 %s -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 -xobjective-c++ -std=c++11 %s -S -emit-llvm -o - | FileCheck %s --check-prefix CHECKXX #define EXT_RET __attribute__((objc_externally_retained)) diff --git a/clang/test/Driver/rocm-device-libs.cl b/clang/test/Driver/rocm-device-libs.cl index cdb4716bde9a83..7f45116d363065 100644 --- a/clang/test/Driver/rocm-device-libs.cl +++ b/clang/test/Driver/rocm-device-libs.cl @@ -8,7 +8,7 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s @@ -17,7 +17,7 @@ // RUN: -x cl -mcpu=gfx803 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -26,7 +26,7 @@ // RUN: -x cl -mcpu=fiji \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck 
--check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -35,7 +35,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ @@ -43,7 +43,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s @@ -52,7 +52,7 @@ // RUN: -cl-finite-math-only \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s @@ -61,7 +61,7 @@ // RUN: -cl-fp32-correctly-rounded-divide-sqrt \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s @@ -70,7 +70,7 @@ // RUN: -cl-fast-relaxed-math \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s @@ -79,45 +79,45 @@ // RUN: -cl-unsafe-math-optimizations \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: 
%s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1011 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1011,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1011,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1012 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1012,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1012,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE32 %s // Ignore -mno-wavefrontsize64 without wave32 support // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx803 -mno-wavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX803,WAVE64 %s @@ -126,13 +126,13 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// 
RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s // Test environment variable HIP_DEVICE_LIB_PATH // RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx900 \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s diff --git a/compiler-rt/test/fuzzer/fork.test b/compiler-rt/test/fuzzer/fork.test index e0f348b2bff1b4..6e76fe7f2b06a5 100644 --- a/compiler-rt/test/fuzzer/fork.test +++ b/compiler-rt/test/fuzzer/fork.test @@ -18,4 +18,4 @@ RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 2>&1 | FileCheck MAX_TOTAL_TIME: INFO: fuzzed for {{.*}} seconds, wrapping up soon MAX_TOTAL_TIME: INFO: exiting: {{.*}} time: -RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --dump-input-on-failure --check-prefix=MAX_TOTAL_TIME +RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --check-prefix=MAX_TOTAL_TIME diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb index cdd3388d6d2e23..6ae1c7016b680e 100644 --- a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb +++ b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb @@ -1,4 +1,4 @@ -# RUN: gdb -q -batch -n -iex 'source %llvm_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-llvm-support | FileCheck %s --dump-input-on-failure +# RUN: gdb -q -batch -n -iex 'source %llvm_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-llvm-support | FileCheck %s # REQUIRES: debug-info break main diff --git a/llvm/docs/CommandGuide/FileCheck.rst 
b/llvm/docs/CommandGuide/FileCheck.rst index 0512133f2e995c..b2e3dfcf01ad64 100644 --- a/llvm/docs/CommandGuide/FileCheck.rst +++ b/llvm/docs/CommandGuide/FileCheck.rst @@ -106,13 +106,8 @@ and from the command line. .. option:: --dump-input Dump input to stderr, adding annotations representing currently enabled - diagnostics. Do this either 'always', on 'fail', or 'never'. Specify 'help' - to explain the dump format and quit. - -.. option:: --dump-input-on-failure - - When the check fails, dump all of the original input. This option is - deprecated in favor of `--dump-input=fail`. + diagnostics. Do this either 'always', on 'fail' (default), or 'never'. + Specify 'help' to explain the dump format and quit. .. option:: --enable-var-scope diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll index 72f3170fb09c89..0f16235d7c69e0 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll @@ -1,5 +1,5 @@ -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH declare i64 @g(i64, i64) local_unnamed_addr define i64 @f_using_reserved_reg_x16(i64 %a, i64 %b) local_unnamed_addr SLHATTR { diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll index 
c5aae051430074..58690052183545 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --dump-input-on-failure +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s define i128 @ldp_single_csdb(i128* %p) speculative_load_hardening { entry: diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll index 23b87563013fd9..d298efa94dc596 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll @@ -1,9 +1,9 @@ -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 
's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR { ; CHECK-LABEL: f diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir index 5991c4df0407f7..0073bedf8ffad2 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.mir +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir @@ -1,6 +1,6 @@ # RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \ # RUN: -start-before aarch64-speculation-hardening -o - %s \ -# RUN: | FileCheck %s --dump-input-on-failure +# RUN: | FileCheck %s # Check that the speculation hardening pass generates code as expected for # basic blocks ending with a variety of branch patterns: diff --git a/llvm/test/FileCheck/comment/after-words.txt b/llvm/test/FileCheck/comment/after-words.txt index 46eeb657f0157d..3650f959be3fa8 100644 --- a/llvm/test/FileCheck/comment/after-words.txt +++ b/llvm/test/FileCheck/comment/after-words.txt @@ -8,7 +8,7 @@ RUN: echo 'FOO-COM: CHECK: foo' > %t.chk RUN: echo 'RUN_COM: CHECK: bar' >> %t.chk RUN: echo 'RUN3COM: CHECK: foo' >> %t.chk RUN: echo ' COMRUN: CHECK: bar' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s 
+RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:1:17: remark: CHECK: expected string found in input CHECK: .chk:2:17: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/blank-comments.txt b/llvm/test/FileCheck/comment/blank-comments.txt index b035ddd750d09e..1bad6d2daada51 100644 --- a/llvm/test/FileCheck/comment/blank-comments.txt +++ b/llvm/test/FileCheck/comment/blank-comments.txt @@ -4,6 +4,6 @@ RUN: echo 'foo' > %t.in RUN: echo 'COM:' > %t.chk RUN: echo 'CHECK: foo' >> %t.chk RUN: echo ' COM: ' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:2:8: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/suffixes.txt b/llvm/test/FileCheck/comment/suffixes.txt index 47805b46d0c94e..85b05fb5778cf3 100644 --- a/llvm/test/FileCheck/comment/suffixes.txt +++ b/llvm/test/FileCheck/comment/suffixes.txt @@ -6,7 +6,7 @@ RUN: echo bar >> %t.in RUN: echo 'COM-NEXT: CHECK: foo' > %t.chk RUN: echo 'RUN-NOT: CHECK: bar' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | \ +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | \ RUN: FileCheck -check-prefix=CHECK1 %s CHECK1: .chk:1:18: remark: CHECK: expected string found in input @@ -15,7 +15,7 @@ CHECK1: .chk:2:17: remark: CHECK: expected string found in input # But we can define them as comment prefixes. 
RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv -comment-prefixes=COM,RUN,RUN-NOT %t.chk < %t.in 2>&1 | \ +RUN: FileCheck -dump-input=never -vv -comment-prefixes=COM,RUN,RUN-NOT %t.chk < %t.in 2>&1 | \ RUN: FileCheck -check-prefix=CHECK2 %s CHECK2: .chk:1:18: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/suppresses-checks.txt b/llvm/test/FileCheck/comment/suppresses-checks.txt index 98f01811f53f4d..a58a040b5d39a7 100644 --- a/llvm/test/FileCheck/comment/suppresses-checks.txt +++ b/llvm/test/FileCheck/comment/suppresses-checks.txt @@ -7,7 +7,7 @@ RUN: echo 'foo' > %t-1.in RUN: echo 'COM: CHECK: bar' > %t-1.chk RUN: echo 'CHECK: foo' >> %t-1.chk RUN: echo 'RUN: echo "CHECK: baz"' >> %t-1.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t-1.chk < %t-1.in 2>&1 | \ +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t-1.chk < %t-1.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=2 %s # Check the case of one user-specified comment prefix. @@ -16,7 +16,7 @@ RUN: echo 'foo' > %t-2.in RUN: echo 'CHECK: foo' > %t-2.chk RUN: echo 'letters then space MY-PREFIX: CHECK: bar' >> %t-2.chk RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t-2.chk -comment-prefixes=MY-PREFIX < %t-2.in 2>&1 | \ +RUN: FileCheck -dump-input=never -vv %t-2.chk -comment-prefixes=MY-PREFIX < %t-2.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=1 %s # Check the case of multiple user-specified comment prefixes. 
@@ -26,7 +26,7 @@ RUN: echo 'CHECK: foo' >> %t-3.chk RUN: echo 'Foo_1: CHECK: Foo' >> %t-3.chk RUN: echo 'Baz_3: CHECK: Baz' >> %t-3.chk RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t-3.chk -comment-prefixes=Foo_1,Bar_2 \ +RUN: FileCheck -dump-input=never -vv %t-3.chk -comment-prefixes=Foo_1,Bar_2 \ RUN: -comment-prefixes=Baz_3 < %t-3.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=2 %s diff --git a/llvm/test/FileCheck/comment/unused-comment-prefixes.txt b/llvm/test/FileCheck/comment/unused-comment-prefixes.txt index 29212ecb6aadce..5dadc8f3569d5c 100644 --- a/llvm/test/FileCheck/comment/unused-comment-prefixes.txt +++ b/llvm/test/FileCheck/comment/unused-comment-prefixes.txt @@ -5,12 +5,12 @@ RUN: echo 'CHECK: foo' > %t.chk # Check the case of default comment prefixes. RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s +RUN: FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s # Specifying non-default comment prefixes doesn't mean you have to use them. # For example, they might be applied to an entire test suite via # FILECHECK_OPTS or via a wrapper command or substitution. RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv -comment-prefixes=FOO %t.chk < %t.in 2>&1 | FileCheck %s +RUN: FileCheck -dump-input=never -vv -comment-prefixes=FOO %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:1:8: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/dump-input-enable.txt b/llvm/test/FileCheck/dump-input-enable.txt index cf47f03dfa835b..48a6eef417154e 100644 --- a/llvm/test/FileCheck/dump-input-enable.txt +++ b/llvm/test/FileCheck/dump-input-enable.txt @@ -74,20 +74,22 @@ BADVAL: {{F|f}}ile{{C|c}}heck{{.*}}: for the --dump-input option: Cannot find op ; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,ERR,NODUMP ;-------------------------------------------------- -; Check no -dump-input, which defaults to never. +; Check no -dump-input, which defaults to fail. 
;-------------------------------------------------- -; FileCheck success, -v => no dump, trace. +; FileCheck success, -v => no dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,NODUMP +; RUN: | FileCheck %s -match-full-lines -allow-empty \ +; RUN: -check-prefixes=NOTRACE,NODUMP -; FileCheck fail, -v => no dump, trace. +; FileCheck fail, -v => dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,ERR,NODUMP +; RUN: | FileCheck %s -match-full-lines \ +; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V ;-------------------------------------------------- ; Check -dump-input=fail. @@ -122,42 +124,6 @@ BADVAL: {{F|f}}ile{{C|c}}heck{{.*}}: for the --dump-input option: Cannot find op ; RUN: | FileCheck %s -match-full-lines \ ; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V -;-------------------------------------------------- -; Check -dump-input-on-failure. -;-------------------------------------------------- - -; Command-line option. - -; FileCheck success, -v => no dump, no trace. -; RUN: %ProtectFileCheckOutput \ -; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -allow-empty \ -; RUN: -check-prefixes=NOTRACE,NODUMP - -; FileCheck fail, -v => dump, no trace. -; RUN: %ProtectFileCheckOutput \ -; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines \ -; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V - -; FILECHECK_DUMP_INPUT_ON_FAILURE=1. - -; FileCheck success, -v => no dump, no trace. 
-; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \ -; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -allow-empty \ -; RUN: -check-prefixes=NOTRACE,NODUMP - -; FileCheck fail, -v => dump, no trace. -; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \ -; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines \ -; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V - ;-------------------------------------------------- ; Check -dump-input=always. ;-------------------------------------------------- diff --git a/llvm/test/FileCheck/envvar-opts.txt b/llvm/test/FileCheck/envvar-opts.txt index c1a9b2e1b243e6..da2b9f919a0d89 100644 --- a/llvm/test/FileCheck/envvar-opts.txt +++ b/llvm/test/FileCheck/envvar-opts.txt @@ -4,15 +4,15 @@ ; CHECK: bar ; RUN: %ProtectFileCheckOutput \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET %s ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS= \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET %s ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-v \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix VERB %s ; QUIET-NOT: remark: {{CHECK}}: expected string found in input diff --git a/llvm/test/FileCheck/lit.local.cfg b/llvm/test/FileCheck/lit.local.cfg index 65aba149e22d7c..9164f683fc1be3 100644 --- a/llvm/test/FileCheck/lit.local.cfg +++ b/llvm/test/FileCheck/lit.local.cfg @@ -39,7 +39,7 @@ config.test_format = lit.formats.ShTest(execute_external=False) # ; FILECHECK_OPTS beforehand. 
# ; # ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-v \ -# ; RUN: FileCheck -input-file %s %s 2>&1 \ +# ; RUN: FileCheck -dump-input=never -input-file %s %s 2>&1 \ # ; RUN: | FileCheck -check-prefix TRACE %s # ; # ; CHECK: {{[0-9]+\.0}} @@ -53,4 +53,4 @@ config.test_format = lit.formats.ShTest(execute_external=False) # status (e.g., FILECHECK_OPTS=-strict-whitespace), he shouldn't be surprised # that test results throughout all test suites are affected. config.substitutions.append(('%ProtectFileCheckOutput', - 'env -u FILECHECK_OPTS -u FILECHECK_DUMP_INPUT_ON_FAILURE')) + 'env -u FILECHECK_OPTS')) diff --git a/llvm/test/FileCheck/match-full-lines.txt b/llvm/test/FileCheck/match-full-lines.txt index 114f628d8bc927..d69ebbc4c5a9b9 100644 --- a/llvm/test/FileCheck/match-full-lines.txt +++ b/llvm/test/FileCheck/match-full-lines.txt @@ -1,8 +1,8 @@ // RUN: %ProtectFileCheckOutput \ -// RUN: not FileCheck -match-full-lines -input-file %s %s 2>&1 \ +// RUN: not FileCheck -match-full-lines -dump-input=never -input-file %s %s 2>&1 \ // RUN: | FileCheck --check-prefix=ERROR --implicit-check-not=error: %s // RUN: %ProtectFileCheckOutput \ -// RUN: not FileCheck -match-full-lines -strict-whitespace -input-file %s %s \ +// RUN: not FileCheck -match-full-lines -strict-whitespace -dump-input=never -input-file %s %s \ // RUN: 2>&1 | FileCheck --check-prefix=ERROR-STRICT --check-prefix=ERROR \ // RUN: --implicit-check-not=error: %s diff --git a/llvm/test/FileCheck/verbose.txt b/llvm/test/FileCheck/verbose.txt index 66c4b1efbe06b5..f852702a9b1f8b 100644 --- a/llvm/test/FileCheck/verbose.txt +++ b/llvm/test/FileCheck/verbose.txt @@ -1,8 +1,8 @@ -; RUN: %ProtectFileCheckOutput FileCheck -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -input-file %s %s 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET --allow-empty %s -; RUN: %ProtectFileCheckOutput FileCheck -v -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -v 
-input-file %s %s 2>&1 \ ; RUN: | FileCheck --strict-whitespace -check-prefix V %s -; RUN: %ProtectFileCheckOutput FileCheck -vv -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv -input-file %s %s 2>&1 \ ; RUN: | FileCheck --strict-whitespace -check-prefixes V,VV %s foo diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll index b2171a44f57ef8..2602640595e65e 100644 --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s --dump-input-on-failure +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index 3ee7c5a66a908f..8e41365de81a84 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -106,15 +106,6 @@ static cl::opt VerboseVerbose( cl::desc("Print information helpful in diagnosing internal FileCheck\n" "issues, or add it to the input dump if enabled. Implies\n" "-v.\n")); -static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE"; - -static cl::opt DumpInputOnFailure( - "dump-input-on-failure", - cl::init(std::getenv(DumpInputEnv) && *std::getenv(DumpInputEnv)), - cl::desc("Dump original input to stderr before failing.\n" - "The value can be also controlled using\n" - "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n" - "This option is deprecated in favor of -dump-input=fail.\n")); // The order of DumpInputValue members affects their precedence, as documented // for -dump-input below. @@ -678,7 +669,7 @@ int main(int argc, char **argv) { SMLoc()); if (DumpInput == DumpInputDefault) - DumpInput = DumpInputOnFailure ? 
DumpInputFail : DumpInputNever; + DumpInput = DumpInputFail; std::vector Diags; int ExitCode = FC.checkInput(SM, InputFileText, diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index dd2d3f52f89cb1..cfc0dab86e1105 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -26,9 +26,8 @@ def fromdefaults(litConfig): 'LSAN_OPTIONS', 'ADB', 'ANDROID_SERIAL', 'SANITIZER_IGNORE_CVE_2016_2143', 'TMPDIR', 'TMP', 'TEMP', 'TEMPDIR', 'AVRLIT_BOARD', 'AVRLIT_PORT', - 'FILECHECK_DUMP_INPUT_ON_FAILURE', 'FILECHECK_OPTS', - 'VCINSTALLDIR', 'VCToolsinstallDir', 'VSINSTALLDIR', - 'WindowsSdkDir', 'WindowsSDKLibVersion'] + 'FILECHECK_OPTS', 'VCINSTALLDIR', 'VCToolsinstallDir', + 'VSINSTALLDIR', 'WindowsSdkDir', 'WindowsSDKLibVersion'] if sys.platform == 'win32': pass_vars.append('INCLUDE') diff --git a/llvm/utils/lit/tests/lit.cfg b/llvm/utils/lit/tests/lit.cfg index ba9cb4da9cfd1f..f5686873ec891d 100644 --- a/llvm/utils/lit/tests/lit.cfg +++ b/llvm/utils/lit/tests/lit.cfg @@ -61,7 +61,7 @@ config.substitutions.append(('%{inputs}', os.path.join( config.test_source_root, 'Inputs'))) config.substitutions.append(('%{lit}', "{env} %{{python}} {lit}".format( - env="env -u FILECHECK_OPTS -u FILECHECK_DUMP_INPUT_ON_FAILURE", + env="env -u FILECHECK_OPTS", lit=os.path.join(lit_path, 'lit.py')))) config.substitutions.append(('%{python}', '"%s"' % (sys.executable))) diff --git a/mlir/test/Analysis/test-callgraph.mlir b/mlir/test/Analysis/test-callgraph.mlir index 8c295ff248e554..bb5ba60742006c 100644 --- a/mlir/test/Analysis/test-callgraph.mlir +++ b/mlir/test/Analysis/test-callgraph.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-callgraph -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-callgraph -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : "simple" module attributes {test.name = "simple"} { diff --git a/mlir/test/Analysis/test-dominance.mlir 
b/mlir/test/Analysis/test-dominance.mlir index 6366a49a62e3ef..9430038a538f96 100644 --- a/mlir/test/Analysis/test-dominance.mlir +++ b/mlir/test/Analysis/test-dominance.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-dominance -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-dominance -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_condBranch func @func_condBranch(%cond : i1) { diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir index 9e1329f3609b23..3beb2186afb55f 100644 --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-liveness -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-liveness -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_empty func @func_empty() { diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 925615c0674e7d..20d166bab05d1a 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 4404cebec85319..61becff83c6cfd 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git 
a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir index 44f170bc43bbc9..451fcbe173da0c 100644 --- a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir +++ b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=0 gpu-thread-dims=1" %s | FileCheck --check-prefix=CHECK-THREADS %s --dump-input-on-failure -// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=1 gpu-thread-dims=0" %s | FileCheck --check-prefix=CHECK-BLOCKS %s --dump-input-on-failure +// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=0 gpu-thread-dims=1" %s | FileCheck --check-prefix=CHECK-THREADS %s +// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=1 gpu-thread-dims=0" %s | FileCheck --check-prefix=CHECK-BLOCKS %s // CHECK-THREADS-LABEL: @one_d_loop // CHECK-BLOCKS-LABEL: @one_d_loop diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir index 52ed94cae567e7..de19331ce91192 100644 --- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s // 2-d parallel loop mapped to block.y and block.x diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index fd1f980c536938..7c7098d76afa86 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --convert-shape-to-std --verify-diagnostics %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt --split-input-file 
--convert-shape-to-std --verify-diagnostics %s | FileCheck %s // Convert `size` to `index` type. // CHECK-LABEL: @size_id diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index d15f10fd75ecee..51394ab615258c 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s // CHECK: module attributes {gpu.container_module} diff --git a/mlir/test/Dialect/Linalg/fusion-tensor.mlir b/mlir/test/Dialect/Linalg/fusion-tensor.mlir index 6d6a409edbd2a8..5f1f90707a6ebf 100644 --- a/mlir/test/Dialect/Linalg/fusion-tensor.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file | FileCheck %s // CHECK-DAG: [[MAP0:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0, d1)> #map0 = affine_map<(d0, d1) -> (d0, d1)> diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir index 2f472aa6aaf2d4..db47e8eea6165c 100644 --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s func @f1(%A: memref, %B: memref, diff --git a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir index de16e4b50f33ac..c14db3bed1c4b6 100644 --- a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir +++ b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-linalg-fusion -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s #map = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> #id_2d = affine_map<(d0, d1) -> (d0, d1)> diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir index 2174ddc3c269da..597990eac264eb 100644 --- a/mlir/test/Dialect/Linalg/parallel_loops.mlir +++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-linalg-to-parallel-loops -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-linalg-to-parallel-loops -split-input-file | FileCheck %s #map0 = affine_map<(d0, d1) -> (d0, d1)> func @linalg_generic_sum(%lhs: memref<2x2xf32>, diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir index ed82c93622dff5..a744d14af74a9e 100644 --- a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir +++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s #map0 = affine_map<(d0) -> (d0)> diff --git a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir index 98cecc3e81e2e8..273f6491315973 100644 --- a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir +++ b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000 --dump-input-on-failure +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000 // TILE-23004-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> // TILE-20000-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> diff --git a/mlir/test/Dialect/Linalg/tile_parallel.mlir b/mlir/test/Dialect/Linalg/tile_parallel.mlir index 963051b7c7b308..18d9d2016b1d24 100644 --- a/mlir/test/Dialect/Linalg/tile_parallel.mlir +++ b/mlir/test/Dialect/Linalg/tile_parallel.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-2 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,2" | FileCheck %s -check-prefix=TILE-02 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,0,2" | FileCheck %s -check-prefix=TILE-002 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,3,4" | FileCheck %s -check-prefix=TILE-234 --dump-input-on-failure +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-2 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,2" | FileCheck %s -check-prefix=TILE-02 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,0,2" | FileCheck %s -check-prefix=TILE-002 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,3,4" | FileCheck %s -check-prefix=TILE-234 #id_2d = affine_map<(i, j) -> (i, j)> #pointwise_2d_trait = { diff --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir index c21451d8cf7fdf..1058983f5fb9a4 100644 --- a/mlir/test/Dialect/SCF/ops.mlir +++ b/mlir/test/Dialect/SCF/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | FileCheck %s // Verify 
the printed output can be parsed. -// RUN: mlir-opt %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | mlir-opt | FileCheck %s // Verify the generic form can be parsed. -// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s func @std_for(%arg0 : index, %arg1 : index, %arg2 : index) { scf.for %i0 = %arg0 to %arg1 step %arg2 { diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir index 6ed5ad36819e7c..8e6769961c10c6 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(parallel-loop-fusion)' -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(parallel-loop-fusion)' -split-input-file | FileCheck %s func @fuse_empty_loops() { %c2 = constant 2 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir index 5843eb6d4134a8..d7c0f1d3074e3a 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -parallel-loop-specialization -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -parallel-loop-specialization -split-input-file | FileCheck %s #map0 = affine_map<()[s0, s1] -> (1024, s0 - s1)> #map1 = affine_map<()[s0, s1] -> (64, s0 - s1)> diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir index 7b37830e8c5db9..14912436f96b27 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-pass-pipeline='func(parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -pass-pipeline='func(parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, diff --git a/mlir/test/Dialect/Shape/ops.mlir b/mlir/test/Dialect/Shape/ops.mlir index d25a7f01535e0e..a6668187f078df 100644 --- a/mlir/test/Dialect/Shape/ops.mlir +++ b/mlir/test/Dialect/Shape/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s // Verify the printed output can be parsed. -// RUN: mlir-opt %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | mlir-opt | FileCheck %s // Verify the generic form can be parsed. -// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s // CHECK-LABEL: shape_num_elements func @shape_num_elements(%shape : !shape.shape) -> !shape.size { diff --git a/mlir/test/Dialect/Shape/shape-to-shape.mlir b/mlir/test/Dialect/Shape/shape-to-shape.mlir index d2338cddc5e1c2..b3be4c9de3a1bd 100644 --- a/mlir/test/Dialect/Shape/shape-to-shape.mlir +++ b/mlir/test/Dialect/Shape/shape-to-shape.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -shape-to-shape-lowering -split-input-file %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -shape-to-shape-lowering -split-input-file %s | FileCheck %s // CHECK-LABEL: func @num_elements_to_reduce( // CHECK-SAME: [[ARG:%.*]]: !shape.shape) -> [[SIZE_TY:!.*]] { diff --git a/mlir/test/Dialect/Standard/expand-atomic.mlir b/mlir/test/Dialect/Standard/expand-atomic.mlir index b4e65945f58aeb..2f5cc7c179ed45 100644 --- a/mlir/test/Dialect/Standard/expand-atomic.mlir +++ 
b/mlir/test/Dialect/Standard/expand-atomic.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -expand-atomic -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -expand-atomic -split-input-file | FileCheck %s // CHECK-LABEL: func @atomic_rmw_to_generic // CHECK-SAME: ([[F:%.*]]: memref<10xf32>, [[f:%.*]]: f32, [[i:%.*]]: index) diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 76d7a9a0e7df0b..da784205224a6e 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s --dump-input-on-failure -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX --dump-input-on-failure -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT #dotp_accesses = [ affine_map<(i) -> (i)>, diff --git a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir index e715755738de86..6a1e6ee85a7d47 100644 --- a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-flat-transpose=1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-flat-transpose=1 | FileCheck %s // Tests for lowering 2-D vector.transpose into 
vector.flat_transpose. // diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index b48fd99c8f7d54..4d0888e55312ab 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// RUN: mlir-edsc-builder-api-test | FileCheck %s -dump-input-on-failure +// RUN: mlir-edsc-builder-api-test | FileCheck %s #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Linalg/EDSC/Builders.h" diff --git a/mlir/test/IR/print-op-local-scope.mlir b/mlir/test/IR/print-op-local-scope.mlir index 93b25fca943ebc..2ff201cf6debbf 100644 --- a/mlir/test/IR/print-op-local-scope.mlir +++ b/mlir/test/IR/print-op-local-scope.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -mlir-print-local-scope | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -mlir-print-local-scope | FileCheck %s // CHECK: "foo.op"() : () -> memref (d0 * 2)>> "foo.op"() : () -> (memref (2*d0)>>) diff --git a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir index adf6e30fe6c6c8..97c96008f26910 100644 --- a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s // Since allowMemrefEscaping is on for Buffer Placement in this test pass, all // tensor typed function results are converted to memref and remain as function diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir 
b/mlir/test/Transforms/buffer-placement-preparation.mlir index cae2829ead1757..9b0755aad18009 100644 --- a/mlir/test/Transforms/buffer-placement-preparation.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s // CHECK-LABEL: func @func_signature_conversion func @func_signature_conversion(%arg0: tensor<4x8xf32>) { diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir index 4b401cc841afee..176e063a700be0 100644 --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s // This file checks the behaviour of BufferPlacement pass for moving Alloc and Dealloc // operations and inserting the missing the DeallocOps in their correct positions. 
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 6e24bb3b2d832a..f1ad305d5c87fd 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s // CHECK-LABEL: func @test_subi_zero func @test_subi_zero(%arg0: i32) -> i32 { diff --git a/mlir/test/Transforms/sccp-callgraph.mlir b/mlir/test/Transforms/sccp-callgraph.mlir index add65d9e33c5ae..c30cdf7bfb97de 100644 --- a/mlir/test/Transforms/sccp-callgraph.mlir +++ b/mlir/test/Transforms/sccp-callgraph.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s -dump-input-on-failure -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED /// Check that a constant is properly propagated through the arguments and /// results of a private function. 
diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index b4c850269a1d26..fc10d4c2d66e57 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -1,6 +1,6 @@ -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL --dump-input-on-failure -// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --check-prefix=DEF --dump-input-on-failure -// RUN: mlir-tblgen -print-records -I %S/../../include %s | FileCheck %s --check-prefix=RECORD --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL +// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --check-prefix=DEF +// RUN: mlir-tblgen -print-records -I %S/../../include %s | FileCheck %s --check-prefix=RECORD include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index a101103b08fc0f..655d49cbd3a7cf 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck --dump-input-on-failure %s +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" diff --git a/mlir/test/mlir-tblgen/op-derived-attribute.mlir b/mlir/test/mlir-tblgen/op-derived-attribute.mlir index b11df48a319c88..ec4f4dcf7dae42 100644 --- a/mlir/test/mlir-tblgen/op-derived-attribute.mlir +++ b/mlir/test/mlir-tblgen/op-derived-attribute.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-derived-attr -verify-diagnostics %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -test-derived-attr -verify-diagnostics %s | FileCheck %s // CHECK-LABEL: verifyDerivedAttributes func @verifyDerivedAttributes() { diff --git a/mlir/test/mlir-tblgen/op-format-spec.td b/mlir/test/mlir-tblgen/op-format-spec.td index 
613f3d1d482963..47255d47f8a761 100644 --- a/mlir/test/mlir-tblgen/op-format-spec.td +++ b/mlir/test/mlir-tblgen/op-format-spec.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-decls -asmformat-error-is-fatal=false -I %S/../../include %s -o=%t 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-decls -asmformat-error-is-fatal=false -I %S/../../include %s -o=%t 2>&1 | FileCheck %s // This file contains tests for the specification of the declarative op format. diff --git a/mlir/test/mlir-tblgen/op-interface.td b/mlir/test/mlir-tblgen/op-interface.td index cb53a77ac0cb36..8e5167e6fe539c 100644 --- a/mlir/test/mlir-tblgen/op-interface.td +++ b/mlir/test/mlir-tblgen/op-interface.td @@ -1,5 +1,5 @@ -// RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL --dump-input-on-failure -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP_DECL --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP_DECL include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/pattern.mlir b/mlir/test/mlir-tblgen/pattern.mlir index 50ec1688ddcce4..6154e6bc4c4579 100644 --- a/mlir/test/mlir-tblgen/pattern.mlir +++ b/mlir/test/mlir-tblgen/pattern.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-patterns -mlir-print-debuginfo %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -test-patterns -mlir-print-debuginfo %s | FileCheck %s // CHECK-LABEL: verifyFusedLocs func @verifyFusedLocs(%arg0 : i32) -> i32 { diff --git a/mlir/test/mlir-tblgen/predicate.td b/mlir/test/mlir-tblgen/predicate.td index a617208d157a0d..040d2b6de3935e 100644 --- a/mlir/test/mlir-tblgen/predicate.td +++ b/mlir/test/mlir-tblgen/predicate.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --dump-input-on-failure +// RUN: 
mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/return-types.mlir b/mlir/test/mlir-tblgen/return-types.mlir index d0eb364a6a9d78..01e6294564d157 100644 --- a/mlir/test/mlir-tblgen/return-types.mlir +++ b/mlir/test/mlir-tblgen/return-types.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-return-type -split-input-file -verify-diagnostics | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-return-type -split-input-file -verify-diagnostics | FileCheck %s // CHECK-LABEL: testCreateFunctions // This function tests invoking the create method with different inference From 6c5c4a2a50e1fcdd94c0288008af65c544a96452 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 9 Jun 2020 11:58:22 -0700 Subject: [PATCH 13/25] [lldb/Reproducers] Also collect ::open and ::fopen Report files opened trough ::open and ::fopen to the FileCollector. --- lldb/source/Host/posix/FileSystemPosix.cpp | 2 ++ lldb/source/Host/windows/FileSystem.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lldb/source/Host/posix/FileSystemPosix.cpp b/lldb/source/Host/posix/FileSystemPosix.cpp index 3660f67895a4f7..0aa34bc5943596 100644 --- a/lldb/source/Host/posix/FileSystemPosix.cpp +++ b/lldb/source/Host/posix/FileSystemPosix.cpp @@ -72,9 +72,11 @@ Status FileSystem::ResolveSymbolicLink(const FileSpec &src, FileSpec &dst) { } FILE *FileSystem::Fopen(const char *path, const char *mode) { + Collect(path); return llvm::sys::RetryAfterSignal(nullptr, ::fopen, path, mode); } int FileSystem::Open(const char *path, int flags, int mode) { + Collect(path); return llvm::sys::RetryAfterSignal(-1, ::open, path, flags, mode); } diff --git a/lldb/source/Host/windows/FileSystem.cpp b/lldb/source/Host/windows/FileSystem.cpp index cbd1915bdb448c..94872c99b15ecb 100644 --- a/lldb/source/Host/windows/FileSystem.cpp +++ b/lldb/source/Host/windows/FileSystem.cpp @@ -86,6 +86,7 @@ Status FileSystem::ResolveSymbolicLink(const 
FileSpec &src, FileSpec &dst) { } FILE *FileSystem::Fopen(const char *path, const char *mode) { + Collect(path); std::wstring wpath, wmode; if (!llvm::ConvertUTF8toWide(path, wpath)) return nullptr; @@ -98,6 +99,7 @@ FILE *FileSystem::Fopen(const char *path, const char *mode) { } int FileSystem::Open(const char *path, int flags, int mode) { + Collect(path); std::wstring wpath; if (!llvm::ConvertUTF8toWide(path, wpath)) return -1; From 6eeac6ae33046f022f2d2c857ef38d2329acfc88 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 9 Jun 2020 08:21:03 -0400 Subject: [PATCH 14/25] GlobalISel: Fix double printing new instructions in legalizer New instructions were getting printed both in createdInstr, and in the final printNewInstrs, so it made it look like the same instructions were created twice. This overall made reading the debug output harder. Stop printing the initial construction and only print new instructions in the summary at the end. This avoids printing the less useful case where instructions are sometimes initially created with no operands. I'm not sure this is the correct instance to remove; now the visible ordering is different. Now you will typically see the one erased instruction message before all the new instructions in order. I think this is the more logical view of typical legalization changes, although it's mechanically backwards from the normal insert-new-erase-old pattern. --- llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 0a2d71c275d582..a9bfc11d0aa68d 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -133,7 +133,6 @@ class LegalizerWorkListManager : public GISelChangeObserver { } void createdInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << ".. .. 
New MI: " << MI); LLVM_DEBUG(NewMIs.push_back(&MI)); createdOrChangedInstr(MI); } From bb6cb6bfe413e1f3e368f1bf0550a7517d7c8d66 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 9 Jun 2020 09:20:57 -0400 Subject: [PATCH 15/25] GlobalISel: Remove redundant check in verifier This was already checked earlier for all instructions. --- llvm/lib/CodeGen/MachineVerifier.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index df23ccf4e195c4..c477626172450a 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -915,9 +915,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: { - if (MI->getNumOperands() < MCID.getNumOperands()) - break; - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); if (DstTy.isVector()) report("Instruction cannot use a vector result type", MI); From babbf4441b6022a2d76f831316b7c3588ade9e15 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 7 Jun 2020 21:24:34 -0400 Subject: [PATCH 16/25] GlobalISel: Move some trivial MIRBuilder methods into the header The construction APIs for MachineIRBuilder don't make much sense, and it's been annoying to sort through it with these trivial functions separate from the declaration. 
--- .../CodeGen/GlobalISel/MachineIRBuilder.h | 40 +++++++++++++++---- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 38 ------------------ 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 118a177f88dfb6..44eac6bb2ba331 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -228,7 +228,11 @@ class MachineIRBuilder { void validateSelectOp(const LLT ResTy, const LLT TstTy, const LLT Op0Ty, const LLT Op1Ty); - void recordInsertion(MachineInstr *MI) const; + + void recordInsertion(MachineInstr *InsertedInstr) const { + if (State.Observer) + State.Observer->createdInstr(*InsertedInstr); + } public: /// Some constructors for easy use. @@ -292,10 +296,16 @@ class MachineIRBuilder { /// Set the insertion point before the specified position. /// \pre MBB must be in getMF(). /// \pre II must be a valid iterator in MBB. - void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II); + void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II) { + assert(MBB.getParent() == &getMF() && + "Basic block is in a different function"); + State.MBB = &MBB; + State.II = II; + } + /// @} - void setCSEInfo(GISelCSEInfo *Info); + void setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } /// \name Setters for the insertion point. /// @{ @@ -304,11 +314,20 @@ class MachineIRBuilder { /// Set the insertion point to the end of \p MBB. /// \pre \p MBB must be contained by getMF(). - void setMBB(MachineBasicBlock &MBB); + void setMBB(MachineBasicBlock &MBB) { + State.MBB = &MBB; + State.II = MBB.end(); + assert(&getMF() == MBB.getParent() && + "Basic block is in a different function"); + } /// Set the insertion point to before MI. /// \pre MI must be in getMF(). 
- void setInstr(MachineInstr &MI); + void setInstr(MachineInstr &MI) { + assert(MI.getParent() && "Instruction is not part of a basic block"); + setMBB(*MI.getParent()); + State.II = MI.getIterator(); + } /// @} /// Set the insertion point to before MI, and set the debug loc to MI's loc. @@ -318,8 +337,11 @@ class MachineIRBuilder { setDebugLoc(MI.getDebugLoc()); } - void setChangeObserver(GISelChangeObserver &Observer); - void stopObservingChanges(); + void setChangeObserver(GISelChangeObserver &Observer) { + State.Observer = &Observer; + } + + void stopObservingChanges() { State.Observer = nullptr; } /// @} /// Set the debug location to \p DL for all the next build instructions. @@ -335,7 +357,9 @@ class MachineIRBuilder { /// \pre setBasicBlock or setMI must have been called. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildInstr(unsigned Opcode); + MachineInstrBuilder buildInstr(unsigned Opcode) { + return insertInstr(buildInstrNoInsert(Opcode)); + } /// Build but don't insert = \p Opcode . 
/// diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 4236fdd8208425..ea98233beb0eca 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -33,48 +33,10 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { State.Observer = nullptr; } -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { - State.MBB = &MBB; - State.II = MBB.end(); - assert(&getMF() == MBB.getParent() && - "Basic block is in a different function"); -} - -void MachineIRBuilder::setInstr(MachineInstr &MI) { - assert(MI.getParent() && "Instruction is not part of a basic block"); - setMBB(*MI.getParent()); - State.II = MI.getIterator(); -} - -void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } - -void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator II) { - assert(MBB.getParent() == &getMF() && - "Basic block is in a different function"); - State.MBB = &MBB; - State.II = II; -} - -void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const { - if (State.Observer) - State.Observer->createdInstr(*InsertedInstr); -} - -void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) { - State.Observer = &Observer; -} - -void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; } - //------------------------------------------------------------------------------ // Build instruction variants. 
//------------------------------------------------------------------------------ -MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { - return insertInstr(buildInstrNoInsert(Opcode)); -} - MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); return MIB; From b94c9e3b55ab97f6646018dec2c1d3647c04cda3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 7 Jun 2020 21:37:29 -0400 Subject: [PATCH 17/25] GlobalISel: Improve MachineIRBuilder construction The current relationship between LegalizerHelper and MachineIRBuilder confuses me, because the LegalizerHelper modifies the MachineIRBuilder which it does not own. Constructing a LegalizerHelper destroys the insert point, since the constructor calls setMF, which clears all the fields. Try to separate these functions, so it's possible to construct a LegalizerHelper from an existing MachineIRBuilder without losing the insert point/debug loc. --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 22 +++++++++++++------ llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 1 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 -- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 44eac6bb2ba331..d6498345f25c85 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -35,23 +35,23 @@ class GISelChangeObserver; /// to transfer BuilderState between different kinds of MachineIRBuilders. struct MachineIRBuilderState { /// MachineFunction under construction. - MachineFunction *MF; + MachineFunction *MF = nullptr; /// Information used to access the description of the opcodes. - const TargetInstrInfo *TII; + const TargetInstrInfo *TII = nullptr; /// Information used to verify types are consistent and to create virtual registers. 
- MachineRegisterInfo *MRI; + MachineRegisterInfo *MRI = nullptr; /// Debug location to be set to any instruction we create. DebugLoc DL; /// \name Fields describing the insertion point. /// @{ - MachineBasicBlock *MBB; + MachineBasicBlock *MBB = nullptr; MachineBasicBlock::iterator II; /// @} - GISelChangeObserver *Observer; + GISelChangeObserver *Observer = nullptr; - GISelCSEInfo *CSEInfo; + GISelCSEInfo *CSEInfo = nullptr; }; class DstOp { @@ -238,8 +238,16 @@ class MachineIRBuilder { /// Some constructors for easy use. MachineIRBuilder() = default; MachineIRBuilder(MachineFunction &MF) { setMF(MF); } - MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { + + MachineIRBuilder(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt) { + setMF(*MBB.getParent()); + setInsertPt(MBB, InsPt); + } + + MachineIRBuilder(MachineInstr &MI) : + MachineIRBuilder(*MI.getParent(), MI.getIterator()) { setInstr(MI); + setDebugLoc(MI.getDebugLoc()); } virtual ~MachineIRBuilder() = default; diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index a9bfc11d0aa68d..1d7be54de3b045 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -169,6 +169,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, ArrayRef AuxObservers, LostDebugLocObserver &LocObserver, MachineIRBuilder &MIRBuilder) { + MIRBuilder.setMF(MF); MachineRegisterInfo &MRI = MF.getRegInfo(); // Populate worklists. 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c38d08f41e685d..3a6d499c9cde59 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -87,7 +87,6 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, MachineIRBuilder &Builder) : MIRBuilder(Builder), MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { - MIRBuilder.setMF(MF); MIRBuilder.setChangeObserver(Observer); } @@ -95,7 +94,6 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { - MIRBuilder.setMF(MF); MIRBuilder.setChangeObserver(Observer); } LegalizerHelper::LegalizeResult From 113b0d7d0bd637743efb050ad619dd0c6d306e96 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Tue, 9 Jun 2020 12:19:35 -0700 Subject: [PATCH 18/25] PR46255: Fix field diagnostics for C records with anonymous members. The ParseStructUnionBody function was separately keeping track of the field decls for historical reasons, however the "ActOn" functions add the field to the RecordDecl anyway. The "ParseStructDeclaration" function, which handles parsing fields didn't have a way of handling what happens on an anonymous field, and changing it would alter a large amount of objc code, so I chose instead to implement this by just filling the FieldDecls vector with the actual FieldDecls that were successfully added to the RecordDecl. 
--- .../clang/Basic/DiagnosticSemaKinds.td | 5 ++- clang/include/clang/Parse/Parser.h | 2 +- clang/lib/Parse/ParseDecl.cpp | 9 ++-- clang/lib/Parse/ParseDeclCXX.cpp | 2 +- clang/test/Sema/struct-decl.c | 41 +++++++++++++++++++ 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 84bcf66a148e5c..e1adf199a12b93 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5713,8 +5713,9 @@ def ext_flexible_array_union_gnu : Extension< def err_flexible_array_not_at_end : Error< "flexible array member %0 with type %1 is not at the end of" " %select{struct|interface|union|class|enum}2">; -def err_objc_variable_sized_type_not_at_end : Error< - "field %0 with variable sized type %1 is not at the end of class">; +def err_objc_variable_sized_type_not_at_end + : Error<"%select{field %1|unnamed field}0 with variable sized type %2 is " + "not at the end of class">; def note_next_field_declaration : Note< "next field declaration is here">; def note_next_ivar_declaration : Note< diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index b6b161e482ac15..1ae219781c696c 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2333,7 +2333,7 @@ class Parser : public CodeCompletionHandler { AccessSpecifier AS, DeclSpecContext DSC); void ParseEnumBody(SourceLocation StartLoc, Decl *TagDecl); void ParseStructUnionBody(SourceLocation StartLoc, DeclSpec::TST TagType, - Decl *TagDecl); + RecordDecl *TagDecl); void ParseStructDeclaration( ParsingDeclSpec &DS, diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 7e761978455731..79a3b19bac5766 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4249,7 +4249,7 @@ void Parser::ParseStructDeclaration( /// [OBC] '@' 'defs' '(' class-name ')' /// 
void Parser::ParseStructUnionBody(SourceLocation RecordLoc, - DeclSpec::TST TagType, Decl *TagDecl) { + DeclSpec::TST TagType, RecordDecl *TagDecl) { PrettyDeclStackTraceEntry CrashInfo(Actions.Context, TagDecl, RecordLoc, "parsing struct/union body"); assert(!getLangOpts().CPlusPlus && "C++ declarations not supported"); @@ -4261,8 +4261,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, ParseScope StructScope(this, Scope::ClassScope|Scope::DeclScope); Actions.ActOnTagStartDefinition(getCurScope(), TagDecl); - SmallVector FieldDecls; - // While we still have something to read, read the declarations in the struct. while (!tryParseMisplacedModuleImport() && Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { @@ -4314,7 +4312,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, Actions.ActOnField(getCurScope(), TagDecl, FD.D.getDeclSpec().getSourceRange().getBegin(), FD.D, FD.BitfieldSize); - FieldDecls.push_back(Field); FD.complete(Field); }; @@ -4338,7 +4335,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, SmallVector Fields; Actions.ActOnDefs(getCurScope(), TagDecl, Tok.getLocation(), Tok.getIdentifierInfo(), Fields); - FieldDecls.insert(FieldDecls.end(), Fields.begin(), Fields.end()); ConsumeToken(); ExpectAndConsume(tok::r_paren); } @@ -4364,6 +4360,9 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, // If attributes exist after struct contents, parse them. 
MaybeParseGNUAttributes(attrs); + SmallVector FieldDecls(TagDecl->field_begin(), + TagDecl->field_end()); + Actions.ActOnFields(getCurScope(), RecordLoc, TagDecl, FieldDecls, T.getOpenLocation(), T.getCloseLocation(), attrs); StructScope.Exit(); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 1a82475117baac..8753c929287512 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1964,7 +1964,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagOrTempResult.get(); // Parse the definition body. - ParseStructUnionBody(StartLoc, TagType, D); + ParseStructUnionBody(StartLoc, TagType, cast(D)); if (SkipBody.CheckSameAsPrevious && !Actions.ActOnDuplicateDefinition(DS, TagOrTempResult.get(), SkipBody)) { diff --git a/clang/test/Sema/struct-decl.c b/clang/test/Sema/struct-decl.c index 80cac0e0d145cf..ee3e79182eaa7d 100644 --- a/clang/test/Sema/struct-decl.c +++ b/clang/test/Sema/struct-decl.c @@ -69,3 +69,44 @@ void test_hiding() { struct PreserveAttributes {}; typedef struct __attribute__((noreturn)) PreserveAttributes PreserveAttributes_t; // expected-warning {{'noreturn' attribute only applies to functions and methods}} + +// PR46255 +struct FlexibleArrayMem { + int a; + int b[]; +}; + +struct FollowedByNamed { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + int i; +}; + +struct FollowedByUnNamed { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + struct { + int i; + }; +}; + +struct InAnonymous { + struct { // expected-warning-re {{field '' with variable sized type 'struct InAnonymous::(anonymous at {{.+}})' not at the end of a struct or class is a GNU extension}} + + struct FlexibleArrayMem 
a; + }; + int i; +}; +struct InAnonymousFollowedByAnon { + struct { // expected-warning-re {{field '' with variable sized type 'struct InAnonymousFollowedByAnon::(anonymous at {{.+}})' not at the end of a struct or class is a GNU extension}} + + struct FlexibleArrayMem a; + }; + struct { + int i; + }; +}; + +// This is the behavior in C++ as well, so making sure we reproduce it here. +struct InAnonymousFollowedByEmpty { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + struct {}; +}; From f71a3b54f0c5c300440c5ce21c76b5f7f41fc626 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 9 Jun 2020 12:23:36 -0400 Subject: [PATCH 19/25] [InstCombine] add tests for diff-of-sums; NFC --- .../InstCombine/vector-reductions.ll | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/vector-reductions.ll diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll new file mode 100644 index 00000000000000..5eac0e09414ca5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) +declare void @use_f32(float) + +define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_v4f32( +; CHECK-NEXT: [[R0:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = 
fsub reassoc nsz float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub reassoc nsz float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_v4f32_fmf( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub ninf nnan nsz float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_extra_use1( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[R0]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + call void @use_f32(float %r0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub fast float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_extra_use2(float %a0, 
<4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_extra_use2( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[R1]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + call void @use_f32(float %r1) + %r = fsub fast float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_type_mismatch( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a1, <8 x float> %v1) + %r = fsub fast float %r0, %r1 + ret float %r +} From 0e04342ae0399876f3488464d12f5a4da5085456 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 9 Jun 2020 15:14:13 -0400 Subject: [PATCH 20/25] [NFCI] Clean up exceptions related CMake and Lit options in libc++abi and libunwind First, libc++abi doesn't need to add the no-exceptions Lit feature itself, since that is already done in the config.py for libc++, which it reuses. 
Specifically, config.enable_exceptions is set based on @LIBCXXABI_ENABLE_EXCEPTIONS@ in libc++abi's lit.cfg.in, and libc++'s config.py handles that correctly. Secondly, libunwind's LIBUNWIND_ENABLE_EXCEPTIONS is never set (it's probably a remnant of copy-pasting code between the runtime libraries), so the library is always built with exceptions disabled (which makes sense since it implements the runtime support for exceptions). Conversely, the test suite is always run with exceptions enabled (not sure why), but that is preserved by the default behavior of libc++'s config.py. --- libcxxabi/test/libcxxabi/test/config.py | 2 -- libunwind/test/CMakeLists.txt | 1 - libunwind/test/libunwind/test/config.py | 4 ---- libunwind/test/lit.cfg | 3 --- libunwind/test/lit.site.cfg.in | 1 - 5 files changed, 11 deletions(-) diff --git a/libcxxabi/test/libcxxabi/test/config.py b/libcxxabi/test/libcxxabi/test/config.py index fe76d193e79ed2..f1eb453e09f313 100644 --- a/libcxxabi/test/libcxxabi/test/config.py +++ b/libcxxabi/test/libcxxabi/test/config.py @@ -38,8 +38,6 @@ def has_cpp_feature(self, feature, required_value): def configure_features(self): super(Configuration, self).configure_features() - if not self.get_lit_bool('enable_exceptions', True): - self.config.available_features.add('no-exceptions') if not self.has_cpp_feature('noexcept_function_type', 201510): self.config.available_features.add('libcxxabi-no-noexcept-function-type') if not self.get_lit_bool('llvm_unwinder', False): diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt index 40d4acd4e8c2a9..e608c1708b8abb 100644 --- a/libunwind/test/CMakeLists.txt +++ b/libunwind/test/CMakeLists.txt @@ -15,7 +15,6 @@ pythonize_bool(LIBUNWIND_BUILD_32_BITS) pythonize_bool(LIBCXX_ENABLE_SHARED) pythonize_bool(LIBUNWIND_ENABLE_SHARED) pythonize_bool(LIBUNWIND_ENABLE_THREADS) -pythonize_bool(LIBUNWIND_ENABLE_EXCEPTIONS) pythonize_bool(LIBUNWIND_USES_ARM_EHABI) pythonize_bool(LIBUNWIND_USE_COMPILER_RT) 
pythonize_bool(LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY) diff --git a/libunwind/test/libunwind/test/config.py b/libunwind/test/libunwind/test/config.py index 36501f230272cd..7e4f230d821c75 100644 --- a/libunwind/test/libunwind/test/config.py +++ b/libunwind/test/libunwind/test/config.py @@ -35,15 +35,11 @@ def has_cpp_feature(self, feature, required_value): def configure_features(self): super(Configuration, self).configure_features() - if not self.get_lit_bool('enable_exceptions', True): - self.config.available_features.add('no-exceptions') if self.get_lit_bool('arm_ehabi', False): self.config.available_features.add('libunwind-arm-ehabi') def configure_compile_flags(self): self.cxx.compile_flags += ['-DLIBUNWIND_NO_TIMER'] - if not self.get_lit_bool('enable_exceptions', True): - self.cxx.compile_flags += ['-fno-exceptions', '-DLIBUNWIND_HAS_NO_EXCEPTIONS'] # Stack unwinding tests need unwinding tables and these are not # generated by default on all Targets. self.cxx.compile_flags += ['-funwind-tables'] diff --git a/libunwind/test/lit.cfg b/libunwind/test/lit.cfg index 262f25af0d70a4..7f74bd6e4afb4c 100644 --- a/libunwind/test/lit.cfg +++ b/libunwind/test/lit.cfg @@ -23,9 +23,6 @@ config.suffixes = ['.cpp', '.s'] # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) -# needed to test libunwind with code that throws exceptions -config.enable_exceptions = True - # Infer the libcxx_test_source_root for configuration import. # If libcxx_source_root isn't specified in the config, assume that the libcxx # and libunwind source directories are sibling directories. 
diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in index 37f90a90efdb43..809ad1009f4bdc 100644 --- a/libunwind/test/lit.site.cfg.in +++ b/libunwind/test/lit.site.cfg.in @@ -18,7 +18,6 @@ config.test_compiler_flags = "@LIBUNWIND_TEST_COMPILER_FLAGS@" config.executor = "@LIBUNWIND_EXECUTOR@" config.libunwind_shared = @LIBUNWIND_ENABLE_SHARED@ config.enable_shared = @LIBCXX_ENABLE_SHARED@ -config.enable_exceptions = @LIBUNWIND_ENABLE_EXCEPTIONS@ config.arm_ehabi = @LIBUNWIND_USES_ARM_EHABI@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" From 32823091c36cfa2b27b717246f15d4f12591e6f4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 7 Jun 2020 20:57:28 -0400 Subject: [PATCH 21/25] GlobalISel: Set instr/debugloc before any legalizer action It was annoying enough that every custom lowering needed to set the insert point, but this was made worse since now these all needed to be updated to setInstrAndDebugLoc. Consolidate these so every legalization action has the right insert position by default. This should fix dropping debug info in every custom AMDGPU legalization. 
--- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 13 +---- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 -- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 58 ------------------- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 6 +- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 2 - .../GlobalISel/LegalizerHelperTest.cpp | 37 ++++++++++++ 6 files changed, 43 insertions(+), 77 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3a6d499c9cde59..6c6ef78816b8d9 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -100,6 +100,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); + MIRBuilder.setInstrAndDebugLoc(MI); + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) return LI.legalizeIntrinsic(MI, MIRBuilder, Observer) ? 
Legalized @@ -634,8 +636,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); - MIRBuilder.setInstrAndDebugLoc(MI); - switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -731,8 +731,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - MIRBuilder.setInstrAndDebugLoc(MI); - uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); uint64_t NarrowSize = NarrowTy.getSizeInBits(); @@ -1644,8 +1642,6 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - MIRBuilder.setInstrAndDebugLoc(MI); - switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -2195,8 +2191,6 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { - MIRBuilder.setInstr(MI); - switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { if (TypeIdx != 0) @@ -2251,7 +2245,6 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { LegalizerHelper::LegalizeResult LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { using namespace TargetOpcode; - MIRBuilder.setInstrAndDebugLoc(MI); switch(MI.getOpcode()) { default: @@ -3325,7 +3318,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { using namespace TargetOpcode; - MIRBuilder.setInstrAndDebugLoc(MI); switch (MI.getOpcode()) { case G_IMPLICIT_DEF: return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); @@ -3648,7 +3640,6 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) 
{ - MIRBuilder.setInstrAndDebugLoc(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { case TargetOpcode::G_IMPLICIT_DEF: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 26dcde47ccfe39..c02f8dd0bb62e3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -661,7 +661,6 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI, if (GV->isThreadLocal()) return true; // Don't want to modify TLS vars. - MIRBuilder.setInstrAndDebugLoc(MI); auto &TM = ST->getTargetLowering()->getTargetMachine(); unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM); @@ -717,7 +716,6 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( if (Amount > 31) return true; // This will have to remain a register variant. assert(MRI.getType(AmtReg).getSizeInBits() == 32); - MIRBuilder.setInstrAndDebugLoc(MI); auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg); MI.getOperand(2).setReg(ExtCst.getReg(0)); return true; @@ -746,7 +744,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore( return false; } - MIRBuilder.setInstrAndDebugLoc(MI); unsigned PtrSize = ValTy.getElementType().getSizeInBits(); const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize); auto &MMO = **MI.memoperands_begin(); @@ -764,7 +761,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore( bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { - MIRBuilder.setInstrAndDebugLoc(MI); MachineFunction &MF = MIRBuilder.getMF(); Align Alignment(MI.getOperand(2).getImm()); Register Dst = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index c1e9e225469259..6d383d409aa89d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1571,8 +1571,6 @@ bool 
AMDGPULegalizerInfo::legalizeAddrSpaceCast( MachineIRBuilder &B) const { MachineFunction &MF = B.getMF(); - B.setInstr(MI); - const LLT S32 = LLT::scalar(32); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1668,8 +1666,6 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( bool AMDGPULegalizerInfo::legalizeFrint( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - Register Src = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(Src); assert(Ty.isScalar() && Ty.getSizeInBits() == 64); @@ -1695,7 +1691,6 @@ bool AMDGPULegalizerInfo::legalizeFrint( bool AMDGPULegalizerInfo::legalizeFceil( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S1 = LLT::scalar(1); const LLT S64 = LLT::scalar(64); @@ -1740,8 +1735,6 @@ static MachineInstrBuilder extractF64Exponent(unsigned Hi, bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - const LLT S1 = LLT::scalar(1); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); @@ -1786,7 +1779,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( bool AMDGPULegalizerInfo::legalizeITOFP( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, bool Signed) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1820,7 +1812,6 @@ bool AMDGPULegalizerInfo::legalizeITOFP( bool AMDGPULegalizerInfo::legalizeFPTOI( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, bool Signed) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1871,7 +1862,6 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return 
Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; } @@ -1897,8 +1887,6 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Dst)); - B.setInstr(MI); - if (IdxVal->Value < VecTy.getNumElements()) B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits()); else @@ -1931,8 +1919,6 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Ins)); - B.setInstr(MI); - if (IdxVal->Value < VecTy.getNumElements()) B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits()); else @@ -1959,14 +1945,12 @@ bool AMDGPULegalizerInfo::legalizeShuffleVector( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(B.getMF(), DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return Helper.lowerShuffleVector(MI) == LegalizerHelper::Legalized; } bool AMDGPULegalizerInfo::legalizeSinCos( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2058,7 +2042,6 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( const GlobalValue *GV = MI.getOperand(1).getGlobal(); MachineFunction &MF = B.getMF(); SIMachineFunctionInfo *MFI = MF.getInfo(); - B.setInstr(MI); if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { if (!MFI->isEntryFunction()) { @@ -2138,7 +2121,6 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( bool AMDGPULegalizerInfo::legalizeLoad( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, GISelChangeObserver &Observer) const { - B.setInstr(MI); LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg()); Observer.changingInstr(MI); @@ -2166,7 +2148,6 @@ bool AMDGPULegalizerInfo::legalizeFMad( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper 
DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return Helper.lowerFMad(MI) == LegalizerHelper::Legalized; } @@ -2184,7 +2165,6 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg( LLT ValTy = MRI.getType(CmpVal); LLT VecTy = LLT::vector(2, ValTy); - B.setInstr(MI); Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0); B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG) @@ -2203,7 +2183,6 @@ bool AMDGPULegalizerInfo::legalizeFlog( Register Src = MI.getOperand(1).getReg(); LLT Ty = B.getMRI()->getType(Dst); unsigned Flags = MI.getFlags(); - B.setInstr(MI); auto Log2Operand = B.buildFLog2(Ty, Src, Flags); auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted); @@ -2219,7 +2198,6 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, Register Src = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - B.setInstr(MI); auto K = B.buildFConstant(Ty, numbers::log2e); auto Mul = B.buildFMul(Ty, Src, K, Flags); @@ -2235,7 +2213,6 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI, Register Src1 = MI.getOperand(2).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - B.setInstr(MI); const LLT S16 = LLT::scalar(16); const LLT S32 = LLT::scalar(32); @@ -2279,7 +2256,6 @@ static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) { bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S1 = LLT::scalar(1); const LLT S64 = LLT::scalar(64); @@ -2345,7 +2321,6 @@ bool AMDGPULegalizerInfo::legalizeBuildVector( Register Src1 = MI.getOperand(2).getReg(); assert(MRI.getType(Src0) == LLT::scalar(16)); - B.setInstr(MI); auto Merge = B.buildMerge(S32, {Src0, Src1}); B.buildBitcast(Dst, Merge); @@ -2483,7 +2458,6 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, bool 
AMDGPULegalizerInfo::legalizePreloadedArgIntrin( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { - B.setInstr(MI); const ArgDescriptor *Arg = getArgDescriptor(B, ArgType); if (!Arg) @@ -2499,7 +2473,6 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst); LLT S16 = LLT::scalar(16); @@ -2622,7 +2595,6 @@ void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B, bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const bool IsRem = MI.getOpcode() == AMDGPU::G_UREM; Register DstReg = MI.getOperand(0).getReg(); Register Num = MI.getOperand(1).getReg(); @@ -2678,8 +2650,6 @@ static std::pair emitReciprocalU64(MachineIRBuilder &B, bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - const bool IsDiv = MI.getOpcode() == TargetOpcode::G_UDIV; const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); @@ -2808,7 +2778,6 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S32 = LLT::scalar(32); const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM; @@ -2915,7 +2884,6 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -2978,7 +2946,6 @@ static void toggleSPDenormMode(bool Enable, bool 
AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3045,7 +3012,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3124,7 +3090,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); @@ -3166,8 +3131,6 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR); } - B.setInstr(MI); - uint64_t Offset = ST.getTargetLowering()->getImplicitParameterOffset( B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT); @@ -3195,7 +3158,6 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const { - B.setInstr(MI); Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32); B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); @@ -3303,8 +3265,6 @@ bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI, MachineIRBuilder &B, bool IsTyped, bool IsFormat) const { - B.setInstr(MI); - Register VData = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(VData); LLT EltTy = Ty.getScalarType(); @@ -3395,8 +3355,6 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, MachineIRBuilder &B, bool IsFormat, bool 
IsTyped) const { - B.setInstr(MI); - // FIXME: Verifier should enforce 1 MMO for these intrinsics. MachineMemOperand *MMO = *MI.memoperands_begin(); const int MemSize = MMO->getSize(); @@ -3515,7 +3473,6 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B, bool IsInc) const { - B.setInstr(MI); unsigned Opc = IsInc ? AMDGPU::G_AMDGPU_ATOMIC_INC : AMDGPU::G_AMDGPU_ATOMIC_DEC; B.buildInstr(Opc) @@ -3576,8 +3533,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) { bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B, Intrinsic::ID IID) const { - B.setInstr(MI); - const bool IsCmpSwap = IID == Intrinsic::amdgcn_raw_buffer_atomic_cmpswap || IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap; @@ -3733,7 +3688,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const { - B.setInstr(MI); const int NumDefs = MI.getNumExplicitDefs(); bool IsTFE = NumDefs == 2; @@ -3913,8 +3867,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (!Ty.isVector() || Ty.getElementType() != S16) return true; - B.setInstr(MI); - Register RepackedReg = handleD16VData(B, *MRI, VData); if (RepackedReg != VData) { MI.getOperand(1).setReg(RepackedReg); @@ -4118,7 +4070,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( // out this needs to be converted to a vector load during RegBankSelect. if (!isPowerOf2_32(Size)) { LegalizerHelper Helper(MF, *this, Observer, B); - B.setInstr(MI); if (Ty.isVector()) Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0); @@ -4133,8 +4084,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - // Is non-HSA path or trap-handler disabled? 
then, insert s_endpgm instruction if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || !ST.isTrapHandlerEnabled()) { @@ -4165,8 +4114,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - // Is non-HSA path or trap-handler disabled? then, report a warning // accordingly if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || @@ -4201,7 +4148,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const SIRegisterInfo *TRI = static_cast(MRI.getTargetRegisterInfo()); - B.setInstr(*BrCond); Register Def = MI.getOperand(1).getReg(); Register Use = MI.getOperand(3).getReg(); @@ -4244,8 +4190,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const SIRegisterInfo *TRI = static_cast(MRI.getTargetRegisterInfo()); - B.setInstr(*BrCond); - MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB(); Register Reg = MI.getOperand(2).getReg(); B.buildInstr(AMDGPU::SI_LOOP) @@ -4267,7 +4211,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, } case Intrinsic::amdgcn_kernarg_segment_ptr: if (!AMDGPU::isKernel(B.getMF().getFunction().getCallingConv())) { - B.setInstr(MI); // This only makes sense to call in a kernel, so just lower to null. 
B.buildConstant(MI.getOperand(0).getReg(), 0); MI.eraseFromParent(); @@ -4315,7 +4258,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, case Intrinsic::amdgcn_is_private: return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS); case Intrinsic::amdgcn_wavefrontsize: { - B.setInstr(MI); B.buildConstant(MI.getOperand(0), ST.getWavefrontSize()); MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index bec5e0ea082930..040c0ead66db01 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2209,7 +2209,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; const LLT S32 = LLT::scalar(32); - MachineFunction *MF = MI.getParent()->getParent(); + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); MachineIRBuilder B(MI); ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank); GISelObserverWrapper Observer(&ApplySALU); @@ -2234,9 +2235,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl( if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized) llvm_unreachable("widen scalar should have succeeded"); - // FIXME: s16 shift amounts should be lgeal. + // FIXME: s16 shift amounts should be legal. 
if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR || Opc == AMDGPU::G_ASHR) { + B.setInsertPt(*MBB, MI.getIterator()); if (Helper.widenScalar(MI, 1, S32) != LegalizerHelper::Legalized) llvm_unreachable("widen scalar should have succeeded"); } diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 6388e8d2d65771..0afc152744136e 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -333,7 +333,6 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI, using namespace TargetOpcode; - MIRBuilder.setInstr(MI); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); @@ -507,7 +506,6 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const MipsInstrInfo &TII = *ST.getInstrInfo(); const MipsRegisterInfo &TRI = *ST.getRegisterInfo(); const RegisterBankInfo &RBI = *ST.getRegBankInfo(); - MIRBuilder.setInstr(MI); switch (MI.getIntrinsicID()) { case Intrinsic::memcpy: diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 93f4f703d239b7..2cfab39d456228 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -176,6 +176,8 @@ TEST_F(AArch64GISelMITest, LowerBitCountingCTTZ2) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + + B.setInsertPt(*EntryMBB, MIBCTTZ->getIterator()); EXPECT_TRUE(Helper.lower(*MIBCTTZ, 0, LLT::scalar(64)) == LegalizerHelper::LegalizeResult::Legalized); @@ -2583,6 +2585,7 @@ TEST_F(AArch64GISelMITest, BitcastLoad) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; + B.setInsertPt(*EntryMBB, Load->getIterator()); LegalizerHelper Helper(*MF, Info, Observer, B); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Load, 0, S32)); @@ -2618,6 +2621,7 @@ TEST_F(AArch64GISelMITest, 
BitcastStore) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, Store->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Store, 0, S32)); @@ -2651,6 +2655,7 @@ TEST_F(AArch64GISelMITest, BitcastSelect) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, Select->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Select, 0, S32)); @@ -2669,6 +2674,8 @@ TEST_F(AArch64GISelMITest, BitcastSelect) { // Doesn't make sense auto VCond = B.buildUndef(LLT::vector(4, 1)); auto VSelect = B.buildSelect(V4S8, VCond, Val0, Val1); + + B.setInsertPt(*EntryMBB, VSelect->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize, Helper.bitcast(*VSelect, 0, S32)); EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize, @@ -2694,10 +2701,15 @@ TEST_F(AArch64GISelMITest, BitcastBitOps) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, And->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*And, 0, S32)); + + B.setInsertPt(*EntryMBB, Or->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Or, 0, S32)); + + B.setInsertPt(*EntryMBB, Xor->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Xor, 0, S32)); @@ -2773,12 +2785,20 @@ TEST_F(AArch64GISelMITest, NarrowImplicitDef) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, Implicit1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit1, 0, S48)); + + B.setInsertPt(*EntryMBB, Implicit2->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit2, 0, 
S32)); + + B.setInsertPt(*EntryMBB, Implicit3->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit3, 0, S48)); + + B.setInsertPt(*EntryMBB, Implicit4->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit4, 0, S32)); @@ -2828,8 +2848,12 @@ TEST_F(AArch64GISelMITest, WidenFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeScalar->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.widenScalar(*FreezeScalar, 0, S128)); + + B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.widenScalar(*FreezeVector, 0, V2S64)); @@ -2879,12 +2903,20 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeScalar->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeScalar, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeOdd->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeOdd, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeVector, 0, V2S16)); + + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeVector1, 0, S16)); @@ -2954,8 +2986,12 @@ TEST_F(AArch64GISelMITest, FewerElementsFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.fewerElementsVector(*FreezeVector1, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeVector2->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, 
Helper.fewerElementsVector(*FreezeVector2, 0, V2S16)); @@ -2998,6 +3034,7 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.moreElementsVector(*FreezeVector1, 0, V4S32)); From 90ee8cf63613df525c4dab5e715fbae5f7310a78 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Tue, 9 Jun 2020 12:40:37 -0700 Subject: [PATCH 22/25] Undo change inadvertently added in 113b0d7d --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e1adf199a12b93..84bcf66a148e5c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5713,9 +5713,8 @@ def ext_flexible_array_union_gnu : Extension< def err_flexible_array_not_at_end : Error< "flexible array member %0 with type %1 is not at the end of" " %select{struct|interface|union|class|enum}2">; -def err_objc_variable_sized_type_not_at_end - : Error<"%select{field %1|unnamed field}0 with variable sized type %2 is " - "not at the end of class">; +def err_objc_variable_sized_type_not_at_end : Error< + "field %0 with variable sized type %1 is not at the end of class">; def note_next_field_declaration : Note< "next field declaration is here">; def note_next_ivar_declaration : Note< From 6f6d2d238360883039cd17986c9ef598d04995a3 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 9 Jun 2020 15:43:34 -0400 Subject: [PATCH 23/25] [x86] refine conditions for immediate hoisting to save code-size As shown in PR46237: https://bugs.llvm.org/show_bug.cgi?id=46237 The size-savings win for hoisting an 8-bit ALU immediate (intentionally excluding store constants) requires extreme conditions; it may not even be possible when including 
REX prefix bytes on x86-64. I did draft a version of this patch that included use counts after the loop, but I suspect that accounting is not working as expected. I think that is because the number of constant uses are changing as we select instructions (for example as we transform shl/add into LEA). Differential Revision: https://reviews.llvm.org/D81468 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 7 +++--- llvm/test/CodeGen/X86/immediate_merging.ll | 26 +++++++++----------- llvm/test/CodeGen/X86/immediate_merging64.ll | 13 +++++----- llvm/test/CodeGen/X86/pr27202.ll | 15 +++++------ 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 2171be293914e5..fadcb173cd4b9a 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -364,9 +364,10 @@ namespace { if (User->getNumOperands() != 2) continue; - // If this can match to INC/DEC, don't count it as a use. - if (User->getOpcode() == ISD::ADD && - (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) + // If this is a sign-extended 8-bit integer immediate used in an ALU + // instruction, there is probably an opcode encoding to save space. + auto *C = dyn_cast(N); + if (C && isInt<8>(C->getSExtValue())) continue; // Immediates that are used for offsets as part of stack diff --git a/llvm/test/CodeGen/X86/immediate_merging.ll b/llvm/test/CodeGen/X86/immediate_merging.ll index 1bed1014f94e39..038c56f6dd5dd5 100644 --- a/llvm/test/CodeGen/X86/immediate_merging.ll +++ b/llvm/test/CodeGen/X86/immediate_merging.ll @@ -12,16 +12,16 @@ @i = common global i32 0, align 4 ; Test -Os to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). 
+ define i32 @foo() optsize { ; X86-LABEL: foo: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB0_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -38,9 +38,8 @@ define i32 @foo() optsize { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) @@ -74,16 +73,16 @@ if.end: ; preds = %if.then, %entry } ; Test PGSO to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). 
+ define i32 @foo_pgso() !prof !14 { ; X86-LABEL: foo_pgso: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB1_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -100,9 +99,8 @@ define i32 @foo_pgso() !prof !14 { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB1_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) diff --git a/llvm/test/CodeGen/X86/immediate_merging64.ll b/llvm/test/CodeGen/X86/immediate_merging64.ll index a807a119e89353..d355bea1603a5e 100644 --- a/llvm/test/CodeGen/X86/immediate_merging64.ll +++ b/llvm/test/CodeGen/X86/immediate_merging64.ll @@ -5,13 +5,13 @@ ; 32-bit immediates are merged for code size savings. ; Immediates with multiple users should not be pulled into instructions when -; optimizing for code size. +; optimizing for code size (but 8-bit immediates are exceptions). 
+ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { ; CHECK-LABEL: imm_multiple_users: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 @@ -22,9 +22,8 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 { ; CHECK-LABEL: imm_multiple_users_pgso: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll index ea5781ed8c5fcd..bb6be1d1685da5 100644 --- a/llvm/test/CodeGen/X86/pr27202.ll +++ b/llvm/test/CodeGen/X86/pr27202.ll @@ -14,12 +14,14 @@ define i1 @foo(i32 %i) optsize { ret i1 %cmp } +; 8-bit ALU immediates probably have small encodings. +; We do not want to hoist the constant into a register here. + define zeroext i1 @g(i32 %x) optsize { ; CHECK-LABEL: g: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: orl $1, %edi +; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %t0 = or i32 %x, 1 @@ -27,7 +29,7 @@ define zeroext i1 @g(i32 %x) optsize { ret i1 %t1 } -; 8-bit immediates probably have small encodings. +; 8-bit ALU immediates probably have small encodings. ; We do not want to hoist the constant into a register here. 
define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { @@ -36,9 +38,8 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $6, %eax ; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: movl $7, %eax -; CHECK-NEXT: andq %rax, %rsi -; CHECK-NEXT: andq %rax, %rdx +; CHECK-NEXT: andl $7, %esi +; CHECK-NEXT: andl $7, %edx ; CHECK-NEXT: leaq (%rdx,%rsi,8), %rax ; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq From 44b355f34b8b0c705909da94fdcdacbe3b00900a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 9 Jun 2020 15:46:31 -0400 Subject: [PATCH 24/25] AMDGPU/GlobalISel: Add new baseline tests for bitcast legalization --- .../AMDGPU/GlobalISel/legalize-bitcast.mir | 1038 ++++++++++++++++- 1 file changed, 1037 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 4cff1a1d1a2f61..98183b01ce364d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_bitcast_s32_to_v2s16 @@ -283,6 +283,36 @@ body: | $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... 
+--- +name: test_bitcast_v32s32_to_v16s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + + ; CHECK-LABEL: name: test_bitcast_v32s32_to_v16s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) + %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<16 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v16s64_to_v32s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + + ; CHECK-LABEL: name: test_bitcast_v16s64_to_v32s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) + %0:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<32 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_bitcast_s24_to_v3s8 body: | @@ -481,3 +511,1009 @@ body: | %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 ... 
+ +--- + +name: test_bitcast_v2s16_to_v4s8 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_bitcast_v2s16_to_v4s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT8]](<4 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<4 x s8>) = G_BITCAST %0 + %2:_(<4 x s8>) = G_ADD %1, %1 + %3:_(<4 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: test_bitcast_v4s8_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[COPY]](<4 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<4 x s8>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ADD]](<4 x s8>) + ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s8>) = G_TRUNC %0 + %2:_(<4 x s8>) = G_ADD %1, %1 + %3:_(<2 x s16>) = G_BITCAST %2 + $vgpr0 = COPY %3 +... + +--- +name: test_bitcast_v2s16_to_v8s4 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_bitcast_v2s16_to_v8s4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s4>) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s4), [[UV1:%[0-9]+]]:_(s4), [[UV2:%[0-9]+]]:_(s4), [[UV3:%[0-9]+]]:_(s4), [[UV4:%[0-9]+]]:_(s4), [[UV5:%[0-9]+]]:_(s4), [[UV6:%[0-9]+]]:_(s4), [[UV7:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<8 x s4>) + ; CHECK: [[UV8:%[0-9]+]]:_(s4), [[UV9:%[0-9]+]]:_(s4), [[UV10:%[0-9]+]]:_(s4), [[UV11:%[0-9]+]]:_(s4), [[UV12:%[0-9]+]]:_(s4), [[UV13:%[0-9]+]]:_(s4), [[UV14:%[0-9]+]]:_(s4), [[UV15:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<8 x s4>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s4) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s4) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s4) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s4) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s4) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s4) 
+ ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s4) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s4) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s4) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s4) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s4) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s4) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s4) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s4) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s4) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s4) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s4>) = G_BUILD_VECTOR [[TRUNC]](s4), [[TRUNC1]](s4), [[TRUNC2]](s4), [[TRUNC3]](s4), [[TRUNC4]](s4), [[TRUNC5]](s4), [[TRUNC6]](s4), [[TRUNC7]](s4) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(<8 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s4>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[ANYEXT16]](<8 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<8 x s4>) = G_BITCAST %0 + %2:_(<8 x s4>) = G_ADD %1, %1 + %3:_(<8 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 +... 
+ +--- +name: test_bitcast_v8s4_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[COPY]](<8 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<8 x s4>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ADD]](<8 x s4>) + ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s4>) = G_TRUNC %0 + %2:_(<8 x s4>) = G_ADD %1, %1 + %3:_(<2 x s16>) = G_BITCAST %2 + $vgpr0 = COPY %3 +... + +--- +name: test_bitcast_v4s16_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v4s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v2s32_to_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; 
CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = G_BITCAST %0 + %2:_(<4 x s16>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v2s32_to_v8s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<8 x s8>) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + 
; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<8 x s8>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<8 x s8>) = G_BITCAST %0 + %2:_(<8 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... 
+ +--- +name: test_bitcast_v8s8_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[COPY]](<8 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](<8 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s8>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v8s8_to_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v8s8_to_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK: 
[[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s8>) = G_TRUNC %0 + %2:_(s64) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_bitcast_v2s32_to_v16s4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v16s4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s4>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s4), [[UV1:%[0-9]+]]:_(s4), [[UV2:%[0-9]+]]:_(s4), [[UV3:%[0-9]+]]:_(s4), [[UV4:%[0-9]+]]:_(s4), [[UV5:%[0-9]+]]:_(s4), [[UV6:%[0-9]+]]:_(s4), [[UV7:%[0-9]+]]:_(s4), [[UV8:%[0-9]+]]:_(s4), [[UV9:%[0-9]+]]:_(s4), [[UV10:%[0-9]+]]:_(s4), [[UV11:%[0-9]+]]:_(s4), [[UV12:%[0-9]+]]:_(s4), [[UV13:%[0-9]+]]:_(s4), [[UV14:%[0-9]+]]:_(s4), [[UV15:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<16 x s4>) + ; CHECK: [[UV16:%[0-9]+]]:_(s4), [[UV17:%[0-9]+]]:_(s4), [[UV18:%[0-9]+]]:_(s4), [[UV19:%[0-9]+]]:_(s4), [[UV20:%[0-9]+]]:_(s4), [[UV21:%[0-9]+]]:_(s4), [[UV22:%[0-9]+]]:_(s4), [[UV23:%[0-9]+]]:_(s4), [[UV24:%[0-9]+]]:_(s4), [[UV25:%[0-9]+]]:_(s4), [[UV26:%[0-9]+]]:_(s4), [[UV27:%[0-9]+]]:_(s4), [[UV28:%[0-9]+]]:_(s4), [[UV29:%[0-9]+]]:_(s4), [[UV30:%[0-9]+]]:_(s4), [[UV31:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<16 x s4>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s4) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s4) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s4) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s4) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s4) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s4) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s4) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT 
[[UV19]](s4) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s4) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s4) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s4) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s4) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s4) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s4) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s4) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s4) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s4) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s4) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s4) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s4) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s4) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s4) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s4) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s4) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s4) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s4) + ; CHECK: 
[[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s4) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s4) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s4) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s4) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s4) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s4) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s4) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s4) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s4) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s4) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s4) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s4) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s4) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s4) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s4>) = G_BUILD_VECTOR [[TRUNC]](s4), [[TRUNC1]](s4), [[TRUNC2]](s4), [[TRUNC3]](s4), [[TRUNC4]](s4), [[TRUNC5]](s4), [[TRUNC6]](s4), [[TRUNC7]](s4), [[TRUNC8]](s4), [[TRUNC9]](s4), [[TRUNC10]](s4), [[TRUNC11]](s4), [[TRUNC12]](s4), [[TRUNC13]](s4), [[TRUNC14]](s4), [[TRUNC15]](s4) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s4>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<16 x s4>) = G_BITCAST %0 + %2:_(<16 x s4>) = G_ADD %1, %1 + %3:_(<16 x s16>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 + +... 
+ +--- +name: test_bitcast_v16s4_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v16s4_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s4>) = G_TRUNC [[COPY]](<16 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](<16 x s4>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<16 x s4>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_s64_to_v8s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_s64_to_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[LSHR2]](s16) + ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[LSHR2]](s16) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = 
G_ADD [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[LSHR3]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[LSHR3]](s16) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY9]], [[COPY10]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[LSHR4]](s16) + ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[LSHR4]](s16) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY11]], [[COPY12]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY13]], [[COPY14]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[LSHR5]](s16) + ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[LSHR5]](s16) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY15]], [[COPY16]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) + %0:_(s64) = COPY 
$vgpr0_vgpr1 + %1:_(<8 x s8>) = G_BITCAST %0 + %2:_(<8 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v3s32_to_v12s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_bitcast_v3s32_to_v12s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s8>) = G_BITCAST [[COPY]](<3 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<12 x s8>) + ; CHECK: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<12 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], 
[[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: 
[[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<12 x s8>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<12 x s8>) = G_BITCAST %0 + %2:_(<12 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v12s8_to_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: test_bitcast_v12s8_to_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>), [[COPY2]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[TRUNC]](<12 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2 + %4:_(<12 x s8>) = G_TRUNC %3 + %5:_(<3 x s32>) = G_BITCAST %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: test_bitcast_v6s8_to_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s8>) = G_TRUNC [[COPY]](<6 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<6 x s8>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[ADD]](<6 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s16>) + %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x s8>) = G_TRUNC %0 + %2:_(<6 x s8>) = G_ADD %1, %1 + %3:_(<3 x s16>) = G_BITCAST %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_bitcast_v3s16_to_v6s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_bitcast_v3s16_to_v6s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[COPY]](<3 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<3 x s16>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s8>) = G_BITCAST [[ADD]](<3 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s8>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(<3 x s16>) = G_ADD %1, %1 + %3:_(<6 x s8>) = G_BITCAST %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v2s64_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v2s64_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: 
[[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: 
[[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v16s8_to_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s64>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<2 x s64>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v4s32_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: test_bitcast_v4s32_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) 
= G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: 
[[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; 
CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_bitcast_v16s8_to_v4s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<4 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_bitcast_v8s16_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v8s16_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: 
[[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: 
[[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v16s8_to_v8s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<8 x s16>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<8 x s16>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v3s64_to_v6s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v6s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v6s32_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v6s32_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v3s64_to_v12s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v12s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<12 x s16>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v12s16_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v12s16_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v3s64_to_v24s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v24s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<24 x s8>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<24 x s8>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<24 x s8>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v24s8_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v24s8_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<24 x s8>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<24 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<24 x s8>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... From d5c28c4094324e94f6eee403022ca21c8d76998e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 9 Jun 2020 12:18:08 -0700 Subject: [PATCH 25/25] [X86] Move CPUKind enum from clang to llvm/lib/Support. NFCI Similar to what some other targets have done. This information could be reused by other frontends so doesn't make sense to live in clang. -Rename CK_Generic to CK_None to better reflect its illegalness. -Move function for translating from string to enum into llvm. -Call checkCPUKind directly from the string to enum translation and update CPU kind to CK_None accordingly. Caller will use CK_None as sentinel for bad CPU. I'm planning to move all the CPU to feature mapping out next. As part of that I want to devise a better way to express CPUs inheriting features from an earlier CPU. Allowing this to be expressed in a less rigid way than just falling through a switch. Or using gotos as we've had to do lately.
Differential Revision: https://reviews.llvm.org/D81439 --- clang/include/clang/Basic/X86Target.def | 240 ----------------- clang/lib/Basic/Targets/X86.cpp | 55 ++-- clang/lib/Basic/Targets/X86.h | 22 +- llvm/include/llvm/Support/X86TargetParser.def | 242 ++++++++++++++++++ llvm/include/llvm/Support/X86TargetParser.h | 41 +++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/X86TargetParser.cpp | 58 +++++ 7 files changed, 364 insertions(+), 295 deletions(-) create mode 100644 llvm/include/llvm/Support/X86TargetParser.h create mode 100644 llvm/lib/Support/X86TargetParser.cpp diff --git a/clang/include/clang/Basic/X86Target.def b/clang/include/clang/Basic/X86Target.def index ba4e5981e7dcca..70f3879f33a140 100644 --- a/clang/include/clang/Basic/X86Target.def +++ b/clang/include/clang/Basic/X86Target.def @@ -11,19 +11,6 @@ // //===----------------------------------------------------------------------===// -#ifndef PROC_WITH_FEAT -#define PROC_WITH_FEAT(ENUM, STRING, IS64BIT, KEYFEATURE) \ - PROC(ENUM, STRING, IS64BIT) -#endif - -#ifndef PROC -#define PROC(ENUM, STRING, IS64BIT) -#endif - -#ifndef PROC_ALIAS -#define PROC_ALIAS(ENUM, ALIAS) -#endif - #ifndef FEATURE #define FEATURE(ENUM) #endif @@ -36,230 +23,6 @@ #define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) #endif -#define PROC_64_BIT true -#define PROC_32_BIT false - -/// \name i386 -/// i386-generation processors. -//@{ -PROC(i386, "i386", PROC_32_BIT) -//@} - -/// \name i486 -/// i486-generation processors. -//@{ -PROC(i486, "i486", PROC_32_BIT) -PROC(WinChipC6, "winchip-c6", PROC_32_BIT) -PROC(WinChip2, "winchip2", PROC_32_BIT) -PROC(C3, "c3", PROC_32_BIT) -//@} - -/// \name i586 -/// i586-generation processors, P5 microarchitecture based. -//@{ -PROC(i586, "i586", PROC_32_BIT) -PROC(Pentium, "pentium", PROC_32_BIT) -PROC(PentiumMMX, "pentium-mmx", PROC_32_BIT) -//@} - -/// \name i686 -/// i686-generation processors, P6 / Pentium M microarchitecture based. 
-//@{ -PROC(PentiumPro, "pentiumpro", PROC_32_BIT) -PROC(i686, "i686", PROC_32_BIT) -PROC(Pentium2, "pentium2", PROC_32_BIT) -PROC(Pentium3, "pentium3", PROC_32_BIT) -PROC_ALIAS(Pentium3, "pentium3m") -PROC(PentiumM, "pentium-m", PROC_32_BIT) -PROC(C3_2, "c3-2", PROC_32_BIT) - -/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah. -/// Clang however has some logic to support this. -// FIXME: Warn, deprecate, and potentially remove this. -PROC(Yonah, "yonah", PROC_32_BIT) -//@} - -/// \name Netburst -/// Netburst microarchitecture based processors. -//@{ -PROC(Pentium4, "pentium4", PROC_32_BIT) -PROC_ALIAS(Pentium4, "pentium4m") - -PROC(Prescott, "prescott", PROC_32_BIT) -PROC(Nocona, "nocona", PROC_64_BIT) -//@} - -/// \name Core -/// Core microarchitecture based processors. -//@{ -PROC_WITH_FEAT(Core2, "core2", PROC_64_BIT, FEATURE_SSSE3) - -/// This enumerator, like Yonah, is a bit odd. It is another -/// codename which GCC no longer accepts as an option to -march, but Clang -/// has some logic for recognizing it. -// FIXME: Warn, deprecate, and potentially remove this. -PROC(Penryn, "penryn", PROC_64_BIT) -//@} - -/// \name Atom -/// Atom processors -//@{ -PROC_WITH_FEAT(Bonnell, "bonnell", PROC_64_BIT, FEATURE_SSSE3) -PROC_ALIAS(Bonnell, "atom") - -PROC_WITH_FEAT(Silvermont, "silvermont", PROC_64_BIT, FEATURE_SSE4_2) -PROC_ALIAS(Silvermont, "slm") - -PROC(Goldmont, "goldmont", PROC_64_BIT) -PROC(GoldmontPlus, "goldmont-plus", PROC_64_BIT) - -PROC(Tremont, "tremont", PROC_64_BIT) -//@} - -/// \name Nehalem -/// Nehalem microarchitecture based processors. -PROC_WITH_FEAT(Nehalem, "nehalem", PROC_64_BIT, FEATURE_SSE4_2) -PROC_ALIAS(Nehalem, "corei7") - -/// \name Westmere -/// Westmere microarchitecture based processors. -PROC_WITH_FEAT(Westmere, "westmere", PROC_64_BIT, FEATURE_PCLMUL) - -/// \name Sandy Bridge -/// Sandy Bridge microarchitecture based processors. 
-PROC_WITH_FEAT(SandyBridge, "sandybridge", PROC_64_BIT, FEATURE_AVX) -PROC_ALIAS(SandyBridge, "corei7-avx") - -/// \name Ivy Bridge -/// Ivy Bridge microarchitecture based processors. -PROC_WITH_FEAT(IvyBridge, "ivybridge", PROC_64_BIT, FEATURE_AVX) -PROC_ALIAS(IvyBridge, "core-avx-i") - -/// \name Haswell -/// Haswell microarchitecture based processors. -PROC_WITH_FEAT(Haswell, "haswell", PROC_64_BIT, FEATURE_AVX2) -PROC_ALIAS(Haswell, "core-avx2") - -/// \name Broadwell -/// Broadwell microarchitecture based processors. -PROC_WITH_FEAT(Broadwell, "broadwell", PROC_64_BIT, FEATURE_AVX2) - -/// \name Skylake Client -/// Skylake client microarchitecture based processors. -PROC_WITH_FEAT(SkylakeClient, "skylake", PROC_64_BIT, FEATURE_AVX2) - -/// \name Skylake Server -/// Skylake server microarchitecture based processors. -PROC_WITH_FEAT(SkylakeServer, "skylake-avx512", PROC_64_BIT, FEATURE_AVX512F) -PROC_ALIAS(SkylakeServer, "skx") - -/// \name Cascadelake Server -/// Cascadelake Server microarchitecture based processors. -PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI) - -/// \name Cooperlake Server -/// Cooperlake Server microarchitecture based processors. -PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16) - -/// \name Cannonlake Client -/// Cannonlake client microarchitecture based processors. -PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI) - -/// \name Icelake Client -/// Icelake client microarchitecture based processors. -PROC(IcelakeClient, "icelake-client", PROC_64_BIT) - -/// \name Icelake Server -/// Icelake server microarchitecture based processors. -PROC(IcelakeServer, "icelake-server", PROC_64_BIT) - -/// \name Tigerlake -/// Tigerlake microarchitecture based processors. -PROC(Tigerlake, "tigerlake", PROC_64_BIT) - -/// \name Knights Landing -/// Knights Landing processor. 
-PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) - -/// \name Knights Mill -/// Knights Mill processor. -PROC_WITH_FEAT(KNM, "knm", PROC_64_BIT, FEATURE_AVX5124FMAPS) - -/// \name Lakemont -/// Lakemont microarchitecture based processors. -PROC(Lakemont, "lakemont", PROC_32_BIT) - -/// \name K6 -/// K6 architecture processors. -//@{ -PROC(K6, "k6", PROC_32_BIT) -PROC(K6_2, "k6-2", PROC_32_BIT) -PROC(K6_3, "k6-3", PROC_32_BIT) -//@} - -/// \name K7 -/// K7 architecture processors. -//@{ -PROC(Athlon, "athlon", PROC_32_BIT) -PROC_ALIAS(Athlon, "athlon-tbird") - -PROC(AthlonXP, "athlon-xp", PROC_32_BIT) -PROC_ALIAS(AthlonXP, "athlon-mp") -PROC_ALIAS(AthlonXP, "athlon-4") -//@} - -/// \name K8 -/// K8 architecture processors. -//@{ -PROC(K8, "k8", PROC_64_BIT) -PROC_ALIAS(K8, "athlon64") -PROC_ALIAS(K8, "athlon-fx") -PROC_ALIAS(K8, "opteron") - -PROC(K8SSE3, "k8-sse3", PROC_64_BIT) -PROC_ALIAS(K8SSE3, "athlon64-sse3") -PROC_ALIAS(K8SSE3, "opteron-sse3") - -PROC_WITH_FEAT(AMDFAM10, "amdfam10", PROC_64_BIT, FEATURE_SSE4_A) -PROC_ALIAS(AMDFAM10, "barcelona") -//@} - -/// \name Bobcat -/// Bobcat architecture processors. -//@{ -PROC_WITH_FEAT(BTVER1, "btver1", PROC_64_BIT, FEATURE_SSE4_A) -PROC_WITH_FEAT(BTVER2, "btver2", PROC_64_BIT, FEATURE_BMI) -//@} - -/// \name Bulldozer -/// Bulldozer architecture processors. -//@{ -PROC_WITH_FEAT(BDVER1, "bdver1", PROC_64_BIT, FEATURE_XOP) -PROC_WITH_FEAT(BDVER2, "bdver2", PROC_64_BIT, FEATURE_FMA) -PROC_WITH_FEAT(BDVER3, "bdver3", PROC_64_BIT, FEATURE_FMA) -PROC_WITH_FEAT(BDVER4, "bdver4", PROC_64_BIT, FEATURE_AVX2) -//@} - -/// \name zen -/// Zen architecture processors. -//@{ -PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2) -PROC_WITH_FEAT(ZNVER2, "znver2", PROC_64_BIT, FEATURE_AVX2) -//@} - -/// This specification is deprecated and will be removed in the future. -/// Users should prefer K8. -// FIXME: Warn on this when the CPU is set to it. 
-//@{ -PROC(x86_64, "x86-64", PROC_64_BIT) -//@} - -/// \name Geode -/// Geode processors. -//@{ -PROC(Geode, "geode", PROC_32_BIT) -//@} - // List of CPU Supports features in order. These need to remain in the order // required by attribute 'target' checking. Note that not all are supported/ // prioritized by GCC, so synchronization with GCC's implementation may require @@ -345,6 +108,3 @@ CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+mo #undef PROC_64_BIT #undef PROC_32_BIT #undef FEATURE -#undef PROC -#undef PROC_ALIAS -#undef PROC_WITH_FEAT diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b87490a6a85898..05c6ec22af3a89 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -109,7 +109,8 @@ bool X86TargetInfo::initFeatureMap( if (getTriple().getArch() == llvm::Triple::x86_64) setFeatureEnabledImpl(Features, "sse2", true); - const CPUKind Kind = getCPUKind(CPU); + using namespace llvm::X86; + const enum CPUKind Kind = parseArchX86(CPU); // Enable X87 for all X86 processors but Lakemont. if (Kind != CK_Lakemont) @@ -117,11 +118,11 @@ bool X86TargetInfo::initFeatureMap( // Enable cmpxchg8 for i586 and greater CPUs. Include generic for backwards // compatibility. - if (Kind >= CK_i586 || Kind == CK_Generic) + if (Kind >= CK_i586 || Kind == CK_None) setFeatureEnabledImpl(Features, "cx8", true); switch (Kind) { - case CK_Generic: + case CK_None: case CK_i386: case CK_i486: case CK_i586: @@ -936,8 +937,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, // Subtarget options. // FIXME: We are hard-coding the tune parameters based on the CPU, but they // truly should be based on -mtune options. + using namespace llvm::X86; switch (CPU) { - case CK_Generic: + case CK_None: break; case CK_i386: // The rest are coming from the i386 define above. 
@@ -1324,7 +1326,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, break; } - if (CPU >= CK_i486 || CPU == CK_Generic) { + if (CPU >= CK_i486 || CPU == CK_None) { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); @@ -1548,8 +1550,9 @@ static unsigned getFeaturePriority(llvm::X86::ProcessorFeatures Feat) { unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { // Valid CPUs have a 'key feature' that compares just better than its key // feature. - CPUKind Kind = getCPUKind(Name); - if (Kind != CK_Generic) { + using namespace llvm::X86; + CPUKind Kind = parseArchX86(Name); + if (Kind != CK_None) { switch (Kind) { default: llvm_unreachable( @@ -1557,7 +1560,7 @@ unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { #define PROC_WITH_FEAT(ENUM, STR, IS64, KEY_FEAT) \ case CK_##ENUM: \ return (getFeaturePriority(llvm::X86::KEY_FEAT) << 1) + 1; -#include "clang/Basic/X86Target.def" +#include "llvm/Support/X86TargetParser.def" } } @@ -1761,6 +1764,7 @@ bool X86TargetInfo::validateAsmConstraint( // | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " | // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ Optional X86TargetInfo::getCPUCacheLineSize() const { + using namespace llvm::X86; switch (CPU) { // i386 case CK_i386: @@ -1846,7 +1850,7 @@ Optional X86TargetInfo::getCPUCacheLineSize() const { // The following currently have unknown cache line sizes (but they are probably all 64): // Core - case CK_Generic: + case CK_None: return None; } llvm_unreachable("Unknown CPU kind"); @@ -1977,38 +1981,9 @@ 
std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { } } -bool X86TargetInfo::checkCPUKind(CPUKind Kind) const { - // Perform any per-CPU checks necessary to determine if this CPU is - // acceptable. - switch (Kind) { - case CK_Generic: - // No processor selected! - return false; -#define PROC(ENUM, STRING, IS64BIT) \ - case CK_##ENUM: \ - return IS64BIT || getTriple().getArch() == llvm::Triple::x86; -#include "clang/Basic/X86Target.def" - } - llvm_unreachable("Unhandled CPU kind"); -} - void X86TargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { -#define PROC(ENUM, STRING, IS64BIT) \ - if (IS64BIT || getTriple().getArch() == llvm::Triple::x86) \ - Values.emplace_back(STRING); - // For aliases we need to lookup the CPUKind to check get the 64-bit ness. -#define PROC_ALIAS(ENUM, ALIAS) \ - if (checkCPUKind(CK_##ENUM)) \ - Values.emplace_back(ALIAS); -#include "clang/Basic/X86Target.def" -} - -X86TargetInfo::CPUKind X86TargetInfo::getCPUKind(StringRef CPU) const { - return llvm::StringSwitch(CPU) -#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) -#define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) -#include "clang/Basic/X86Target.def" - .Default(CK_Generic); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + llvm::X86::fillValidCPUArchList(Values, Only64Bit); } ArrayRef X86TargetInfo::getGCCRegNames() const { diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 39ccac96a49d8a..c33c608e27c843 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -18,6 +18,7 @@ #include "clang/Basic/TargetOptions.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/X86TargetParser.h" namespace clang { namespace targets { @@ -128,19 +129,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasTSXLDTRK = false; protected: - /// Enumeration of all of the X86 CPUs supported by Clang. 
- /// - /// Each enumeration represents a particular CPU supported by Clang. These - /// loosely correspond to the options passed to '-march' or '-mtune' flags. - enum CPUKind { - CK_Generic, -#define PROC(ENUM, STRING, IS64BIT) CK_##ENUM, -#include "clang/Basic/X86Target.def" - } CPU = CK_Generic; - - bool checkCPUKind(CPUKind Kind) const; - - CPUKind getCPUKind(StringRef CPU) const; + llvm::X86::CPUKind CPU = llvm::X86::CK_None; enum FPMathKind { FP_Default, FP_SSE, FP_387 } FPMath = FP_Default; @@ -313,13 +302,16 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { } bool isValidCPUName(StringRef Name) const override { - return checkCPUKind(getCPUKind(Name)); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + return llvm::X86::parseArchX86(Name, Only64Bit) != llvm::X86::CK_None; } void fillValidCPUList(SmallVectorImpl &Values) const override; bool setCPU(const std::string &Name) override { - return checkCPUKind(CPU = getCPUKind(Name)); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + CPU = llvm::X86::parseArchX86(Name, Only64Bit); + return CPU != llvm::X86::CK_None; } unsigned multiVersionSortPriority(StringRef Name) const override; diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index aef189a562a5b0..4d2b615e9d3de3 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -177,3 +177,245 @@ X86_FEATURE (67, FEATURE_CLFLUSHOPT) X86_FEATURE (68, FEATURE_SHA) #undef X86_FEATURE_COMPAT #undef X86_FEATURE + + +#ifndef PROC_WITH_FEAT +#define PROC_WITH_FEAT(ENUM, STRING, IS64BIT, KEYFEATURE) \ + PROC(ENUM, STRING, IS64BIT) +#endif + +#ifndef PROC +#define PROC(ENUM, STRING, IS64BIT) +#endif + +#ifndef PROC_ALIAS +#define PROC_ALIAS(ENUM, ALIAS) +#endif + +#define PROC_64_BIT true +#define PROC_32_BIT false + +/// \name i386 +/// i386-generation processors. 
+//@{ +PROC(i386, "i386", PROC_32_BIT) +//@} + +/// \name i486 +/// i486-generation processors. +//@{ +PROC(i486, "i486", PROC_32_BIT) +PROC(WinChipC6, "winchip-c6", PROC_32_BIT) +PROC(WinChip2, "winchip2", PROC_32_BIT) +PROC(C3, "c3", PROC_32_BIT) +//@} + +/// \name i586 +/// i586-generation processors, P5 microarchitecture based. +//@{ +PROC(i586, "i586", PROC_32_BIT) +PROC(Pentium, "pentium", PROC_32_BIT) +PROC(PentiumMMX, "pentium-mmx", PROC_32_BIT) +//@} + +/// \name i686 +/// i686-generation processors, P6 / Pentium M microarchitecture based. +//@{ +PROC(PentiumPro, "pentiumpro", PROC_32_BIT) +PROC(i686, "i686", PROC_32_BIT) +PROC(Pentium2, "pentium2", PROC_32_BIT) +PROC(Pentium3, "pentium3", PROC_32_BIT) +PROC_ALIAS(Pentium3, "pentium3m") +PROC(PentiumM, "pentium-m", PROC_32_BIT) +PROC(C3_2, "c3-2", PROC_32_BIT) + +/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah. +/// Clang however has some logic to support this. +// FIXME: Warn, deprecate, and potentially remove this. +PROC(Yonah, "yonah", PROC_32_BIT) +//@} + +/// \name Netburst +/// Netburst microarchitecture based processors. +//@{ +PROC(Pentium4, "pentium4", PROC_32_BIT) +PROC_ALIAS(Pentium4, "pentium4m") + +PROC(Prescott, "prescott", PROC_32_BIT) +PROC(Nocona, "nocona", PROC_64_BIT) +//@} + +/// \name Core +/// Core microarchitecture based processors. +//@{ +PROC_WITH_FEAT(Core2, "core2", PROC_64_BIT, FEATURE_SSSE3) + +/// This enumerator, like Yonah, is a bit odd. It is another +/// codename which GCC no longer accepts as an option to -march, but Clang +/// has some logic for recognizing it. +// FIXME: Warn, deprecate, and potentially remove this. 
+PROC(Penryn, "penryn", PROC_64_BIT) +//@} + +/// \name Atom +/// Atom processors +//@{ +PROC_WITH_FEAT(Bonnell, "bonnell", PROC_64_BIT, FEATURE_SSSE3) +PROC_ALIAS(Bonnell, "atom") + +PROC_WITH_FEAT(Silvermont, "silvermont", PROC_64_BIT, FEATURE_SSE4_2) +PROC_ALIAS(Silvermont, "slm") + +PROC(Goldmont, "goldmont", PROC_64_BIT) +PROC(GoldmontPlus, "goldmont-plus", PROC_64_BIT) + +PROC(Tremont, "tremont", PROC_64_BIT) +//@} + +/// \name Nehalem +/// Nehalem microarchitecture based processors. +PROC_WITH_FEAT(Nehalem, "nehalem", PROC_64_BIT, FEATURE_SSE4_2) +PROC_ALIAS(Nehalem, "corei7") + +/// \name Westmere +/// Westmere microarchitecture based processors. +PROC_WITH_FEAT(Westmere, "westmere", PROC_64_BIT, FEATURE_PCLMUL) + +/// \name Sandy Bridge +/// Sandy Bridge microarchitecture based processors. +PROC_WITH_FEAT(SandyBridge, "sandybridge", PROC_64_BIT, FEATURE_AVX) +PROC_ALIAS(SandyBridge, "corei7-avx") + +/// \name Ivy Bridge +/// Ivy Bridge microarchitecture based processors. +PROC_WITH_FEAT(IvyBridge, "ivybridge", PROC_64_BIT, FEATURE_AVX) +PROC_ALIAS(IvyBridge, "core-avx-i") + +/// \name Haswell +/// Haswell microarchitecture based processors. +PROC_WITH_FEAT(Haswell, "haswell", PROC_64_BIT, FEATURE_AVX2) +PROC_ALIAS(Haswell, "core-avx2") + +/// \name Broadwell +/// Broadwell microarchitecture based processors. +PROC_WITH_FEAT(Broadwell, "broadwell", PROC_64_BIT, FEATURE_AVX2) + +/// \name Skylake Client +/// Skylake client microarchitecture based processors. +PROC_WITH_FEAT(SkylakeClient, "skylake", PROC_64_BIT, FEATURE_AVX2) + +/// \name Skylake Server +/// Skylake server microarchitecture based processors. +PROC_WITH_FEAT(SkylakeServer, "skylake-avx512", PROC_64_BIT, FEATURE_AVX512F) +PROC_ALIAS(SkylakeServer, "skx") + +/// \name Cascadelake Server +/// Cascadelake Server microarchitecture based processors. 
+PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI) + +/// \name Cooperlake Server +/// Cooperlake Server microarchitecture based processors. +PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16) + +/// \name Cannonlake Client +/// Cannonlake client microarchitecture based processors. +PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI) + +/// \name Icelake Client +/// Icelake client microarchitecture based processors. +PROC(IcelakeClient, "icelake-client", PROC_64_BIT) + +/// \name Icelake Server +/// Icelake server microarchitecture based processors. +PROC(IcelakeServer, "icelake-server", PROC_64_BIT) + +/// \name Tigerlake +/// Tigerlake microarchitecture based processors. +PROC(Tigerlake, "tigerlake", PROC_64_BIT) + +/// \name Knights Landing +/// Knights Landing processor. +PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) + +/// \name Knights Mill +/// Knights Mill processor. +PROC_WITH_FEAT(KNM, "knm", PROC_64_BIT, FEATURE_AVX5124FMAPS) + +/// \name Lakemont +/// Lakemont microarchitecture based processors. +PROC(Lakemont, "lakemont", PROC_32_BIT) + +/// \name K6 +/// K6 architecture processors. +//@{ +PROC(K6, "k6", PROC_32_BIT) +PROC(K6_2, "k6-2", PROC_32_BIT) +PROC(K6_3, "k6-3", PROC_32_BIT) +//@} + +/// \name K7 +/// K7 architecture processors. +//@{ +PROC(Athlon, "athlon", PROC_32_BIT) +PROC_ALIAS(Athlon, "athlon-tbird") + +PROC(AthlonXP, "athlon-xp", PROC_32_BIT) +PROC_ALIAS(AthlonXP, "athlon-mp") +PROC_ALIAS(AthlonXP, "athlon-4") +//@} + +/// \name K8 +/// K8 architecture processors. +//@{ +PROC(K8, "k8", PROC_64_BIT) +PROC_ALIAS(K8, "athlon64") +PROC_ALIAS(K8, "athlon-fx") +PROC_ALIAS(K8, "opteron") + +PROC(K8SSE3, "k8-sse3", PROC_64_BIT) +PROC_ALIAS(K8SSE3, "athlon64-sse3") +PROC_ALIAS(K8SSE3, "opteron-sse3") + +PROC_WITH_FEAT(AMDFAM10, "amdfam10", PROC_64_BIT, FEATURE_SSE4_A) +PROC_ALIAS(AMDFAM10, "barcelona") +//@} + +/// \name Bobcat +/// Bobcat architecture processors. 
+//@{ +PROC_WITH_FEAT(BTVER1, "btver1", PROC_64_BIT, FEATURE_SSE4_A) +PROC_WITH_FEAT(BTVER2, "btver2", PROC_64_BIT, FEATURE_BMI) +//@} + +/// \name Bulldozer +/// Bulldozer architecture processors. +//@{ +PROC_WITH_FEAT(BDVER1, "bdver1", PROC_64_BIT, FEATURE_XOP) +PROC_WITH_FEAT(BDVER2, "bdver2", PROC_64_BIT, FEATURE_FMA) +PROC_WITH_FEAT(BDVER3, "bdver3", PROC_64_BIT, FEATURE_FMA) +PROC_WITH_FEAT(BDVER4, "bdver4", PROC_64_BIT, FEATURE_AVX2) +//@} + +/// \name zen +/// Zen architecture processors. +//@{ +PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2) +PROC_WITH_FEAT(ZNVER2, "znver2", PROC_64_BIT, FEATURE_AVX2) +//@} + +/// This specification is deprecated and will be removed in the future. +/// Users should prefer K8. +// FIXME: Warn on this when the CPU is set to it. +//@{ +PROC(x86_64, "x86-64", PROC_64_BIT) +//@} + +/// \name Geode +/// Geode processors. +//@{ +PROC(Geode, "geode", PROC_32_BIT) +//@} + +#undef PROC +#undef PROC_ALIAS +#undef PROC_WITH_FEAT diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h new file mode 100644 index 00000000000000..1c9ad03cde8135 --- /dev/null +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -0,0 +1,41 @@ +//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise X86 hardware features. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_X86TARGETPARSERCOMMON_H +#define LLVM_SUPPORT_X86TARGETPARSERCOMMON_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { +class StringRef; + +namespace X86 { + +enum CPUKind { + CK_None, +#define PROC(ENUM, STRING, IS64BIT) CK_##ENUM, +#include "llvm/Support/X86TargetParser.def" +}; + +/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if +/// \p Only64Bit is true. +CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false); + +/// Provide a list of valid CPU names. If \p Only64Bit is true, the list will +/// only contain 64-bit capable CPUs. +void fillValidCPUArchList(SmallVectorImpl &Values, + bool Only64Bit); + +} // namespace X86 +} // namespace llvm + +#endif diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 6a3448dc3f8566..17bef023078976 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -158,6 +158,7 @@ add_llvm_component_library(LLVMSupport VersionTuple.cpp VirtualFileSystem.cpp WithColor.cpp + X86TargetParser.cpp YAMLParser.cpp YAMLTraits.cpp raw_os_ostream.cpp diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp new file mode 100644 index 00000000000000..ba85ed7ee6260c --- /dev/null +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -0,0 +1,58 @@ +//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise X86 hardware features. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/X86TargetParser.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" + +using namespace llvm; + +bool checkCPUKind(llvm::X86::CPUKind Kind, bool Only64Bit) { + using namespace X86; + // Perform any per-CPU checks necessary to determine if this CPU is + // acceptable. + switch (Kind) { + case CK_None: + // No processor selected! + return false; +#define PROC(ENUM, STRING, IS64BIT) \ + case CK_##ENUM: \ + return IS64BIT || !Only64Bit; +#include "llvm/Support/X86TargetParser.def" + } + llvm_unreachable("Unhandled CPU kind"); +} + +X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) { + X86::CPUKind Kind = llvm::StringSwitch(CPU) +#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) +#define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) +#include "llvm/Support/X86TargetParser.def" + .Default(CK_None); + + if (!checkCPUKind(Kind, Only64Bit)) + Kind = CK_None; + + return Kind; +} + +void llvm::X86::fillValidCPUArchList(SmallVectorImpl &Values, + bool Only64Bit) { +#define PROC(ENUM, STRING, IS64BIT) \ + if (IS64BIT || !Only64Bit) \ + Values.emplace_back(STRING); + // For aliases we need to lookup the CPUKind to get the 64-bit ness. +#define PROC_ALIAS(ENUM, ALIAS) \ + if (checkCPUKind(CK_##ENUM, Only64Bit)) \ + Values.emplace_back(ALIAS); +#include "llvm/Support/X86TargetParser.def" +}