From cb2d8b30ad0dc08bb151b922f5a137014ef9fc87 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Thu, 4 Jun 2020 11:07:47 -0700 Subject: [PATCH 01/25] [AArch64][GlobalISel] Select trn1 and trn2 Same idea as for zip, uzp, etc. Teach the post-legalizer combiner to recognize G_SHUFFLE_VECTORs that are trn1/trn2 instructions. - Add G_TRN1 and G_TRN2 - Port mask matching code from AArch64ISelLowering - Produce G_TRN1 and G_TRN2 in the post-legalizer combiner - Select via importer Add select-trn.mir to test selection. Add postlegalizer-combiner-trn.mir to test the combine. This is similar to the existing arm64-trn test. Note that both of these tests contain things we currently don't legalize. I figured it would be easier to test these now rather than later, since once we legalize the G_SHUFFLE_VECTORs, it's not guaranteed that someone will update the tests. Differential Revision: https://reviews.llvm.org/D81182 --- llvm/lib/Target/AArch64/AArch64Combine.td | 9 +- llvm/lib/Target/AArch64/AArch64InstrGISel.td | 15 + .../GISel/AArch64PostLegalizerCombiner.cpp | 34 ++ .../GlobalISel/postlegalizer-combiner-trn.mir | 234 ++++++++++++++ .../CodeGen/AArch64/GlobalISel/select-trn.mir | 300 ++++++++++++++++++ 5 files changed, 591 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 26d5dde4679e33..f45a3b560cf449 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -56,9 +56,16 @@ def dup: GICombineRule < (apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) >; +def trn : GICombineRule< + (defs root:$root, shuffle_matchdata:$matchinfo), + (match (wip_match_opcode G_SHUFFLE_VECTOR):$root, + [{ return matchTRN(*${root}, MRI, ${matchinfo}); }]), + (apply [{ 
applyShuffleVectorPseudo(*${root}, ${matchinfo}); }]) +>; + // Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo // instruction. -def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp]>; +def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp, trn]>; def AArch64PostLegalizerCombinerHelper : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper", diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 8c95515fa390ee..0bd8a206705d30 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -80,6 +80,19 @@ def G_DUP: AArch64GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$lane); } +// Represents a trn1 instruction. Produced post-legalization from +// G_SHUFFLE_VECTORs with appropriate masks. +def G_TRN1 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$v1, type0:$v2); +} + +// Represents a trn2 instruction. Produced post-legalization from +// G_SHUFFLE_VECTORs with appropriate masks. +def G_TRN2 : AArch64GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$v1, type0:$v2); +} def : GINodeEquiv; def : GINodeEquiv; @@ -89,3 +102,5 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp index bee187202a5756..1ce69a8900eba4 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -95,6 +95,22 @@ static bool isREVMask(ArrayRef M, unsigned EltSize, unsigned NumElts, return true; } +/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts. 
+/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult. +static bool isTRNMask(ArrayRef M, unsigned NumElts, + unsigned &WhichResult) { + if (NumElts % 2 != 0) + return false; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && static_cast(M[i]) != i + WhichResult) || + (M[i + 1] >= 0 && + static_cast(M[i + 1]) != i + NumElts + WhichResult)) + return false; + } + return true; +} + /// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts. /// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult. static bool isUZPMask(ArrayRef M, unsigned NumElts, @@ -158,6 +174,24 @@ static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI, return false; } +/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with +/// a G_TRN1 or G_TRN2 instruction. +static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI, + ShuffleVectorPseudo &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR); + unsigned WhichResult; + ArrayRef ShuffleMask = MI.getOperand(3).getShuffleMask(); + Register Dst = MI.getOperand(0).getReg(); + unsigned NumElts = MRI.getType(Dst).getNumElements(); + if (!isTRNMask(ShuffleMask, NumElts, WhichResult)) + return false; + unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2; + Register V1 = MI.getOperand(1).getReg(); + Register V2 = MI.getOperand(2).getReg(); + MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2}); + return true; +} + /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with /// a G_UZP1 or G_UZP2 instruction. 
/// diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir new file mode 100644 index 00000000000000..037177a78c5df5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-trn.mir @@ -0,0 +1,234 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +# +# Check that we produce G_TRN1 or G_TRN2 when we have an appropriate shuffle +# mask. +# + +... +--- +name: trn1_v8s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; CHECK-LABEL: name: trn1_v8s8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, 2, 10, 4, 12, 6, 14) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v8s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; CHECK-LABEL: name: trn2_v8s8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, 9, 3, 11, 5, 13, 7, 15) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: trn1_v16s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn1_v16s8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<16 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN1_]](<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<16 x s8>) = COPY $q0 + %1:_(<16 x s8>) = COPY $q1 + %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30) + $q0 = COPY %2(<16 x s8>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v16s8 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn2_v16s8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<16 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN2_]](<16 x s8>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<16 x s8>) = COPY $q0 + %1:_(<16 x s8>) = COPY $q1 + %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31) + $q0 = COPY %2(<16 x s8>) + RET_ReallyLR implicit $q0 + +... 
+--- +name: trn1_v4s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn1_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<4 x s32>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN1_]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4, 2, 6) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: trn2_v4s32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: trn2_v4s32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<4 x s32>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $q0 = COPY [[TRN2_]](<4 x s32>) + ; CHECK: RET_ReallyLR implicit $q0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(<4 x s32>) = COPY $q1 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 5, 3, 7) + $q0 = COPY %2(<4 x s32>) + RET_ReallyLR implicit $q0 + +... +--- +name: redundant_with_zip1 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP. 
+ ; + ; CHECK-LABEL: name: redundant_with_zip1 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[ZIP1_:%[0-9]+]]:_(<2 x s32>) = G_ZIP1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ZIP1_]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 2) + $d0 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: redundant_with_zip2 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP. + ; + ; CHECK-LABEL: name: redundant_with_zip2 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = COPY $d1 + %2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3) + $d0 = COPY %2(<2 x s32>) + RET_ReallyLR implicit $d0 + +... +--- +name: trn1_undef +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching to G_TRN1. + ; + ; CHECK-LABEL: name: trn1_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, -1, -1, 4, 12, 6, 14) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $d0 + +... 
+--- +name: trn2_undef +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $d0, $d1 + ; Undef shuffle indices should not prevent matching to G_TRN2. + ; + ; CHECK-LABEL: name: trn2_undef + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 + ; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]] + ; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<8 x s8>) = COPY $d0 + %1:_(<8 x s8>) = COPY $d1 + %2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, -1, 3, 11, 5, 13, -1, -1) + $d0 = COPY %2(<8 x s8>) + RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir new file mode 100644 index 00000000000000..738aacf2c372cd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-trn.mir @@ -0,0 +1,300 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Test that we can select G_TRN1 and G_TRN2. +# +# Each testcase is named based off of the instruction which should be selected. + +... +--- +name: TRN1v2i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v2i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v2i32_:%[0-9]+]]:fpr64 = TRN1v2i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN1v2i64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v2i64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v2i64_:%[0-9]+]]:fpr128 = TRN1v2i64 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v4i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v4i16 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v4i16_:%[0-9]+]]:fpr64 = TRN1v4i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v4i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v4i32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v4i32_:%[0-9]+]]:fpr128 = TRN1v4i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN1v8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN1v8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN1v8i8_:%[0-9]+]]:fpr64 = TRN1v8i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v8i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v8i16 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v8i16_:%[0-9]+]]:fpr128 = TRN1v8i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = G_TRN1 %0, %1 + RET_ReallyLR + +... +--- +name: TRN1v16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN1v16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN1v16i8_:%[0-9]+]]:fpr128 = TRN1v16i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<16 x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = G_TRN1 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v2i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v2i32 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v2i32_:%[0-9]+]]:fpr64 = TRN2v2i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s32>) = COPY $d0 + %1:fpr(<2 x s32>) = COPY $d1 + %2:fpr(<2 x s32>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v2i64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v2i64 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v2i64_:%[0-9]+]]:fpr128 = TRN2v2i64 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<2 x s64>) = COPY $q0 + %1:fpr(<2 x s64>) = COPY $q1 + %2:fpr(<2 x s64>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v4i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v4i16 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v4i16_:%[0-9]+]]:fpr64 = TRN2v4i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s16>) = COPY $d0 + %1:fpr(<4 x s16>) = COPY $d1 + %2:fpr(<4 x s16>) = G_TRN2 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v4i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v4i32 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v4i32_:%[0-9]+]]:fpr128 = TRN2v4i32 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<4 x s32>) = COPY $q0 + %1:fpr(<4 x s32>) = COPY $q1 + %2:fpr(<4 x s32>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v8i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $d0, $d1 + ; CHECK-LABEL: name: TRN2v8i8 + ; CHECK: liveins: $d0, $d1 + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK: [[TRN2v8i8_:%[0-9]+]]:fpr64 = TRN2v8i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s8>) = COPY $d0 + %1:fpr(<8 x s8>) = COPY $d1 + %2:fpr(<8 x s8>) = G_TRN2 %0, %1 + RET_ReallyLR + +... +--- +name: TRN2v8i16 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v8i16 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v8i16_:%[0-9]+]]:fpr128 = TRN2v8i16 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<8 x s16>) = COPY $q0 + %1:fpr(<8 x s16>) = COPY $q1 + %2:fpr(<8 x s16>) = G_TRN2 %0, %1 + RET_ReallyLR + +... 
+--- +name: TRN2v16i8 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $q0, $q1 + ; CHECK-LABEL: name: TRN2v16i8 + ; CHECK: liveins: $q0, $q1 + ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1 + ; CHECK: [[TRN2v16i8_:%[0-9]+]]:fpr128 = TRN2v16i8 [[COPY]], [[COPY1]] + ; CHECK: RET_ReallyLR + %0:fpr(<16 x s8>) = COPY $q0 + %1:fpr(<16 x s8>) = COPY $q1 + %2:fpr(<16 x s8>) = G_TRN2 %0, %1 + RET_ReallyLR From 7a38618a20596e419abbbbb249300e812763a028 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 9 Jun 2020 19:56:30 +0200 Subject: [PATCH 02/25] [AArch64] Allow BTI mnemonics in the HINT space with BTI disabled Summary: It is important to emit HINT instructions instead of BTI ones when BTI is disabled. This allows compatibility with other assemblers (e.g. GAS). Still, developers of assembly code will want to write code that is compatible with both pre- and post-BTI CPUs. They could use HINT mnemonics, but the new mnemonics are a lot more readable (e.g. bti c instead of hint #34), and they will result in the same encodings. So, while LLVM should not *emit* the new mnemonics when BTI is disabled, this patch will at least make LLVM *accept* assembly code that uses them. 
Reviewers: pbarrio, tamas.petz, ostannard Reviewed By: pbarrio, ostannard Subscribers: ostannard, kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D81257 --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 ++++++ llvm/test/MC/AArch64/armv8.5a-bti.s | 18 +++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d642640c37520e..5aa73760f77049 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -726,6 +726,12 @@ def : InstAlias<"sevl", (HINT 0b101)>; def : InstAlias<"dgh", (HINT 0b110)>; def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>; def : InstAlias<"csdb", (HINT 20)>; +// In order to be able to write readable assembly, LLVM should accept assembly +// inputs that use Branch Target Identification mnemonics, even with BTI disabled. +// However, in order to be compatible with other assemblers (e.g. GAS), LLVM +// should not emit these mnemonics unless BTI is enabled. 
+def : InstAlias<"bti", (HINT 32), 0>; +def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; diff --git a/llvm/test/MC/AArch64/armv8.5a-bti.s b/llvm/test/MC/AArch64/armv8.5a-bti.s index ca55516890c42c..e0585f7613fcc8 100644 --- a/llvm/test/MC/AArch64/armv8.5a-bti.s +++ b/llvm/test/MC/AArch64/armv8.5a-bti.s @@ -1,6 +1,6 @@ -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+bti < %s | FileCheck %s -// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.5a < %s | FileCheck %s -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-bti < %s 2>&1 | FileCheck %s --check-prefix=NOBTI +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+bti < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+v8.5a < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=-bti < %s | FileCheck %s --check-prefix=NOBTI bti bti c @@ -12,14 +12,10 @@ bti jc // CHECK: bti j // encoding: [0x9f,0x24,0x03,0xd5] // CHECK: bti jc // encoding: [0xdf,0x24,0x03,0xd5] -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti -// NOBTI: instruction requires: bti -// NOBTI-NEXT: bti +// NOBTI: hint #32 // encoding: [0x1f,0x24,0x03,0xd5] +// NOBTI: hint #34 // encoding: [0x5f,0x24,0x03,0xd5] +// NOBTI: hint #36 // encoding: [0x9f,0x24,0x03,0xd5] +// NOBTI: hint #38 // encoding: [0xdf,0x24,0x03,0xd5] hint #32 hint #34 From bc38793852c0552337bae54961eb14fb0bacf356 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 9 Jun 2020 09:55:25 -0700 Subject: [PATCH 03/25] Change debuginfo check for addHeapAllocSiteMetadata Summary: Move check inside of addHeapAllocSiteMetadata(). Change check to DebugInfo <= DebugLineTablesOnly. 
Reviewers: akhuang Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D81481 --- clang/lib/CodeGen/CGDebugInfo.cpp | 3 +++ clang/lib/CodeGen/CGExprCXX.cpp | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 1737154d179a68..65d513c8cf0581 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -2149,6 +2149,9 @@ llvm::DIType *CGDebugInfo::getOrCreateStandaloneType(QualType D, void CGDebugInfo::addHeapAllocSiteMetadata(llvm::CallBase *CI, QualType AllocatedTy, SourceLocation Loc) { + if (CGM.getCodeGenOpts().getDebugInfo() <= + codegenoptions::DebugLineTablesOnly) + return; llvm::MDNode *node; if (AllocatedTy->isVoidType()) node = llvm::MDNode::get(CGM.getLLVMContext(), None); diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index d018443858bd5b..d59aa6ce0fb94f 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1639,8 +1639,7 @@ llvm::Value *CodeGenFunction::EmitCXXNewExpr(const CXXNewExpr *E) { EmitNewDeleteCall(*this, allocator, allocatorType, allocatorArgs); // Set !heapallocsite metadata on the call to operator new. 
- if (CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo && - getDebugInfo()) + if (getDebugInfo()) if (auto *newCall = dyn_cast(RV.getScalarVal())) getDebugInfo()->addHeapAllocSiteMetadata(newCall, allocType, E->getExprLoc()); From 5dc4e7c2b95fc665c1dc86c6b40cf02171f8801d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 9 Jun 2020 18:36:14 +0100 Subject: [PATCH 04/25] [VectorCombine] scalarizeBinop - support an all-constant src vector operand scalarizeBinop currently folds vec_bo((inselt VecC0, V0, Index), (inselt VecC1, V1, Index)) -> inselt(vec_bo(VecC0, VecC1), scl_bo(V0,V1), Index) This patch extends this to account for cases where one of the vec_bo operands is already all-constant and performs similar cost checks to determine if the scalar binop with a constant still makes sense: vec_bo((inselt VecC0, V0, Index), VecC1) -> inselt(vec_bo(VecC0, VecC1), scl_bo(V0,extractelt(V1,Index)), Index) Fixes PR42174 Differential Revision: https://reviews.llvm.org/D80885 --- .../Transforms/Vectorize/VectorCombine.cpp | 59 +++- .../PhaseOrdering/X86/scalarization.ll | 31 +- .../X86/insert-binop-with-constant.ll | 272 +++++++++--------- 3 files changed, 202 insertions(+), 160 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index c03e70fdcef55d..b68182e6098dba 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -313,23 +313,48 @@ static bool foldBitcastShuf(Instruction &I, const TargetTransformInfo &TTI) { /// Match a vector binop instruction with inserted scalar operands and convert /// to scalar binop followed by insertelement. 
static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { - Instruction *Ins0, *Ins1; - if (!match(&I, m_BinOp(m_Instruction(Ins0), m_Instruction(Ins1)))) + Value *Ins0, *Ins1; + if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1)))) return false; + // Match against one or both scalar values being inserted into constant + // vectors: + // vec_bo VecC0, (inselt VecC1, V1, Index) + // vec_bo (inselt VecC0, V0, Index), VecC1 + // vec_bo (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) // TODO: Deal with mismatched index constants and variable indexes? - Constant *VecC0, *VecC1; - Value *V0, *V1; - uint64_t Index; + Constant *VecC0 = nullptr, *VecC1 = nullptr; + Value *V0 = nullptr, *V1 = nullptr; + uint64_t Index0 = 0, Index1 = 0; if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0), - m_ConstantInt(Index))) || - !match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1), - m_SpecificInt(Index)))) + m_ConstantInt(Index0))) && + !match(Ins0, m_Constant(VecC0))) + return false; + if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1), + m_ConstantInt(Index1))) && + !match(Ins1, m_Constant(VecC1))) + return false; + + bool IsConst0 = !V0; + bool IsConst1 = !V1; + if (IsConst0 && IsConst1) + return false; + if (!IsConst0 && !IsConst1 && Index0 != Index1) return false; - Type *ScalarTy = V0->getType(); + // Bail for single insertion if it is a load. + // TODO: Handle this once getVectorInstrCost can cost for load/stores. + auto *I0 = dyn_cast_or_null(V0); + auto *I1 = dyn_cast_or_null(V1); + if ((IsConst0 && I1 && I1->mayReadFromMemory()) || + (IsConst1 && I0 && I0->mayReadFromMemory())) + return false; + + uint64_t Index = IsConst0 ? Index1 : Index0; + Type *ScalarTy = IsConst0 ? 
V1->getType() : V0->getType(); Type *VecTy = I.getType(); - assert(VecTy->isVectorTy() && ScalarTy == V1->getType() && + assert(VecTy->isVectorTy() && + (IsConst0 || IsConst1 || V0->getType() == V1->getType()) && (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy()) && "Unexpected types for insert into binop"); @@ -341,10 +366,11 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { // both sequences. int InsertCost = TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index); - int OldCost = InsertCost + InsertCost + VectorOpCost; + int OldCost = (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + + VectorOpCost; int NewCost = ScalarOpCost + InsertCost + - !Ins0->hasOneUse() * InsertCost + - !Ins1->hasOneUse() * InsertCost; + (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) + + (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost); // We want to scalarize unless the vector variant actually has lower cost. if (OldCost < NewCost) @@ -354,6 +380,13 @@ static bool scalarizeBinop(Instruction &I, const TargetTransformInfo &TTI) { // inselt NewVecC, (scalar_bo V0, V1), Index ++NumScalarBO; IRBuilder<> Builder(&I); + + // For constant cases, extract the scalar element, this should constant fold. + if (IsConst0) + V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index)); + if (IsConst1) + V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index)); + Value *Scalar = Builder.CreateBinOp(Opcode, V0, V1, I.getName() + ".scalar"); // All IR flags are safe to back-propagate. 
There is no potential for extra diff --git a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll index 3b341f6a5b7a55..0d99654be52893 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/scalarization.ll @@ -12,31 +12,24 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <4 x i32> @square(<4 x i32> %num, i32 %y, i32 %x, i32 %h, i32 %k, i32 %w, i32 %p, i32 %j, i32 %u) { ; CHECK-LABEL: @square( ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[K:%.*]], 2 -; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[DIV]], i32 0 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[P:%.*]], 6234 -; CHECK-NEXT: [[SPLATINSERT2:%.*]] = insertelement <4 x i32> undef, i32 [[MUL]], i32 0 ; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[H:%.*]], 75 -; CHECK-NEXT: [[SPLATINSERT6:%.*]] = insertelement <4 x i32> undef, i32 [[MUL5]], i32 0 ; CHECK-NEXT: [[DIV9:%.*]] = sdiv i32 [[J:%.*]], 3452 -; CHECK-NEXT: [[SPLATINSERT10:%.*]] = insertelement <4 x i32> undef, i32 [[DIV9]], i32 0 ; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[W:%.*]], 53 -; CHECK-NEXT: [[SPLATINSERT14:%.*]] = insertelement <4 x i32> undef, i32 [[MUL13]], i32 0 ; CHECK-NEXT: [[DIV17:%.*]] = sdiv i32 [[X:%.*]], 820 -; CHECK-NEXT: [[SPLATINSERT18:%.*]] = insertelement <4 x i32> undef, i32 [[DIV17]], i32 0 ; CHECK-NEXT: [[MUL21:%.*]] = shl nsw i32 [[U:%.*]], 2 -; CHECK-NEXT: [[SPLATINSERT22:%.*]] = insertelement <4 x i32> undef, i32 [[MUL21]], i32 0 -; CHECK-NEXT: [[SPLATINSERT25:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[SPLATINSERT25]], -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SPLATINSERT18]] -; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SPLATINSERT6]] -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[SPLATINSERT]] -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[SPLATINSERT14]] -; CHECK-NEXT: 
[[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[SPLATINSERT2]] -; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]], [[SPLATINSERT10]] -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SPLATINSERT22]] -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP10]], [[NUM:%.*]] +; CHECK-NEXT: [[DOTSCALAR:%.*]] = add i32 [[Y:%.*]], 1 +; CHECK-NEXT: [[DOTSCALAR1:%.*]] = add i32 [[DOTSCALAR]], [[DIV17]] +; CHECK-NEXT: [[DOTSCALAR2:%.*]] = add i32 [[DOTSCALAR1]], [[MUL5]] +; CHECK-NEXT: [[DOTSCALAR3:%.*]] = add i32 [[DOTSCALAR2]], [[DIV]] +; CHECK-NEXT: [[DOTSCALAR4:%.*]] = add i32 [[DOTSCALAR3]], [[MUL13]] +; CHECK-NEXT: [[DOTSCALAR5:%.*]] = add i32 [[DOTSCALAR4]], [[MUL]] +; CHECK-NEXT: [[DOTSCALAR6:%.*]] = add i32 [[DOTSCALAR5]], [[DIV9]] +; CHECK-NEXT: [[DOTSCALAR7:%.*]] = add i32 [[DOTSCALAR6]], [[MUL21]] +; CHECK-NEXT: [[DOTSCALAR8:%.*]] = add i32 [[DOTSCALAR7]], 317425 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[DOTSCALAR8]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[ADD29:%.*]] = add <4 x i32> [[TMP2]], [[NUM:%.*]] ; CHECK-NEXT: ret <4 x i32> [[ADD29]] ; %add = add <4 x i32> %num, diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll index e0d4623f505f70..a400e8f42907cd 100644 --- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll +++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll @@ -4,8 +4,8 @@ define <2 x i64> @add_constant(i64 %x) { ; CHECK-LABEL: @add_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42 +; 
CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -15,8 +15,8 @@ define <2 x i64> @add_constant(i64 %x) { define <2 x i64> @add_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @add_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = add <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = add i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -41,8 +41,8 @@ define <2 x i64> @add_constant_load(i64* %p) { define <4 x i32> @sub_constant_op0(i32 %x) { ; CHECK-LABEL: @sub_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sub nuw nsw <4 x i32> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw nsw i32 -42, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <4 x i32> undef, i32 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i32> [[BO]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -52,8 +52,8 @@ define <4 x i32> @sub_constant_op0(i32 %x) { define <4 x i32> @sub_constant_op0_not_undef_lane(i32 %x) { ; CHECK-LABEL: @sub_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sub nuw <4 x i32> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i32 42, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <4 x i32> undef, i32 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <4 x i32> [[BO]] ; %ins = insertelement <4 x i32> undef, i32 %x, i32 1 @@ -63,8 +63,8 @@ define <4 x i32> @sub_constant_op0_not_undef_lane(i32 %x) { define <8 x i16> @sub_constant_op1(i16 %x) { ; CHECK-LABEL: @sub_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 -; 
CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i16 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <8 x i16> undef, i16 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[BO]] ; %ins = insertelement <8 x i16> undef, i16 %x, i32 0 @@ -74,8 +74,8 @@ define <8 x i16> @sub_constant_op1(i16 %x) { define <8 x i16> @sub_constant_op1_not_undef_lane(i16 %x) { ; CHECK-LABEL: @sub_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <8 x i16> undef, i16 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sub nuw <8 x i16> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sub nuw i16 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <8 x i16> undef, i16 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <8 x i16> [[BO]] ; %ins = insertelement <8 x i16> undef, i16 %x, i32 0 @@ -85,8 +85,8 @@ define <8 x i16> @sub_constant_op1_not_undef_lane(i16 %x) { define <16 x i8> @mul_constant(i8 %x) { ; CHECK-LABEL: @mul_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 2 -; CHECK-NEXT: [[BO:%.*]] = mul <16 x i8> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = mul i8 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <16 x i8> , i8 [[BO_SCALAR]], i64 2 ; CHECK-NEXT: ret <16 x i8> [[BO]] ; %ins = insertelement <16 x i8> undef, i8 %x, i32 2 @@ -96,8 +96,8 @@ define <16 x i8> @mul_constant(i8 %x) { define <3 x i64> @mul_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @mul_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <3 x i64> undef, i64 [[X:%.*]], i32 2 -; CHECK-NEXT: [[BO:%.*]] = mul <3 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = mul i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <3 x i64> , i64 [[BO_SCALAR]], i64 2 ; CHECK-NEXT: ret <3 x i64> [[BO]] ; %ins = insertelement <3 x i64> undef, i64 %x, i32 2 @@ -106,12 +106,20 @@ define <3 x i64> @mul_constant_not_undef_lane(i64 %x) { } define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { -; 
CHECK-LABEL: @mul_constant_multiuse( -; CHECK-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[MUL:%.*]] = mul <16 x i8> [[INS]], -; CHECK-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] -; CHECK-NEXT: ret <16 x i8> [[XOR]] +; SSE-LABEL: @mul_constant_multiuse( +; SSE-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 +; SSE-NEXT: [[MUL:%.*]] = mul <16 x i8> [[INS]], +; SSE-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] +; SSE-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] +; SSE-NEXT: ret <16 x i8> [[XOR]] +; +; AVX-LABEL: @mul_constant_multiuse( +; AVX-NEXT: [[INS:%.*]] = insertelement <16 x i8> , i8 [[A0:%.*]], i32 0 +; AVX-NEXT: [[MUL_SCALAR:%.*]] = mul i8 [[A0]], 3 +; AVX-NEXT: [[MUL:%.*]] = insertelement <16 x i8> , i8 [[MUL_SCALAR]], i64 0 +; AVX-NEXT: [[AND:%.*]] = and <16 x i8> [[INS]], [[A1:%.*]] +; AVX-NEXT: [[XOR:%.*]] = xor <16 x i8> [[AND]], [[MUL]] +; AVX-NEXT: ret <16 x i8> [[XOR]] ; %ins = insertelement <16 x i8> , i8 %a0, i32 0 %mul = mul <16 x i8> %ins, @@ -122,8 +130,8 @@ define <16 x i8> @mul_constant_multiuse(i8 %a0, <16 x i8> %a1) { define <2 x i64> @shl_constant_op0(i64 %x) { ; CHECK-LABEL: @shl_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -133,8 +141,8 @@ define <2 x i64> @shl_constant_op0(i64 %x) { define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, 
[[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -156,12 +164,20 @@ define <2 x i64> @shl_constant_op0_load(i64* %p) { } define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { -; CHECK-LABEL: @shl_constant_op0_multiuse( -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INS]], -; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] -; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] -; CHECK-NEXT: ret <4 x i32> [[XOR]] +; SSE-LABEL: @shl_constant_op0_multiuse( +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 +; SSE-NEXT: [[MUL_SCALAR:%.*]] = shl i32 [[A0]], 3 +; SSE-NEXT: [[MUL:%.*]] = insertelement <4 x i32> , i32 [[MUL_SCALAR]], i64 0 +; SSE-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] +; SSE-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] +; SSE-NEXT: ret <4 x i32> [[XOR]] +; +; AVX-LABEL: @shl_constant_op0_multiuse( +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x i32> , i32 [[A0:%.*]], i32 0 +; AVX-NEXT: [[MUL:%.*]] = shl <4 x i32> [[INS]], +; AVX-NEXT: [[AND:%.*]] = and <4 x i32> [[INS]], [[A1:%.*]] +; AVX-NEXT: [[XOR:%.*]] = xor <4 x i32> [[AND]], [[MUL]] +; AVX-NEXT: ret <4 x i32> [[XOR]] ; %ins = insertelement <4 x i32> , i32 %a0, i32 0 %mul = shl <4 x i32> %ins, @@ -172,8 +188,8 @@ define <4 x i32> @shl_constant_op0_multiuse(i32 %a0, <4 x i32> %a1) { define <2 x i64> @shl_constant_op1(i64 %x) { ; CHECK-LABEL: @shl_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ 
-183,8 +199,8 @@ define <2 x i64> @shl_constant_op1(i64 %x) { define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @shl_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -207,8 +223,8 @@ define <2 x i64> @shl_constant_op1_load(i64* %p) { define <2 x i64> @ashr_constant_op0(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -218,8 +234,8 @@ define <2 x i64> @ashr_constant_op0(i64 %x) { define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = ashr exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr exact i64 2, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -229,8 +245,8 @@ define <2 x i64> @ashr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @ashr_constant_op1(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 +; CHECK-NEXT: 
[[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -240,8 +256,8 @@ define <2 x i64> @ashr_constant_op1(i64 %x) { define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @ashr_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = ashr <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = ashr i64 [[X:%.*]], 5 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -251,8 +267,8 @@ define <2 x i64> @ashr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op0(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -262,8 +278,8 @@ define <2 x i64> @lshr_constant_op0(i64 %x) { define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = lshr <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -273,8 +289,8 @@ define <2 x i64> @lshr_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @lshr_constant_op1(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 
[[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -284,8 +300,8 @@ define <2 x i64> @lshr_constant_op1(i64 %x) { define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @lshr_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = lshr exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = lshr exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -295,8 +311,8 @@ define <2 x i64> @lshr_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op0(i64 %x) { ; CHECK-LABEL: @urem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -306,8 +322,8 @@ define <2 x i64> @urem_constant_op0(i64 %x) { define <2 x i64> @urem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -317,8 +333,8 @@ define <2 x i64> 
@urem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @urem_constant_op1(i64 %x) { ; CHECK-LABEL: @urem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -328,8 +344,8 @@ define <2 x i64> @urem_constant_op1(i64 %x) { define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @urem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = urem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = urem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -339,8 +355,8 @@ define <2 x i64> @urem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op0(i64 %x) { ; CHECK-LABEL: @srem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -350,8 +366,8 @@ define <2 x i64> @srem_constant_op0(i64 %x) { define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 
[[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -361,8 +377,8 @@ define <2 x i64> @srem_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @srem_constant_op1(i64 %x) { ; CHECK-LABEL: @srem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -372,8 +388,8 @@ define <2 x i64> @srem_constant_op1(i64 %x) { define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @srem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = srem <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = srem i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -383,8 +399,8 @@ define <2 x i64> @srem_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op0(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -394,8 +410,8 @@ define <2 x i64> @udiv_constant_op0(i64 %x) { define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: 
[[BO:%.*]] = udiv exact <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv exact i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -405,8 +421,8 @@ define <2 x i64> @udiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @udiv_constant_op1(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -416,8 +432,8 @@ define <2 x i64> @udiv_constant_op1(i64 %x) { define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @udiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = udiv i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -427,8 +443,8 @@ define <2 x i64> @udiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op0(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -438,8 +454,8 @@ define <2 x i64> @sdiv_constant_op0(i64 %x) { define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 
%x) { ; CHECK-LABEL: @sdiv_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = sdiv <2 x i64> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv i64 5, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -449,8 +465,8 @@ define <2 x i64> @sdiv_constant_op0_not_undef_lane(i64 %x) { define <2 x i64> @sdiv_constant_op1(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -460,8 +476,8 @@ define <2 x i64> @sdiv_constant_op1(i64 %x) { define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) { ; CHECK-LABEL: @sdiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = sdiv exact <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = sdiv exact i64 [[X:%.*]], 2 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -471,8 +487,8 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) { define <2 x i64> @and_constant(i64 %x) { ; CHECK-LABEL: @and_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x 
i64> undef, i64 %x, i32 0 @@ -482,8 +498,8 @@ define <2 x i64> @and_constant(i64 %x) { define <2 x i64> @and_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @and_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -493,8 +509,8 @@ define <2 x i64> @and_constant_not_undef_lane(i64 %x) { define <2 x i64> @or_constant(i64 %x) { ; CHECK-LABEL: @or_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -504,8 +520,8 @@ define <2 x i64> @or_constant(i64 %x) { define <2 x i64> @or_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @or_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = or <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 1 @@ -515,8 +531,8 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) { define <2 x i64> @xor_constant(i64 %x) { ; CHECK-LABEL: @xor_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> , i64 [[BO_SCALAR]], i64 0 ; 
CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -526,8 +542,8 @@ define <2 x i64> @xor_constant(i64 %x) { define <2 x i64> @xor_constant_not_undef_lane(i64 %x) { ; CHECK-LABEL: @xor_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> undef, i64 [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x i64> [[BO]] ; %ins = insertelement <2 x i64> undef, i64 %x, i32 0 @@ -537,8 +553,8 @@ define <2 x i64> @xor_constant_not_undef_lane(i64 %x) { define <2 x double> @fadd_constant(double %x) { ; CHECK-LABEL: @fadd_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -548,8 +564,8 @@ define <2 x double> @fadd_constant(double %x) { define <2 x double> @fadd_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fadd_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], -4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -559,8 +575,8 @@ define <2 x double> @fadd_constant_not_undef_lane(double %x) { define <2 x double> @fsub_constant_op0(double %x) { ; CHECK-LABEL: @fsub_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; 
CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub fast double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -570,8 +586,8 @@ define <2 x double> @fsub_constant_op0(double %x) { define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fsub nsz <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub nsz double -4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -581,8 +597,8 @@ define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) { define <2 x double> @fsub_constant_op1(double %x) { ; CHECK-LABEL: @fsub_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -592,8 +608,8 @@ define <2 x double> @fsub_constant_op1(double %x) { define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fsub_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> 
[[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -603,8 +619,8 @@ define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) { define <2 x double> @fmul_constant(double %x) { ; CHECK-LABEL: @fmul_constant( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -614,8 +630,8 @@ define <2 x double> @fmul_constant(double %x) { define <2 x double> @fmul_constant_not_undef_lane(double %x) { ; CHECK-LABEL: @fmul_constant_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fmul <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul double [[X:%.*]], -4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -625,8 +641,8 @@ define <2 x double> @fmul_constant_not_undef_lane(double %x) { define <2 x double> @fdiv_constant_op0(double %x) { ; CHECK-LABEL: @fdiv_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = fdiv nnan <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv nnan double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -636,8 +652,8 @@ define <2 x double> @fdiv_constant_op0(double %x) { define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane( -; CHECK-NEXT: 
[[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -647,8 +663,8 @@ define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) { define <2 x double> @fdiv_constant_op1(double %x) { ; CHECK-LABEL: @fdiv_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -658,8 +674,8 @@ define <2 x double> @fdiv_constant_op1(double %x) { define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -669,8 +685,8 @@ define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) { define <2 x double> @frem_constant_op0(double %x) { ; CHECK-LABEL: @frem_constant_op0( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = frem fast <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem fast double 4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> 
, double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 @@ -680,8 +696,8 @@ define <2 x double> @frem_constant_op0(double %x) { define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op0_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = frem <2 x double> , [[INS]] +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem double -4.200000e+01, [[X:%.*]] +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -691,8 +707,8 @@ define <2 x double> @frem_constant_op0_not_undef_lane(double %x) { define <2 x double> @frem_constant_op1(double %x) { ; CHECK-LABEL: @frem_constant_op1( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 1 -; CHECK-NEXT: [[BO:%.*]] = frem ninf <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem ninf double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 1 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 1 @@ -702,8 +718,8 @@ define <2 x double> @frem_constant_op1(double %x) { define <2 x double> @frem_constant_op1_not_undef_lane(double %x) { ; CHECK-LABEL: @frem_constant_op1_not_undef_lane( -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0 -; CHECK-NEXT: [[BO:%.*]] = frem nnan <2 x double> [[INS]], +; CHECK-NEXT: [[BO_SCALAR:%.*]] = frem nnan double [[X:%.*]], 4.200000e+01 +; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> , double [[BO_SCALAR]], i64 0 ; CHECK-NEXT: ret <2 x double> [[BO]] ; %ins = insertelement <2 x double> undef, double %x, i32 0 From 6bb93e3dd0e28dafe6d3ddb700d2036d00b323aa Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 9 Jun 
2020 11:07:25 -0700 Subject: [PATCH 05/25] [gcov][test] Add mkdir -p %t && cd %t This allows an alternative lit runner (which does not chdir to %T) to run within a read-only source tree. --- llvm/test/Transforms/GCOVProfiling/global-ctor.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/Transforms/GCOVProfiling/global-ctor.ll b/llvm/test/Transforms/GCOVProfiling/global-ctor.ll index 596a2ad77635d1..e90385c7c42e00 100644 --- a/llvm/test/Transforms/GCOVProfiling/global-ctor.ll +++ b/llvm/test/Transforms/GCOVProfiling/global-ctor.ll @@ -1,5 +1,6 @@ ;; For a global constructor, _GLOBAL__sub_I_ only has artificial lines. ;; Test that we don't instrument those functions. +; RUN: mkdir -p %t && cd %t ; RUN: opt -S -insert-gcov-profiling < %s | FileCheck %s ; RUN: opt -S -passes=insert-gcov-profiling < %s | FileCheck %s From 2fea3fe41c5a177d019dd99fb1b43d767eccde24 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 9 Jun 2020 17:35:45 +0100 Subject: [PATCH 06/25] [MachineScheduler] Update available queue on the first mop of a new cycle If a resource can be held for multiple cycles in the schedule model then an instruction can be placed into the available queue, another instruction can be scheduled, but the first will not be taken back out if the two instructions hazard. To fix this make sure that we update the available queue even on the first MOp of a cycle, pushing available instructions back into the pending queue if they now conflict. This happens with some downstream schedules we have around MVE instruction scheduling where we use ResourceCycles=[2] to show the instruction executing over two beats. Apparently the test changes here are OK too. 
Differential Revision: https://reviews.llvm.org/D76909 --- llvm/lib/CodeGen/MachineScheduler.cpp | 16 +- .../CodeGen/AArch64/misched-fusion-aes.ll | 4 +- .../CodeGen/PowerPC/2007-01-15-AsmDialect.ll | 2 +- .../CodeGen/PowerPC/2008-10-28-f128-i32.ll | 126 +++---- llvm/test/CodeGen/PowerPC/aix-cc-abi.ll | 6 +- .../CodeGen/PowerPC/aix32-cc-abi-vaarg.ll | 18 +- .../PowerPC/fp128-bitcast-after-operation.ll | 4 +- llvm/test/CodeGen/PowerPC/inc-of-add.ll | 332 +++++++++--------- llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll | 2 +- .../ppcf128-constrained-fp-intrinsics.ll | 14 +- llvm/test/CodeGen/PowerPC/pr43976.ll | 6 +- llvm/test/CodeGen/PowerPC/spe.ll | 6 +- llvm/test/CodeGen/PowerPC/sub-of-not.ll | 332 +++++++++--------- .../umulo-128-legalisation-lowering.ll | 60 ++-- llvm/test/CodeGen/PowerPC/vec_splat.ll | 156 ++++---- 15 files changed, 542 insertions(+), 542 deletions(-) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index a6889919137495..0f21c97a30f685 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2424,16 +2424,14 @@ SUnit *SchedBoundary::pickOnlyChoice() { if (CheckPending) releasePending(); - if (CurrMOps > 0) { - // Defer any ready instrs that now have a hazard. - for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { - if (checkHazard(*I)) { - Pending.push(*I); - I = Available.remove(I); - continue; - } - ++I; + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; } + ++I; } for (unsigned i = 0; Available.empty(); ++i) { // FIXME: Re-enable assert once PR20057 is resolved. 
diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll index 70038e934c9f74..95a419bd7398a0 100644 --- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -79,7 +79,7 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesea: ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesmc [[VA]], [[VA]] +; CHECK: aesmc [[VA]], [[VA]] ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesmc [[VB]], [[VB]] ; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} @@ -163,7 +163,7 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, ; CHECK-LABEL: aesda: ; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} -; CHECK-NEXT: aesimc [[VA]], [[VA]] +; CHECK: aesimc [[VA]], [[VA]] ; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} ; CHECK-NEXT: aesimc [[VB]], [[VB]] ; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} diff --git a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll index d216cf59bde263..9af68e7d801279 100644 --- a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll +++ b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll @@ -2,7 +2,7 @@ define i32 @foo() nounwind { entry: -; CHECK: cntlzw 3, 4 +; CHECK: cntlzw 3, 3 %retval = alloca i32, align 4 ; [#uses=2] %temp = alloca i32, align 4 ; [#uses=2] %ctz_x = alloca i32, align 4 ; [#uses=3] diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll index 7897d1c6b8a5af..028904fc3200a9 100644 --- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll +++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll @@ -9,29 +9,29 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stwu 1, -464(1) ; CHECK-NEXT: mfcr 12 ; CHECK-NEXT: stw 29, 412(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; 
CHECK-NEXT: lis 3, .LCPI0_0@ha +; CHECK-NEXT: stw 30, 416(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 12, 408(1) ; CHECK-NEXT: stfd 2, 376(1) -; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 1, 384(1) -; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill ; CHECK-NEXT: lwz 4, 380(1) -; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) -; CHECK-NEXT: lwz 3, 384(1) +; CHECK-NEXT: stfd 27, 424(1) # 8-byte Folded Spill ; CHECK-NEXT: stw 4, 396(1) -; CHECK-NEXT: fcmpu 0, 2, 27 ; CHECK-NEXT: lwz 4, 376(1) +; CHECK-NEXT: lfs 27, .LCPI0_0@l(3) +; CHECK-NEXT: stfd 1, 384(1) +; CHECK-NEXT: stw 4, 392(1) +; CHECK-NEXT: fcmpu 0, 2, 27 +; CHECK-NEXT: lwz 4, 388(1) ; CHECK-NEXT: fcmpu 1, 1, 27 +; CHECK-NEXT: lwz 3, 384(1) ; CHECK-NEXT: crand 20, 6, 0 ; CHECK-NEXT: cror 20, 4, 20 -; CHECK-NEXT: stw 4, 392(1) -; CHECK-NEXT: stw 3, 400(1) -; CHECK-NEXT: lwz 4, 388(1) +; CHECK-NEXT: stfd 28, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 30, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 456(1) # 8-byte Folded Spill ; CHECK-NEXT: stw 4, 404(1) +; CHECK-NEXT: stw 3, 400(1) ; CHECK-NEXT: bc 4, 20, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb5 ; CHECK-NEXT: li 3, 0 @@ -41,54 +41,53 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 0, 400(1) ; CHECK-NEXT: lis 3, 15856 ; CHECK-NEXT: stw 3, 336(1) -; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: li 29, 0 ; CHECK-NEXT: stfd 0, 304(1) -; CHECK-NEXT: stw 29, 340(1) -; CHECK-NEXT: stw 29, 332(1) -; CHECK-NEXT: stw 29, 328(1) ; CHECK-NEXT: lwz 3, 308(1) -; CHECK-NEXT: stfd 1, 296(1) -; CHECK-NEXT: lfd 3, 336(1) -; CHECK-NEXT: lfd 4, 328(1) +; CHECK-NEXT: lfd 1, 392(1) ; CHECK-NEXT: stw 3, 324(1) ; CHECK-NEXT: lwz 3, 304(1) +; CHECK-NEXT: stfd 1, 296(1) ; CHECK-NEXT: stw 3, 320(1) ; CHECK-NEXT: lwz 3, 300(1) 
-; CHECK-NEXT: lfd 31, 320(1) +; CHECK-NEXT: stw 29, 340(1) ; CHECK-NEXT: stw 3, 316(1) -; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 296(1) +; CHECK-NEXT: stw 29, 332(1) ; CHECK-NEXT: stw 3, 312(1) +; CHECK-NEXT: stw 29, 328(1) +; CHECK-NEXT: lfd 31, 320(1) ; CHECK-NEXT: lfd 30, 312(1) +; CHECK-NEXT: lfd 3, 336(1) +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: lfd 4, 328(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: bl __gcc_qmul ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stfd 1, 280(1) -; CHECK-NEXT: stw 3, 368(1) -; CHECK-NEXT: stfd 2, 288(1) -; CHECK-NEXT: stw 29, 372(1) -; CHECK-NEXT: stw 29, 364(1) -; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stw 3, 368(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 368(1) -; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lwz 3, 284(1) +; CHECK-NEXT: stfd 2, 288(1) ; CHECK-NEXT: stw 3, 356(1) ; CHECK-NEXT: lwz 3, 280(1) +; CHECK-NEXT: stw 29, 372(1) ; CHECK-NEXT: stw 3, 352(1) ; CHECK-NEXT: lwz 3, 292(1) -; CHECK-NEXT: lfd 1, 352(1) +; CHECK-NEXT: stw 29, 364(1) ; CHECK-NEXT: stw 3, 348(1) ; CHECK-NEXT: lwz 3, 288(1) +; CHECK-NEXT: stw 29, 360(1) ; CHECK-NEXT: stw 3, 344(1) +; CHECK-NEXT: lfd 3, 368(1) +; CHECK-NEXT: lfd 4, 360(1) +; CHECK-NEXT: lfd 1, 352(1) ; CHECK-NEXT: lfd 2, 344(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 ; CHECK-NEXT: mtfsb1 31 ; CHECK-NEXT: lis 3, .LCPI0_1@ha -; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: mtfsb0 30 ; CHECK-NEXT: fadd 1, 2, 1 ; CHECK-NEXT: mtfsf 1, 0 @@ -102,6 +101,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) ; CHECK-NEXT: fctiwz 0, 0 ; CHECK-NEXT: stfd 0, 152(1) +; CHECK-NEXT: fcmpu 0, 28, 27 ; CHECK-NEXT: lwz 3, 164(1) ; CHECK-NEXT: fcmpu 1, 29, 1 ; CHECK-NEXT: lwz 4, 156(1) @@ -120,25 +120,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: bl __floatditf ; CHECK-NEXT: lis 3, 17392 ; CHECK-NEXT: stfd 1, 208(1) -; CHECK-NEXT: stw 3, 240(1) -; CHECK-NEXT: stfd 2, 
200(1) -; CHECK-NEXT: stw 29, 244(1) -; CHECK-NEXT: stw 29, 236(1) -; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: fmr 29, 1 -; CHECK-NEXT: lwz 3, 212(1) +; CHECK-NEXT: stw 3, 240(1) ; CHECK-NEXT: fmr 28, 2 -; CHECK-NEXT: lfd 3, 240(1) -; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lwz 3, 212(1) ; CHECK-NEXT: cmpwi 2, 30, 0 +; CHECK-NEXT: stfd 2, 200(1) ; CHECK-NEXT: stw 3, 228(1) ; CHECK-NEXT: lwz 3, 208(1) +; CHECK-NEXT: stw 29, 244(1) ; CHECK-NEXT: stw 3, 224(1) ; CHECK-NEXT: lwz 3, 204(1) -; CHECK-NEXT: lfd 1, 224(1) +; CHECK-NEXT: stw 29, 236(1) ; CHECK-NEXT: stw 3, 220(1) ; CHECK-NEXT: lwz 3, 200(1) +; CHECK-NEXT: stw 29, 232(1) ; CHECK-NEXT: stw 3, 216(1) +; CHECK-NEXT: lfd 3, 240(1) +; CHECK-NEXT: lfd 4, 232(1) +; CHECK-NEXT: lfd 1, 224(1) ; CHECK-NEXT: lfd 2, 216(1) ; CHECK-NEXT: bl __gcc_qadd ; CHECK-NEXT: blt 2, .LBB0_7 @@ -150,9 +150,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fmr 1, 29 ; CHECK-NEXT: .LBB0_9: # %bb1 ; CHECK-NEXT: stfd 1, 184(1) -; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 1, 31 ; CHECK-NEXT: lwz 3, 188(1) +; CHECK-NEXT: stfd 2, 192(1) ; CHECK-NEXT: fmr 2, 30 ; CHECK-NEXT: stw 3, 260(1) ; CHECK-NEXT: lwz 3, 184(1) @@ -165,10 +165,10 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 4, 248(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: stfd 2, 176(1) -; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: fcmpu 1, 2, 27 ; CHECK-NEXT: lwz 3, 180(1) ; CHECK-NEXT: fcmpu 0, 1, 27 +; CHECK-NEXT: stfd 1, 168(1) ; CHECK-NEXT: crandc 20, 2, 4 ; CHECK-NEXT: stw 3, 268(1) ; CHECK-NEXT: lwz 3, 176(1) @@ -184,27 +184,27 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: cror 20, 1, 3 ; CHECK-NEXT: bc 12, 20, .LBB0_14 ; CHECK-NEXT: # %bb.11: # %bb2 -; CHECK-NEXT: fneg 28, 31 -; CHECK-NEXT: stfd 28, 48(1) +; CHECK-NEXT: fneg 29, 31 +; CHECK-NEXT: stfd 29, 48(1) ; CHECK-NEXT: lis 3, 16864 ; CHECK-NEXT: stw 3, 80(1) -; CHECK-NEXT: fneg 29, 30 +; CHECK-NEXT: fneg 28, 30 ; CHECK-NEXT: 
lwz 3, 52(1) -; CHECK-NEXT: stfd 29, 40(1) ; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: stw 29, 84(1) -; CHECK-NEXT: stw 29, 76(1) -; CHECK-NEXT: stw 29, 72(1) +; CHECK-NEXT: stfd 28, 40(1) ; CHECK-NEXT: stw 3, 68(1) -; CHECK-NEXT: lfd 3, 80(1) -; CHECK-NEXT: lfd 4, 72(1) ; CHECK-NEXT: lwz 3, 48(1) +; CHECK-NEXT: stw 29, 84(1) ; CHECK-NEXT: stw 3, 64(1) ; CHECK-NEXT: lwz 3, 44(1) -; CHECK-NEXT: lfd 1, 64(1) +; CHECK-NEXT: stw 29, 76(1) ; CHECK-NEXT: stw 3, 60(1) ; CHECK-NEXT: lwz 3, 40(1) +; CHECK-NEXT: stw 29, 72(1) ; CHECK-NEXT: stw 3, 56(1) +; CHECK-NEXT: lfd 3, 80(1) +; CHECK-NEXT: lfd 4, 72(1) +; CHECK-NEXT: lfd 1, 64(1) ; CHECK-NEXT: lfd 2, 56(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -220,12 +220,12 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfs 0, .LCPI0_2@l(3) ; CHECK-NEXT: lis 3, .LCPI0_3@ha ; CHECK-NEXT: mtfsb0 30 -; CHECK-NEXT: fadd 2, 29, 28 +; CHECK-NEXT: fadd 2, 28, 29 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_3@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 24(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 36(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 28(1) @@ -244,22 +244,22 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: stfd 31, 112(1) ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: stw 3, 148(1) +; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: stw 3, 140(1) ; CHECK-NEXT: stw 3, 136(1) -; CHECK-NEXT: stfd 30, 104(1) -; CHECK-NEXT: lis 4, 16864 ; CHECK-NEXT: lwz 3, 116(1) -; CHECK-NEXT: stw 4, 144(1) -; CHECK-NEXT: lfd 4, 136(1) +; CHECK-NEXT: stfd 30, 104(1) ; CHECK-NEXT: stw 3, 132(1) -; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: lwz 3, 112(1) +; CHECK-NEXT: stw 4, 144(1) ; CHECK-NEXT: stw 3, 128(1) ; CHECK-NEXT: lwz 3, 108(1) -; CHECK-NEXT: lfd 1, 128(1) +; CHECK-NEXT: lfd 3, 144(1) ; CHECK-NEXT: stw 3, 124(1) ; CHECK-NEXT: lwz 3, 104(1) +; CHECK-NEXT: lfd 4, 136(1) ; CHECK-NEXT: stw 3, 120(1) +; CHECK-NEXT: lfd 1, 128(1) ; CHECK-NEXT: 
lfd 2, 120(1) ; CHECK-NEXT: bl __gcc_qsub ; CHECK-NEXT: mffs 0 @@ -278,9 +278,9 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: fadd 2, 30, 31 ; CHECK-NEXT: mtfsf 1, 1 ; CHECK-NEXT: lfs 1, .LCPI0_1@l(3) -; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: fctiwz 2, 2 ; CHECK-NEXT: stfd 2, 88(1) +; CHECK-NEXT: fcmpu 0, 30, 0 ; CHECK-NEXT: lwz 3, 100(1) ; CHECK-NEXT: fcmpu 1, 31, 1 ; CHECK-NEXT: lwz 4, 92(1) @@ -300,8 +300,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone { ; CHECK-NEXT: lfd 28, 432(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 12, 408(1) ; CHECK-NEXT: lfd 27, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: mtcrf 32, 12 # cr2 +; CHECK-NEXT: lwz 30, 416(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 412(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 468(1) ; CHECK-NEXT: addi 1, 1, 464 diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll index d155a78812257a..52070aa9063d6c 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi.ll @@ -704,8 +704,8 @@ declare void @test_vararg(i32, ...) 
; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r6 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -773,8 +773,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, %stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r7 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) @@ -844,8 +844,8 @@ entry: ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r[[REG]] :: (dereferenceable load 4 from @f1) ; 32BIT-NEXT: renamable $r[[REG:[0-9]+]] = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: STFD renamable $f1, 0, 
%stack.[[SLOT1:[0-9]+]] :: (store 8 into %stack.[[SLOT1]]) -; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r4 = LWZ 0, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]], align 8) +; 32BIT-NEXT: renamable $f2 = LFD 0, killed renamable $r[[REG]] :: (dereferenceable load 8 from @d1) ; 32BIT-NEXT: renamable $r5 = LWZ 4, %stack.[[SLOT1]] :: (load 4 from %stack.[[SLOT1]] + 4) ; 32BIT-NEXT: STFD renamable $f2, 0, %stack.[[SLOT2:[0-9]+]] :: (store 8 into %stack.[[SLOT2]]) ; 32BIT-NEXT: renamable $r8 = LWZ 0, %stack.[[SLOT2]] :: (load 4 from %stack.[[SLOT2]], align 8) diff --git a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll index 9f521788a3fccf..c276d4ccc39523 100644 --- a/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll +++ b/llvm/test/CodeGen/PowerPC/aix32-cc-abi-vaarg.ll @@ -68,15 +68,15 @@ ; 32BIT-DAG: STW killed renamable $r8, 16, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r9, 20, %fixed-stack.0 :: (store 4) ; 32BIT-DAG: STW killed renamable $r10, 24, %fixed-stack.0 :: (store 4) -; 32BIT-DAG: STW killed renamable $r5, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) -; 32BIT-DAG: renamable $r5 = ADDI %fixed-stack.0, 4 -; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.1) -; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 0 -; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.0) -; 32BIT-DAG: STW renamable $r5, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) -; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) -; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) -; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r4, killed renamable $r3 +; 32BIT-DAG: STW killed renamable $r4, 0, %stack.1.arg2 :: (store 4 into %ir.arg2) +; 32BIT-DAG: renamable $r4 = ADDI %fixed-stack.0, 4 +; 32BIT-DAG: STW killed renamable $r11, 0, %stack.1.arg2 :: 
(store 4 into %ir.1) +; 32BIT-DAG: renamable $r11 = ADDI %fixed-stack.0, 0 +; 32BIT-DAG: STW renamable $r11, 0, %stack.0.arg1 :: (store 4 into %ir.0) +; 32BIT-DAG: STW renamable $r4, 0, %stack.0.arg1 :: (store 4 into %ir.arg1) +; 32BIT-DAG: renamable $r5 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.2) +; 32BIT-DAG: renamable $r4 = LWZ 0, %fixed-stack.0 :: (load 4 from %ir.4) +; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r5, killed renamable $r3 ; 32BIT-DAG: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 ; 32BIT-DAG: BLR implicit $lr, implicit $rm, implicit $r3 diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index 54ceccd9c59ab1..fa57f50cb43df2 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -28,9 +28,9 @@ entry: ; PPC32-DAG: stfd 2, 16(1) ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) +; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) ; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) -; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 ; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]] ; PPC32-DAG: xor [[LO0]], [[LO0]], [[FLIP_BIT]] ; PPC32: blr @@ -68,9 +68,9 @@ entry: ; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) ; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) ; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) -; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-NOT: BARRIER ; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768 +; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) ; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768 ; PPC32: blr %0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x diff --git a/llvm/test/CodeGen/PowerPC/inc-of-add.ll b/llvm/test/CodeGen/PowerPC/inc-of-add.ll index fa03379a3c3076..90004143326fbc 100644 --- a/llvm/test/CodeGen/PowerPC/inc-of-add.ll +++ b/llvm/test/CodeGen/PowerPC/inc-of-add.ll @@ -65,88 +65,88 @@ define <16 x 
i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) +; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill ; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) ; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) ; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) -; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; PPC32-NEXT: lbz 21, 159(1) ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 
143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: 
addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 
191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; 
PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; 
PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; 
PPC64BE-NEXT: add 6, 11, 6 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 +; PPC64BE-NEXT: addi 11, 11, 1 ; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 ; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll index 42cbb30318bceb..5fae34f212cccc 100644 --- a/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll +++ b/llvm/test/CodeGen/PowerPC/ppc32-skip-regs.ll @@ -17,9 +17,9 @@ entry: ; argument put on stack. ; CHECK-NOT: mr 8, 4 ; CHECK: stw 6, 16(1) +; CHECK: stw 7, 20(1) ; CHECK: stw 5, 12(1) ; CHECK: stw 4, 8(1) -; CHECK: stw 7, 20(1) declare i32 @printf(i8* nocapture readonly, ...) 
diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index c9d9cf870e49fc..b87f1a682e25aa 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1442,19 +1442,19 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: mr 29, 3 ; PC64-NEXT: li 3, 0 ; PC64-NEXT: stfd 31, 168(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: std 30, 128(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill -; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: mr 30, 4 ; PC64-NEXT: lfs 31, 0(29) ; PC64-NEXT: std 3, 8(4) ; PC64-NEXT: addis 3, 2, .LCPI32_0@toc@ha +; PC64-NEXT: stfd 30, 160(1) # 8-byte Folded Spill ; PC64-NEXT: lfs 30, .LCPI32_0@toc@l(3) ; PC64-NEXT: fmr 1, 31 ; PC64-NEXT: fmr 3, 31 +; PC64-NEXT: stfd 28, 144(1) # 8-byte Folded Spill ; PC64-NEXT: fmr 2, 30 ; PC64-NEXT: fmr 4, 30 +; PC64-NEXT: stfd 29, 152(1) # 8-byte Folded Spill ; PC64-NEXT: stfd 31, 0(4) ; PC64-NEXT: bl __gcc_qadd ; PC64-NEXT: nop @@ -1475,14 +1475,14 @@ define void @test_constrained_libcall_multichain(float* %firstptr, ppc_fp128* %r ; PC64-NEXT: nop ; PC64-NEXT: frsp 0, 1 ; PC64-NEXT: stfs 0, 0(29) -; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload -; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: ld 29, 120(1) # 8-byte Folded Reload ; PC64-NEXT: stfd 1, -16(30) ; PC64-NEXT: stfd 2, -8(30) ; PC64-NEXT: ld 30, 128(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 31, 168(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 30, 160(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 29, 152(1) # 8-byte Folded Reload +; PC64-NEXT: lfd 28, 144(1) # 8-byte Folded Reload ; PC64-NEXT: addi 1, 1, 176 ; PC64-NEXT: ld 0, 
16(1) ; PC64-NEXT: mtlr 0 diff --git a/llvm/test/CodeGen/PowerPC/pr43976.ll b/llvm/test/CodeGen/PowerPC/pr43976.ll index 91722283f4ae01..9dc1a52c567f57 100644 --- a/llvm/test/CodeGen/PowerPC/pr43976.ll +++ b/llvm/test/CodeGen/PowerPC/pr43976.ll @@ -10,11 +10,11 @@ define dso_local signext i32 @b() local_unnamed_addr #0 { ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stdu r1, -144(r1) ; CHECK-NEXT: addis r3, r2, a@toc@ha -; CHECK-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-NEXT: lfd f0, a@toc@l(r3) -; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r4) ; CHECK-NEXT: li r4, 1 +; CHECK-NEXT: lfd f0, a@toc@l(r3) +; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-NEXT: sldi r4, r4, 63 +; CHECK-NEXT: lfs f1, .LCPI0_0@toc@l(r3) ; CHECK-NEXT: fsub f2, f0, f1 ; CHECK-NEXT: fctidz f2, f2 ; CHECK-NEXT: stfd f2, 128(r1) diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll index d2400be43cb490..1c4c7a33981770 100644 --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1297,6 +1297,8 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload ; CHECK-NEXT: li 5, 256 ; CHECK-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload +; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 +; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: evldd 29, 248(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 28, 240(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 27, 232(1) # 8-byte Folded Reload @@ -1313,8 +1315,6 @@ define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* ; CHECK-NEXT: evldd 16, 144(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 15, 136(1) # 8-byte Folded Reload ; CHECK-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 ; CHECK-NEXT: lwz 31, 348(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 30, 344(1) # 4-byte Folded Reload ; CHECK-NEXT: 
lwz 29, 340(1) # 4-byte Folded Reload @@ -1392,8 +1392,8 @@ define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { ; CHECK-NEXT: # implicit-def: $r5 ; CHECK-NEXT: .LBB57_4: # %for.cond.cleanup ; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 0, 52(1) diff --git a/llvm/test/CodeGen/PowerPC/sub-of-not.ll b/llvm/test/CodeGen/PowerPC/sub-of-not.ll index db92a3eb1bee95..d2b55aaf7ac839 100644 --- a/llvm/test/CodeGen/PowerPC/sub-of-not.ll +++ b/llvm/test/CodeGen/PowerPC/sub-of-not.ll @@ -65,88 +65,88 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC32: # %bb.0: ; PPC32-NEXT: stwu 1, -64(1) ; PPC32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 4, 119(1) -; PPC32-NEXT: lbz 11, 115(1) -; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; PPC32-NEXT: add 4, 4, 6 ; PPC32-NEXT: lbz 21, 123(1) -; PPC32-NEXT: lbz 6, 131(1) -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: lbz 11, 127(1) +; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill ; PPC32-NEXT: add 7, 21, 7 +; PPC32-NEXT: lbz 23, 115(1) +; PPC32-NEXT: lbz 22, 119(1) ; PPC32-NEXT: lbz 21, 135(1) -; PPC32-NEXT: lbz 24, 83(1) -; PPC32-NEXT: lbz 23, 79(1) -; PPC32-NEXT: add 6, 6, 9 +; PPC32-NEXT: add 5, 23, 5 +; PPC32-NEXT: lbz 23, 127(1) +; PPC32-NEXT: add 6, 22, 6 +; PPC32-NEXT: lbz 22, 131(1) ; PPC32-NEXT: add 10, 21, 10 -; PPC32-NEXT: lbz 21, 147(1) -; PPC32-NEXT: lbz 9, 143(1) -; PPC32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: add 8, 11, 8 -; PPC32-NEXT: lbz 22, 75(1) -; PPC32-NEXT: lbz 11, 139(1) -; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; PPC32-NEXT: add 24, 21, 24 -; PPC32-NEXT: lbz 27, 95(1) -; 
PPC32-NEXT: lbz 21, 159(1) ; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: add 9, 9, 23 -; PPC32-NEXT: lbz 26, 91(1) -; PPC32-NEXT: lbz 23, 155(1) +; PPC32-NEXT: add 8, 23, 8 +; PPC32-NEXT: lbz 26, 83(1) +; PPC32-NEXT: add 9, 22, 9 +; PPC32-NEXT: lbz 21, 147(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: add 11, 11, 22 -; PPC32-NEXT: lbz 25, 87(1) -; PPC32-NEXT: lbz 22, 151(1) -; PPC32-NEXT: lbz 12, 111(1) -; PPC32-NEXT: add 27, 21, 27 -; PPC32-NEXT: lbz 21, 175(1) -; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; PPC32-NEXT: lbz 0, 107(1) +; PPC32-NEXT: add 26, 21, 26 +; PPC32-NEXT: lbz 25, 79(1) +; PPC32-NEXT: lbz 24, 75(1) +; PPC32-NEXT: lbz 23, 139(1) +; PPC32-NEXT: lbz 22, 143(1) ; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: add 26, 23, 26 -; PPC32-NEXT: lbz 30, 171(1) -; PPC32-NEXT: lbz 29, 103(1) -; PPC32-NEXT: lbz 23, 167(1) -; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 24, 23, 24 +; PPC32-NEXT: lbz 29, 95(1) ; PPC32-NEXT: add 25, 22, 25 -; PPC32-NEXT: lbz 28, 99(1) -; PPC32-NEXT: lbz 22, 163(1) -; PPC32-NEXT: add 12, 21, 12 -; PPC32-NEXT: add 30, 30, 0 -; PPC32-NEXT: addi 12, 12, 1 -; PPC32-NEXT: add 29, 23, 29 -; PPC32-NEXT: stb 12, 15(3) -; PPC32-NEXT: addi 12, 30, 1 +; PPC32-NEXT: lbz 21, 159(1) +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: add 29, 21, 29 +; PPC32-NEXT: lbz 28, 91(1) +; PPC32-NEXT: lbz 27, 87(1) +; PPC32-NEXT: lbz 23, 151(1) +; PPC32-NEXT: lbz 22, 155(1) +; PPC32-NEXT: lbz 4, 111(1) +; PPC32-NEXT: add 27, 23, 27 +; PPC32-NEXT: lbz 21, 175(1) ; PPC32-NEXT: add 28, 22, 28 -; PPC32-NEXT: stb 12, 14(3) -; PPC32-NEXT: addi 12, 29, 1 -; PPC32-NEXT: stb 12, 13(3) -; PPC32-NEXT: addi 12, 28, 1 -; PPC32-NEXT: stb 12, 12(3) -; PPC32-NEXT: addi 12, 27, 1 -; PPC32-NEXT: stb 12, 11(3) -; PPC32-NEXT: addi 12, 26, 1 -; PPC32-NEXT: addi 9, 9, 1 -; 
PPC32-NEXT: addi 6, 6, 1 -; PPC32-NEXT: stb 12, 10(3) -; PPC32-NEXT: addi 12, 25, 1 -; PPC32-NEXT: stb 9, 7(3) -; PPC32-NEXT: addi 9, 11, 1 -; PPC32-NEXT: stb 6, 4(3) -; PPC32-NEXT: addi 6, 8, 1 +; PPC32-NEXT: lbz 11, 107(1) +; PPC32-NEXT: lbz 12, 171(1) +; PPC32-NEXT: add 4, 21, 4 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill ; PPC32-NEXT: addi 4, 4, 1 -; PPC32-NEXT: stb 12, 9(3) -; PPC32-NEXT: addi 12, 24, 1 -; PPC32-NEXT: stb 9, 6(3) -; PPC32-NEXT: addi 9, 10, 1 -; PPC32-NEXT: stb 6, 3(3) -; PPC32-NEXT: addi 6, 7, 1 +; PPC32-NEXT: lbz 0, 103(1) +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: lbz 30, 99(1) +; PPC32-NEXT: lbz 23, 163(1) +; PPC32-NEXT: lbz 22, 167(1) +; PPC32-NEXT: add 30, 23, 30 +; PPC32-NEXT: stb 4, 15(3) +; PPC32-NEXT: add 23, 22, 0 +; PPC32-NEXT: addi 4, 11, 1 +; PPC32-NEXT: stb 4, 14(3) +; PPC32-NEXT: addi 4, 23, 1 +; PPC32-NEXT: stb 4, 13(3) +; PPC32-NEXT: addi 4, 30, 1 +; PPC32-NEXT: stb 4, 12(3) +; PPC32-NEXT: addi 4, 29, 1 +; PPC32-NEXT: stb 4, 11(3) +; PPC32-NEXT: addi 4, 28, 1 +; PPC32-NEXT: stb 4, 10(3) +; PPC32-NEXT: addi 4, 27, 1 +; PPC32-NEXT: stb 4, 9(3) +; PPC32-NEXT: addi 4, 26, 1 +; PPC32-NEXT: stb 4, 8(3) +; PPC32-NEXT: addi 4, 25, 1 +; PPC32-NEXT: stb 4, 7(3) +; PPC32-NEXT: addi 4, 24, 1 +; PPC32-NEXT: stb 4, 6(3) +; PPC32-NEXT: addi 4, 10, 1 +; PPC32-NEXT: stb 4, 5(3) +; PPC32-NEXT: addi 4, 9, 1 +; PPC32-NEXT: stb 4, 4(3) +; PPC32-NEXT: addi 4, 8, 1 +; PPC32-NEXT: stb 4, 3(3) +; PPC32-NEXT: addi 4, 7, 1 +; PPC32-NEXT: stb 4, 2(3) +; PPC32-NEXT: addi 4, 6, 1 ; PPC32-NEXT: stb 4, 1(3) ; PPC32-NEXT: addi 4, 5, 1 -; PPC32-NEXT: stb 12, 8(3) -; PPC32-NEXT: stb 9, 5(3) -; PPC32-NEXT: stb 6, 2(3) ; PPC32-NEXT: stb 4, 0(3) ; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload ; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload @@ -165,79 +165,79 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; PPC64BE: # %bb.0: ; PPC64BE-NEXT: std 21, -88(1) # 8-byte Folded Spill ; PPC64BE-NEXT: lbz 21, 207(1) -; 
PPC64BE-NEXT: lbz 11, 199(1) -; PPC64BE-NEXT: lbz 12, 191(1) -; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 22, -80(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 23, -72(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 25, -56(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 24, -64(1) # 8-byte Folded Spill -; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill +; PPC64BE-NEXT: std 26, -48(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lbz 0, 183(1) +; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill +; PPC64BE-NEXT: lbz 22, 199(1) +; PPC64BE-NEXT: lbz 23, 191(1) ; PPC64BE-NEXT: add 6, 21, 6 ; PPC64BE-NEXT: lbz 21, 231(1) -; PPC64BE-NEXT: add 5, 11, 5 -; PPC64BE-NEXT: lbz 11, 223(1) -; PPC64BE-NEXT: add 4, 12, 4 -; PPC64BE-NEXT: lbz 12, 215(1) -; PPC64BE-NEXT: lbz 23, 127(1) +; PPC64BE-NEXT: add 5, 22, 5 +; PPC64BE-NEXT: lbz 22, 223(1) +; PPC64BE-NEXT: add 4, 23, 4 +; PPC64BE-NEXT: lbz 23, 215(1) ; PPC64BE-NEXT: add 9, 21, 9 +; PPC64BE-NEXT: lbz 25, 127(1) +; PPC64BE-NEXT: add 8, 22, 8 ; PPC64BE-NEXT: lbz 21, 255(1) -; PPC64BE-NEXT: lbz 22, 119(1) -; PPC64BE-NEXT: add 8, 11, 8 -; PPC64BE-NEXT: lbz 11, 247(1) -; PPC64BE-NEXT: add 7, 12, 7 -; PPC64BE-NEXT: lbz 12, 239(1) -; PPC64BE-NEXT: lbz 26, 151(1) -; PPC64BE-NEXT: add 23, 21, 23 -; PPC64BE-NEXT: lbz 21, 279(1) -; PPC64BE-NEXT: lbz 25, 143(1) -; PPC64BE-NEXT: add 11, 11, 22 -; PPC64BE-NEXT: lbz 22, 271(1) -; PPC64BE-NEXT: lbz 24, 135(1) -; PPC64BE-NEXT: add 10, 12, 10 -; PPC64BE-NEXT: lbz 12, 263(1) -; PPC64BE-NEXT: lbz 30, 175(1) -; PPC64BE-NEXT: lbz 29, 303(1) -; PPC64BE-NEXT: add 26, 21, 26 -; PPC64BE-NEXT: lbz 21, 311(1) -; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 25, 22, 25 -; PPC64BE-NEXT: lbz 28, 167(1) -; PPC64BE-NEXT: lbz 22, 295(1) -; 
PPC64BE-NEXT: std 27, -40(1) # 8-byte Folded Spill -; PPC64BE-NEXT: add 12, 12, 24 -; PPC64BE-NEXT: lbz 27, 159(1) -; PPC64BE-NEXT: lbz 24, 287(1) -; PPC64BE-NEXT: add 30, 29, 30 -; PPC64BE-NEXT: add 29, 21, 0 -; PPC64BE-NEXT: addi 0, 29, 1 -; PPC64BE-NEXT: add 28, 22, 28 -; PPC64BE-NEXT: stb 0, 15(3) -; PPC64BE-NEXT: addi 0, 30, 1 -; PPC64BE-NEXT: add 27, 24, 27 -; PPC64BE-NEXT: stb 0, 14(3) -; PPC64BE-NEXT: addi 0, 28, 1 -; PPC64BE-NEXT: stb 0, 13(3) -; PPC64BE-NEXT: addi 0, 27, 1 -; PPC64BE-NEXT: stb 0, 12(3) -; PPC64BE-NEXT: addi 0, 26, 1 -; PPC64BE-NEXT: addi 12, 12, 1 -; PPC64BE-NEXT: stb 0, 11(3) -; PPC64BE-NEXT: addi 0, 25, 1 -; PPC64BE-NEXT: stb 12, 9(3) -; PPC64BE-NEXT: addi 12, 23, 1 -; PPC64BE-NEXT: addi 11, 11, 1 -; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: add 7, 23, 7 +; PPC64BE-NEXT: lbz 24, 119(1) ; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: lbz 22, 247(1) +; PPC64BE-NEXT: add 25, 21, 25 +; PPC64BE-NEXT: lbz 23, 239(1) ; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: lbz 28, 151(1) +; PPC64BE-NEXT: add 24, 22, 24 +; PPC64BE-NEXT: lbz 21, 279(1) +; PPC64BE-NEXT: add 10, 23, 10 +; PPC64BE-NEXT: lbz 27, 143(1) +; PPC64BE-NEXT: addi 10, 10, 1 +; PPC64BE-NEXT: lbz 22, 271(1) +; PPC64BE-NEXT: add 28, 21, 28 +; PPC64BE-NEXT: lbz 26, 135(1) ; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: lbz 23, 263(1) +; PPC64BE-NEXT: add 27, 22, 27 +; PPC64BE-NEXT: lbz 11, 183(1) ; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: lbz 21, 311(1) +; PPC64BE-NEXT: add 26, 23, 26 +; PPC64BE-NEXT: lbz 12, 175(1) ; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: lbz 0, 303(1) +; PPC64BE-NEXT: add 11, 21, 11 +; PPC64BE-NEXT: lbz 30, 167(1) +; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: lbz 22, 295(1) +; PPC64BE-NEXT: add 12, 0, 12 +; PPC64BE-NEXT: lbz 29, 159(1) ; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: stb 0, 10(3) -; PPC64BE-NEXT: stb 12, 8(3) +; PPC64BE-NEXT: lbz 23, 287(1) +; PPC64BE-NEXT: add 30, 22, 30 +; PPC64BE-NEXT: stb 11, 15(3) +; PPC64BE-NEXT: addi 11, 12, 1 +; 
PPC64BE-NEXT: add 29, 23, 29 +; PPC64BE-NEXT: stb 11, 14(3) +; PPC64BE-NEXT: addi 11, 30, 1 +; PPC64BE-NEXT: stb 11, 13(3) +; PPC64BE-NEXT: addi 11, 29, 1 +; PPC64BE-NEXT: stb 11, 12(3) +; PPC64BE-NEXT: addi 11, 28, 1 +; PPC64BE-NEXT: stb 11, 11(3) +; PPC64BE-NEXT: addi 11, 27, 1 +; PPC64BE-NEXT: stb 11, 10(3) +; PPC64BE-NEXT: addi 11, 26, 1 +; PPC64BE-NEXT: stb 11, 9(3) +; PPC64BE-NEXT: addi 11, 25, 1 +; PPC64BE-NEXT: stb 11, 8(3) +; PPC64BE-NEXT: addi 11, 24, 1 ; PPC64BE-NEXT: stb 11, 7(3) ; PPC64BE-NEXT: stb 10, 6(3) ; PPC64BE-NEXT: stb 9, 5(3) @@ -277,23 +277,23 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC32-NEXT: stw 28, 16(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 29, 20(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 30, 24(1) # 4-byte Folded Spill -; PPC32-NEXT: lhz 11, 50(1) -; PPC32-NEXT: lhz 12, 46(1) -; PPC32-NEXT: lhz 0, 42(1) -; PPC32-NEXT: lhz 30, 70(1) -; PPC32-NEXT: lhz 29, 66(1) -; PPC32-NEXT: lhz 28, 62(1) -; PPC32-NEXT: lhz 27, 58(1) +; PPC32-NEXT: lhz 11, 70(1) +; PPC32-NEXT: lhz 12, 66(1) +; PPC32-NEXT: lhz 0, 62(1) +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: lhz 30, 58(1) +; PPC32-NEXT: add 9, 12, 9 +; PPC32-NEXT: lhz 29, 50(1) +; PPC32-NEXT: add 8, 0, 8 +; PPC32-NEXT: lhz 28, 42(1) +; PPC32-NEXT: add 7, 30, 7 +; PPC32-NEXT: lhz 27, 46(1) +; PPC32-NEXT: add 5, 29, 5 ; PPC32-NEXT: lhz 26, 54(1) -; PPC32-NEXT: add 3, 0, 3 -; PPC32-NEXT: add 4, 12, 4 -; PPC32-NEXT: add 5, 11, 5 -; PPC32-NEXT: add 6, 26, 6 -; PPC32-NEXT: add 7, 27, 7 -; PPC32-NEXT: add 8, 28, 8 -; PPC32-NEXT: add 9, 29, 9 -; PPC32-NEXT: add 10, 30, 10 +; PPC32-NEXT: add 3, 28, 3 +; PPC32-NEXT: add 4, 27, 4 ; PPC32-NEXT: addi 3, 3, 1 +; PPC32-NEXT: add 6, 26, 6 ; PPC32-NEXT: addi 4, 4, 1 ; PPC32-NEXT: addi 5, 5, 1 ; PPC32-NEXT: addi 6, 6, 1 @@ -317,31 +317,31 @@ define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; PPC64BE-NEXT: std 28, -32(1) # 8-byte Folded Spill ; PPC64BE-NEXT: std 29, -24(1) # 8-byte Folded Spill ; 
PPC64BE-NEXT: std 30, -16(1) # 8-byte Folded Spill -; PPC64BE-NEXT: lhz 11, 142(1) -; PPC64BE-NEXT: lhz 12, 134(1) -; PPC64BE-NEXT: lhz 0, 126(1) -; PPC64BE-NEXT: lhz 30, 118(1) -; PPC64BE-NEXT: lhz 29, 182(1) -; PPC64BE-NEXT: lhz 28, 174(1) -; PPC64BE-NEXT: lhz 27, 166(1) -; PPC64BE-NEXT: lhz 26, 158(1) +; PPC64BE-NEXT: lhz 11, 118(1) +; PPC64BE-NEXT: lhz 12, 182(1) +; PPC64BE-NEXT: lhz 0, 174(1) +; PPC64BE-NEXT: lhz 30, 166(1) +; PPC64BE-NEXT: add 11, 12, 11 +; PPC64BE-NEXT: lhz 29, 158(1) +; PPC64BE-NEXT: add 10, 0, 10 +; PPC64BE-NEXT: lhz 28, 142(1) +; PPC64BE-NEXT: add 9, 30, 9 +; PPC64BE-NEXT: lhz 27, 126(1) +; PPC64BE-NEXT: add 8, 29, 8 +; PPC64BE-NEXT: lhz 26, 134(1) +; PPC64BE-NEXT: add 6, 28, 6 ; PPC64BE-NEXT: lhz 25, 150(1) -; PPC64BE-NEXT: add 4, 0, 4 -; PPC64BE-NEXT: add 5, 12, 5 -; PPC64BE-NEXT: add 6, 11, 6 +; PPC64BE-NEXT: add 4, 27, 4 +; PPC64BE-NEXT: add 5, 26, 5 +; PPC64BE-NEXT: addi 11, 11, 1 ; PPC64BE-NEXT: add 7, 25, 7 -; PPC64BE-NEXT: add 8, 26, 8 -; PPC64BE-NEXT: add 9, 27, 9 -; PPC64BE-NEXT: add 10, 28, 10 -; PPC64BE-NEXT: add 11, 29, 30 -; PPC64BE-NEXT: addi 4, 4, 1 -; PPC64BE-NEXT: addi 5, 5, 1 -; PPC64BE-NEXT: addi 6, 6, 1 -; PPC64BE-NEXT: addi 7, 7, 1 -; PPC64BE-NEXT: addi 8, 8, 1 -; PPC64BE-NEXT: addi 9, 9, 1 ; PPC64BE-NEXT: addi 10, 10, 1 -; PPC64BE-NEXT: addi 11, 11, 1 +; PPC64BE-NEXT: addi 9, 9, 1 +; PPC64BE-NEXT: addi 8, 8, 1 +; PPC64BE-NEXT: addi 7, 7, 1 +; PPC64BE-NEXT: addi 6, 6, 1 +; PPC64BE-NEXT: addi 5, 5, 1 +; PPC64BE-NEXT: addi 4, 4, 1 ; PPC64BE-NEXT: sth 11, 14(3) ; PPC64BE-NEXT: sth 10, 12(3) ; PPC64BE-NEXT: sth 9, 10(3) diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll index c0a8a76c7f1af2..815d5b7443e430 100644 --- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -5,23 +5,23 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; 
PPC64-LABEL: muloti_test: ; PPC64: # %bb.0: # %start +; PPC64-NEXT: mulhdu. 8, 3, 6 +; PPC64-NEXT: mcrf 1, 0 ; PPC64-NEXT: mulld 8, 5, 4 -; PPC64-NEXT: cmpdi 5, 3, 0 -; PPC64-NEXT: mulhdu. 9, 3, 6 +; PPC64-NEXT: cmpdi 3, 0 ; PPC64-NEXT: mulld 3, 3, 6 -; PPC64-NEXT: mcrf 1, 0 +; PPC64-NEXT: cmpdi 5, 5, 0 ; PPC64-NEXT: add 3, 3, 8 -; PPC64-NEXT: cmpdi 5, 0 -; PPC64-NEXT: crnor 20, 2, 22 -; PPC64-NEXT: cmpldi 3, 0 +; PPC64-NEXT: crnor 20, 22, 2 ; PPC64-NEXT: mulhdu 8, 4, 6 +; PPC64-NEXT: cmpldi 3, 0 ; PPC64-NEXT: add 3, 8, 3 ; PPC64-NEXT: cmpld 6, 3, 8 ; PPC64-NEXT: crandc 21, 24, 2 ; PPC64-NEXT: crorc 20, 20, 6 -; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: mulhdu. 5, 5, 4 ; PPC64-NEXT: crorc 20, 20, 2 +; PPC64-NEXT: li 7, 1 ; PPC64-NEXT: crnor 20, 20, 21 ; PPC64-NEXT: mulld 4, 4, 6 ; PPC64-NEXT: bc 12, 20, .LBB0_2 @@ -38,13 +38,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 0, 4(1) ; PPC32-NEXT: stwu 1, -80(1) ; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill +; PPC32-NEXT: mfcr 12 ; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 27, 4 ; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 29, 7 ; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill -; PPC32-NEXT: mfcr 12 ; PPC32-NEXT: mr 30, 8 -; PPC32-NEXT: mr 29, 7 -; PPC32-NEXT: mr 27, 4 ; PPC32-NEXT: mr 26, 3 ; PPC32-NEXT: li 3, 0 ; PPC32-NEXT: li 4, 0 @@ -54,30 +54,36 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill ; PPC32-NEXT: stw 23, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 23, 6 ; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 24, 5 ; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 25, 10 -; PPC32-NEXT: stw 12, 28(1) +; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill ; PPC32-NEXT: mr 28, 9 -; PPC32-NEXT: mr 23, 6 -; PPC32-NEXT: mr 
24, 5 +; PPC32-NEXT: stw 12, 28(1) ; PPC32-NEXT: bl __multi3 ; PPC32-NEXT: mr 7, 4 ; PPC32-NEXT: mullw 4, 24, 30 +; PPC32-NEXT: cmpwi 5, 24, 0 +; PPC32-NEXT: cmpwi 6, 26, 0 +; PPC32-NEXT: cmpwi 7, 28, 0 +; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mullw 8, 29, 23 -; PPC32-NEXT: mullw 10, 28, 27 -; PPC32-NEXT: mullw 11, 26, 25 +; PPC32-NEXT: add 21, 8, 4 +; PPC32-NEXT: mullw 11, 28, 27 +; PPC32-NEXT: mullw 12, 26, 25 +; PPC32-NEXT: add 11, 12, 11 +; PPC32-NEXT: cmplwi 7, 11, 0 ; PPC32-NEXT: mulhwu 9, 30, 23 -; PPC32-NEXT: mulhwu 12, 27, 25 +; PPC32-NEXT: add 12, 9, 21 +; PPC32-NEXT: cmplw 6, 12, 9 +; PPC32-NEXT: mulhwu 10, 27, 25 ; PPC32-NEXT: mullw 0, 30, 23 ; PPC32-NEXT: mullw 22, 27, 25 -; PPC32-NEXT: add 21, 8, 4 -; PPC32-NEXT: add 10, 11, 10 ; PPC32-NEXT: addc 4, 22, 0 -; PPC32-NEXT: add 11, 9, 21 -; PPC32-NEXT: add 0, 12, 10 -; PPC32-NEXT: adde 8, 0, 11 +; PPC32-NEXT: add 0, 10, 11 +; PPC32-NEXT: adde 8, 0, 12 ; PPC32-NEXT: addc 4, 7, 4 ; PPC32-NEXT: adde 8, 3, 8 ; PPC32-NEXT: xor 22, 4, 7 @@ -85,21 +91,15 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC32-NEXT: or. 22, 22, 20 ; PPC32-NEXT: mcrf 1, 0 ; PPC32-NEXT: cmpwi 29, 0 -; PPC32-NEXT: cmpwi 5, 24, 0 -; PPC32-NEXT: cmpwi 6, 26, 0 -; PPC32-NEXT: cmpwi 7, 28, 0 ; PPC32-NEXT: crnor 8, 22, 2 ; PPC32-NEXT: mulhwu. 23, 29, 23 -; PPC32-NEXT: crnor 9, 30, 26 ; PPC32-NEXT: mcrf 5, 0 ; PPC32-NEXT: cmplwi 21, 0 -; PPC32-NEXT: cmplw 6, 11, 9 -; PPC32-NEXT: cmplwi 7, 10, 0 ; PPC32-NEXT: crandc 10, 24, 2 -; PPC32-NEXT: cmplw 3, 0, 12 +; PPC32-NEXT: cmplw 3, 0, 10 +; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: mulhwu. 
9, 24, 30 ; PPC32-NEXT: mcrf 6, 0 -; PPC32-NEXT: crandc 11, 12, 30 ; PPC32-NEXT: cmplw 4, 7 ; PPC32-NEXT: cmplw 7, 8, 3 ; PPC32-NEXT: crand 12, 30, 0 diff --git a/llvm/test/CodeGen/PowerPC/vec_splat.ll b/llvm/test/CodeGen/PowerPC/vec_splat.ll index 7c048ff3710858..0e6626bbce2315 100644 --- a/llvm/test/CodeGen/PowerPC/vec_splat.ll +++ b/llvm/test/CodeGen/PowerPC/vec_splat.ll @@ -10,17 +10,17 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { ; G3-LABEL: splat: ; G3: # %bb.0: -; G3-NEXT: lfs 0, 0(4) +; G3-NEXT: lfs 0, 12(4) ; G3-NEXT: lfs 2, 8(4) ; G3-NEXT: lfs 3, 4(4) -; G3-NEXT: lfs 4, 12(4) ; G3-NEXT: fadds 0, 0, 1 -; G3-NEXT: fadds 2, 2, 1 -; G3-NEXT: fadds 3, 3, 1 -; G3-NEXT: fadds 1, 4, 1 -; G3-NEXT: stfs 1, 12(3) -; G3-NEXT: stfs 2, 8(3) -; G3-NEXT: stfs 3, 4(3) +; G3-NEXT: lfs 4, 0(4) +; G3-NEXT: stfs 0, 12(3) +; G3-NEXT: fadds 0, 2, 1 +; G3-NEXT: stfs 0, 8(3) +; G3-NEXT: fadds 0, 3, 1 +; G3-NEXT: stfs 0, 4(3) +; G3-NEXT: fadds 0, 4, 1 ; G3-NEXT: stfs 0, 0(3) ; G3-NEXT: blr ; @@ -49,18 +49,18 @@ define void @splat(%f4* %P, %f4* %Q, float %X) nounwind { define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_i4: ; G3: # %bb.0: -; G3-NEXT: lwz 6, 0(4) +; G3-NEXT: lwz 6, 12(4) ; G3-NEXT: lwz 7, 8(4) ; G3-NEXT: lwz 8, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: add 6, 6, 5 -; G3-NEXT: add 8, 8, 5 -; G3-NEXT: add 7, 7, 5 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 6, 12(3) +; G3-NEXT: add 6, 7, 5 +; G3-NEXT: stw 6, 8(3) +; G3-NEXT: add 6, 8, 5 ; G3-NEXT: add 4, 4, 5 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 7, 8(3) -; G3-NEXT: stw 8, 4(3) -; G3-NEXT: stw 6, 0(3) +; G3-NEXT: stw 6, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_i4: @@ -88,18 +88,18 @@ define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i32: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) +; G3-NEXT: lwz 5, 12(4) ; G3-NEXT: lwz 6, 8(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 
12(4) ; G3-NEXT: addi 5, 5, -1 -; G3-NEXT: addi 7, 7, -1 -; G3-NEXT: addi 6, 6, -1 +; G3-NEXT: lwz 4, 0(4) +; G3-NEXT: stw 5, 12(3) +; G3-NEXT: addi 5, 6, -1 +; G3-NEXT: stw 5, 8(3) +; G3-NEXT: addi 5, 7, -1 ; G3-NEXT: addi 4, 4, -1 -; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: stw 5, 4(3) +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i32: @@ -118,22 +118,22 @@ define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind { define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind { ; G3-LABEL: splat_imm_i16: ; G3: # %bb.0: -; G3-NEXT: lwz 5, 0(4) -; G3-NEXT: lwz 6, 8(4) +; G3-NEXT: lwz 5, 8(4) +; G3-NEXT: lwz 6, 0(4) ; G3-NEXT: lwz 7, 4(4) -; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 5, 5, 1 -; G3-NEXT: addi 7, 7, 1 +; G3-NEXT: lwz 4, 12(4) ; G3-NEXT: addi 6, 6, 1 +; G3-NEXT: addi 7, 7, 1 ; G3-NEXT: addi 4, 4, 1 -; G3-NEXT: addis 5, 5, 1 -; G3-NEXT: addis 7, 7, 1 -; G3-NEXT: addis 6, 6, 1 ; G3-NEXT: addis 4, 4, 1 ; G3-NEXT: stw 4, 12(3) -; G3-NEXT: stw 6, 8(3) -; G3-NEXT: stw 7, 4(3) -; G3-NEXT: stw 5, 0(3) +; G3-NEXT: addis 4, 5, 1 +; G3-NEXT: stw 4, 8(3) +; G3-NEXT: addis 4, 7, 1 +; G3-NEXT: stw 4, 4(3) +; G3-NEXT: addis 4, 6, 1 +; G3-NEXT: stw 4, 0(3) ; G3-NEXT: blr ; ; G5-LABEL: splat_imm_i16: @@ -189,58 +189,60 @@ define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; G3-LABEL: spltish: ; G3: # %bb.0: ; G3-NEXT: stwu 1, -48(1) +; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; G3-NEXT: lbz 5, 0(4) +; G3-NEXT: lbz 30, 15(4) +; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; G3-NEXT: lbz 29, 13(4) +; G3-NEXT: stw 28, 32(1) # 4-byte Folded Spill +; G3-NEXT: lbz 28, 11(4) +; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill +; G3-NEXT: lbz 27, 9(4) +; G3-NEXT: stw 24, 16(1) # 4-byte Folded Spill ; G3-NEXT: stw 25, 20(1) # 4-byte Folded Spill ; G3-NEXT: stw 26, 24(1) # 4-byte Folded Spill -; G3-NEXT: stw 27, 28(1) # 4-byte Folded Spill -; G3-NEXT: stw 28, 32(1) # 4-byte Folded 
Spill -; G3-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; G3-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; G3-NEXT: lbz 5, 5(4) -; G3-NEXT: lbz 6, 3(4) -; G3-NEXT: lbz 7, 1(4) -; G3-NEXT: lbz 8, 0(4) -; G3-NEXT: lbz 9, 2(4) -; G3-NEXT: lbz 10, 4(4) -; G3-NEXT: lbz 11, 6(4) -; G3-NEXT: lbz 12, 8(4) -; G3-NEXT: lbz 0, 10(4) -; G3-NEXT: addi 7, 7, -15 -; G3-NEXT: lbz 30, 12(4) -; G3-NEXT: lbz 29, 14(4) -; G3-NEXT: lbz 28, 15(4) -; G3-NEXT: lbz 27, 13(4) -; G3-NEXT: lbz 26, 11(4) -; G3-NEXT: lbz 25, 9(4) -; G3-NEXT: addi 6, 6, -15 -; G3-NEXT: lbz 4, 7(4) -; G3-NEXT: addi 5, 5, -15 -; G3-NEXT: addi 25, 25, -15 -; G3-NEXT: addi 26, 26, -15 -; G3-NEXT: addi 4, 4, -15 -; G3-NEXT: addi 27, 27, -15 -; G3-NEXT: addi 28, 28, -15 -; G3-NEXT: stb 29, 14(3) -; G3-NEXT: stb 30, 12(3) -; G3-NEXT: stb 0, 10(3) -; G3-NEXT: stb 12, 8(3) -; G3-NEXT: stb 11, 6(3) -; G3-NEXT: stb 10, 4(3) -; G3-NEXT: stb 9, 2(3) -; G3-NEXT: stb 8, 0(3) -; G3-NEXT: stb 28, 15(3) -; G3-NEXT: stb 27, 13(3) -; G3-NEXT: stb 26, 11(3) -; G3-NEXT: stb 25, 9(3) -; G3-NEXT: stb 4, 7(3) +; G3-NEXT: lbz 6, 2(4) +; G3-NEXT: lbz 7, 4(4) +; G3-NEXT: lbz 8, 6(4) +; G3-NEXT: lbz 9, 8(4) +; G3-NEXT: lbz 10, 10(4) +; G3-NEXT: lbz 11, 12(4) +; G3-NEXT: lbz 12, 14(4) +; G3-NEXT: lbz 26, 7(4) +; G3-NEXT: lbz 25, 5(4) +; G3-NEXT: lbz 24, 3(4) +; G3-NEXT: lbz 4, 1(4) +; G3-NEXT: stb 5, 0(3) +; G3-NEXT: addi 5, 30, -15 +; G3-NEXT: stb 5, 15(3) +; G3-NEXT: addi 5, 29, -15 +; G3-NEXT: stb 5, 13(3) +; G3-NEXT: addi 5, 28, -15 +; G3-NEXT: stb 5, 11(3) +; G3-NEXT: addi 5, 27, -15 +; G3-NEXT: stb 5, 9(3) +; G3-NEXT: addi 5, 26, -15 +; G3-NEXT: stb 5, 7(3) +; G3-NEXT: addi 5, 25, -15 ; G3-NEXT: stb 5, 5(3) -; G3-NEXT: stb 6, 3(3) -; G3-NEXT: stb 7, 1(3) +; G3-NEXT: addi 5, 24, -15 +; G3-NEXT: addi 4, 4, -15 +; G3-NEXT: stb 12, 14(3) +; G3-NEXT: stb 11, 12(3) +; G3-NEXT: stb 10, 10(3) +; G3-NEXT: stb 9, 8(3) +; G3-NEXT: stb 8, 6(3) +; G3-NEXT: stb 7, 4(3) +; G3-NEXT: stb 6, 2(3) +; G3-NEXT: stb 5, 3(3) +; G3-NEXT: stb 4, 1(3) ; G3-NEXT: 
lwz 30, 40(1) # 4-byte Folded Reload ; G3-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; G3-NEXT: lwz 28, 32(1) # 4-byte Folded Reload ; G3-NEXT: lwz 27, 28(1) # 4-byte Folded Reload ; G3-NEXT: lwz 26, 24(1) # 4-byte Folded Reload ; G3-NEXT: lwz 25, 20(1) # 4-byte Folded Reload +; G3-NEXT: lwz 24, 16(1) # 4-byte Folded Reload ; G3-NEXT: addi 1, 1, 48 ; G3-NEXT: blr ; From ba890da2878299dc82b104c06f067e45162d880f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 9 Jun 2020 11:25:55 -0700 Subject: [PATCH 07/25] [ELF] Demote lazy symbols relative to a discarded section to Undefined Fixes PR45594. In `ObjFile::initializeSymbols()`, for a defined symbol relative to a discarded section (due to section group rules), it may have been inserted as a lazy symbol. We need to demote it to an Undefined to enable the `discarded section` error happened in a later pass. Add `LazyObjFile::fetched` (if true) and `ArchiveFile::parsed` (if false) to represent that there is an ongoing lazy symbol fetch and we should replace the current lazy symbol with an Undefined, instead of calling `Symbol::resolve` (`Symbol::resolve` should be called if the lazy symbol was added by an unrelated archive/lazy object). As a side result, one small issue in start-lib-comdat.s is now fixed. The hack motivating D51892 will be unsupported: if `.gnu.linkonce.t.__i686.get_pc_thunk.bx` in an archive is referenced by another section, this will likely be errored unless the function is also defined in a regular object file. (Bringing back rL330869 would error `undefined symbol` instead of the more relevant `discarded section`.) Note, glibc i386's crti.o still works (PR31215), because `.gnu.linkonce.t.__x86.get_pc_thunk.bx` is in crti.o (one of the first regular object files in a linker command line). 
Reviewed By: psmith Differential Revision: https://reviews.llvm.org/D79300 --- lld/ELF/InputFiles.cpp | 25 +++++++++--- lld/ELF/InputFiles.h | 4 ++ lld/test/ELF/comdat-discarded-lazy.s | 60 ++++++++++++++++++++++++++++ lld/test/ELF/i386-linkonce.s | 4 +- lld/test/ELF/start-lib-comdat.s | 2 +- 5 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 lld/test/ELF/comdat-discarded-lazy.s diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index c451aee1f921a9..5bbd6f0df7e9ab 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1117,8 +1117,20 @@ template void ObjFile::initializeSymbols() { // COMDAT member sections, and if a comdat group is discarded, some // defined symbol in a .eh_frame becomes dangling symbols. if (sec == &InputSection::discarded) { - this->symbols[i]->resolve( - Undefined{this, name, binding, stOther, type, secIdx}); + Undefined und{this, name, binding, stOther, type, secIdx}; + Symbol *sym = this->symbols[i]; + // !ArchiveFile::parsed or LazyObjFile::fetched means that the file + // containing this object has not finished processing, i.e. this symbol is + // a result of a lazy symbol fetch. We should demote the lazy symbol to an + // Undefined so that any relocations outside of the group to it will + // trigger a discarded section error. + if ((sym->symbolKind == Symbol::LazyArchiveKind && + !cast(sym->file)->parsed) || + (sym->symbolKind == Symbol::LazyObjectKind && + cast(sym->file)->fetched)) + sym->replace(und); + else + sym->resolve(und); continue; } @@ -1141,6 +1153,10 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&file) void ArchiveFile::parse() { for (const Archive::Symbol &sym : file->symbols()) symtab->addSymbol(LazyArchive{*this, sym}); + + // Inform a future invocation of ObjFile::initializeSymbols() that this + // archive has been processed. + parsed = true; } // Returns a buffer pointing to a member file containing a given symbol. 
@@ -1615,14 +1631,13 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } void LazyObjFile::fetch() { - if (mb.getBuffer().empty()) + if (fetched) return; + fetched = true; InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); file->groupId = groupId; - mb = {}; - // Copy symbol vector so that the new InputFile doesn't have to // insert the same defined symbols to the symbol table again. file->symbols = std::move(symbols); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 51882e0c964719..7af85e417ca583 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -307,6 +307,8 @@ class LazyObjFile : public InputFile { template void parse(); void fetch(); + bool fetched = false; + private: uint64_t offsetInArchive; }; @@ -327,6 +329,8 @@ class ArchiveFile : public InputFile { size_t getMemberCount() const; size_t getFetchedMemberCount() const { return seen.size(); } + bool parsed = false; + private: std::unique_ptr file; llvm::DenseSet seen; diff --git a/lld/test/ELF/comdat-discarded-lazy.s b/lld/test/ELF/comdat-discarded-lazy.s new file mode 100644 index 00000000000000..8ee15158f6b3ae --- /dev/null +++ b/lld/test/ELF/comdat-discarded-lazy.s @@ -0,0 +1,60 @@ +# REQUIRES: x86 +## Test that lazy symbols in a section group can be demoted to Undefined, +## so that we can report a "discarded section" error. + +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: echo '.globl f1, foo; f1: call foo; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; foo:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t1.o + +## Test the case when the symbol causing a "discarded section" is ordered +## *before* the symbol fetching the lazy object. +## The test relies on the symbol table order of llvm-mc (lexical), which will +## need adjustment if llvm-mc changes its behavior. 
+# RUN: echo '.globl f2, aa; f2: call aa; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; aa:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %taa.o +# RUN: llvm-nm -p %taa.o | FileCheck --check-prefix=AA-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %taa.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s +# RUN: rm -f %taa.a && llvm-ar rc %taa.a %taa.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %taa.a -o /dev/null 2>&1 | FileCheck --check-prefix=AA %s + +# AA-NM: aa +# AA-NM: f2 + +# AA: error: relocation refers to a symbol in a discarded section: aa +# AA-NEXT: >>> defined in {{.*}}aa.o +# AA-NEXT: >>> section group signature: foo +# AA-NEXT: >>> prevailing definition is in {{.*}}1.o +# AA-NEXT: >>> referenced by {{.*}}aa.o:(.text+0x1) + +## Test the case when the symbol causing a "discarded section" is ordered +## *after* the symbol fetching the lazy object. +# RUN: echo '.globl f2, zz; f2: call zz; \ +# RUN: .section .text.foo,"axG",@progbits,foo,comdat; zz:' | \ +# RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %tzz.o +# RUN: llvm-nm -p %tzz.o | FileCheck --check-prefix=ZZ-NM %s +# RUN: not ld.lld %t.o --start-lib %t1.o %tzz.o --end-lib -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s +# RUN: rm -f %tzz.a && llvm-ar rc %tzz.a %tzz.o +# RUN: not ld.lld %t.o --start-lib %t1.o --end-lib %tzz.a -o /dev/null 2>&1 | FileCheck --check-prefix=ZZ %s + +# ZZ-NM: f2 +# ZZ-NM: zz + +# ZZ: error: relocation refers to a symbol in a discarded section: zz +# ZZ-NEXT: >>> defined in {{.*}}zz.o +# ZZ-NEXT: >>> section group signature: foo +# ZZ-NEXT: >>> prevailing definition is in {{.*}}1.o +# ZZ-NEXT: >>> referenced by {{.*}}zz.o:(.text+0x1) + +## Don't error if the symbol which would cause "discarded section" +## was inserted before %tzz.o +# RUN: echo '.globl zz; zz:' | llvm-mc -filetype=obj -triple=x86_64 - -o %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.o %tzz.o --end-lib -o /dev/null +# RUN: rm -f %tdef.a && llvm-ar rc 
%tdef.a %tdef.o +# RUN: ld.lld %t.o --start-lib %t1.o %tdef.a %tzz.o --end-lib -o /dev/null + +.globl _start +_start: + call f1 + call f2 diff --git a/lld/test/ELF/i386-linkonce.s b/lld/test/ELF/i386-linkonce.s index c06b042c7638e4..f7da0aed4af585 100644 --- a/lld/test/ELF/i386-linkonce.s +++ b/lld/test/ELF/i386-linkonce.s @@ -2,7 +2,9 @@ // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=i386-linux-gnu %p/Inputs/i386-linkonce.s -o %t2.o // RUN: llvm-ar rcs %t2.a %t2.o -// RUN: ld.lld %t.o %t2.a -o %t +// RUN: not ld.lld %t.o %t2.a -o /dev/null 2>&1 | FileCheck %s + +// CHECK: error: relocation refers to a symbol in a discarded section: __i686.get_pc_thunk.bx .globl _start _start: diff --git a/lld/test/ELF/start-lib-comdat.s b/lld/test/ELF/start-lib-comdat.s index 34c9934803f092..996ddb485bab3c 100644 --- a/lld/test/ELF/start-lib-comdat.s +++ b/lld/test/ELF/start-lib-comdat.s @@ -6,7 +6,7 @@ // RUN: ld.lld -shared -o %t3 %t1.o --start-lib %t2.o --end-lib // RUN: llvm-readobj --symbols %t3 | FileCheck %s // RUN: ld.lld -shared -o %t3 --start-lib %t2.o --end-lib %t1.o -// RUN: llvm-readobj --symbols %t3 | FileCheck %s +// RUN: llvm-readobj --symbols %t3 | FileCheck /dev/null --implicit-check-not='Name: zed' // CHECK: Name: zed // CHECK-NEXT: Value: From e7c5412b3731b3b095567e6db85c2989133dd6de Mon Sep 17 00:00:00 2001 From: Anh Tuyen Tran Date: Tue, 9 Jun 2020 18:30:56 +0000 Subject: [PATCH 08/25] [NFC][LV][TEST]: extend pr45679-fold-tail-by-masking.ll with -force-vector-width=1 -force-vector-interleave=4 Summary: Add -force-vector-width=1 -force-vector-interleave=4 to pr45679-fold-tail-by-masking.ll Author: anhtuyen (Anh Tuyen Tran) Reviewers: Ayal (Ayal Zaks) Reviewed By: Ayal (Ayal Zaks) Subscribers: rkruppe (Hanna Kruppe), llvm-commits, LLVM Tag: LLVM Differential Revision: https://reviews.llvm.org/D80446 --- .../pr45679-fold-tail-by-masking.ll | 58 ++++++++++++++ .../tail-folding-vectorization-factor-1.ll | 78 
------------------- 2 files changed, 58 insertions(+), 78 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index 835a6dad32061f..d77abbd39f0d28 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s ; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s -check-prefix=VF2UF2 +; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s -check-prefix=VF1UF4 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -132,6 +133,63 @@ define void @pr45679(i32* %A) optsize { ; VF2UF2: exit: ; VF2UF2-NEXT: ret void ; +; VF1UF4-LABEL: @pr45679( +; VF1UF4-NEXT: entry: +; VF1UF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; VF1UF4: vector.ph: +; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] +; VF1UF4: vector.body: +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] +; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 +; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13 +; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13 +; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13 +; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 +; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; VF1UF4: pred.store.if: +; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* 
[[A:%.*]], i32 [[INDUCTION]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP4]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] +; VF1UF4: pred.store.continue: +; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; VF1UF4: pred.store.if4: +; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP5]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE5]] +; VF1UF4: pred.store.continue5: +; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] +; VF1UF4: pred.store.if6: +; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP6]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE7]] +; VF1UF4: pred.store.continue7: +; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.if8: +; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] +; VF1UF4-NEXT: store i32 13, i32* [[TMP7]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE9]] +; VF1UF4: pred.store.continue9: +; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] +; VF1UF4: middle.block: +; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; VF1UF4: scalar.ph: +; VF1UF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; VF1UF4-NEXT: br label [[LOOP:%.*]] +; VF1UF4: loop: +; VF1UF4-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] +; VF1UF4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] +; VF1UF4-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 +; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 +; VF1UF4-NEXT: 
[[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 +; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] +; VF1UF4: exit: +; VF1UF4-NEXT: ret void +; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 2973a4425a5d4f..973d7013837a6e 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -pass-remarks='loop-vectorize' -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS ; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -S | FileCheck %s -; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s --check-prefix=CHECK-VF1 ; These tests are to check that fold-tail procedure produces correct scalar code when ; loop-vectorization is only unrolling but not vectorizing. @@ -110,80 +109,3 @@ for.body: %cond = icmp eq double* %ptr, %ptr2 br i1 %cond, label %for.cond.cleanup, label %for.body } - -; The following testcase is extended from the test of https://reviews.llvm.org/D80085 -; Similar to two tests above, it is to check that fold-tail procedure produces correct scalar code when -; loop-vectorization is only unrolling but not vectorizing. 
- -define void @pr45679(i32* %A) optsize { -; CHECK-VF1-LABEL: @pr45679 -; CHECK-VF1-NEXT: entry: -; CHECK-VF1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK-VF1: vector.ph: -; CHECK-VF1-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK-VF1: vector.body: -; CHECK-VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] -; CHECK-VF1-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 -; CHECK-VF1-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 -; CHECK-VF1-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 -; CHECK-VF1-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 -; CHECK-VF1-NEXT: [[TMP0:%.*]] = icmp ule i32 [[INDUCTION]], 13 -; CHECK-VF1-NEXT: [[TMP1:%.*]] = icmp ule i32 [[INDUCTION1]], 13 -; CHECK-VF1-NEXT: [[TMP2:%.*]] = icmp ule i32 [[INDUCTION2]], 13 -; CHECK-VF1-NEXT: [[TMP3:%.*]] = icmp ule i32 [[INDUCTION3]], 13 -; CHECK-VF1-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK-VF1: pred.store.if: -; CHECK-VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDUCTION]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP4]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK-VF1: pred.store.continue: -; CHECK-VF1-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; CHECK-VF1: pred.store.if4: -; CHECK-VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION1]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP5]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE5]] -; CHECK-VF1: pred.store.continue5: -; CHECK-VF1-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; CHECK-VF1: pred.store.if6: -; CHECK-VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION2]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP6]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE7]] -; CHECK-VF1: 
pred.store.continue7: -; CHECK-VF1-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; CHECK-VF1: pred.store.if8: -; CHECK-VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDUCTION3]] -; CHECK-VF1-NEXT: store i32 13, i32* [[TMP7]], align 1 -; CHECK-VF1-NEXT: br label [[PRED_STORE_CONTINUE9]] -; CHECK-VF1: pred.store.continue9: -; CHECK-VF1-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; CHECK-VF1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-VF1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] -; CHECK-VF1: middle.block: -; CHECK-VF1-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK-VF1: scalar.ph: -; CHECK-VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-VF1-NEXT: br label [[LOOP:%.*]] -; CHECK-VF1: loop: -; CHECK-VF1-NEXT: [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ] -; CHECK-VF1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[RIV]] -; CHECK-VF1-NEXT: store i32 13, i32* [[ARRAYIDX]], align 1 -; CHECK-VF1-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 -; CHECK-VF1-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; CHECK-VF1-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]] -; CHECK-VF1: exit: -; CHECK-VF1-NEXT: ret void -; -entry: - br label %loop - -loop: - %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ] - %arrayidx = getelementptr inbounds i32, i32* %A, i32 %riv - store i32 13, i32* %arrayidx, align 1 - %rivPlus1 = add nuw nsw i32 %riv, 1 - %cond = icmp eq i32 %rivPlus1, 14 - br i1 %cond, label %exit, label %loop - -exit: - ret void -} From 7fb40e1569dd66292b647f4501b85517e9247953 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 9 Jun 2020 14:08:55 -0400 Subject: [PATCH 09/25] [libc++] Fix too stringent availability markup for bad_optional_access The availability markup for bad_optional_access marked it as being added in 
MacOS 10.14 and aligned releases, however it appears to have been added in Mac OS 10.13 and aligned releases. --- libcxx/include/__config | 8 ++++---- libcxx/utils/libcxx/test/config.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index cf596a7872abd5..26fadcff7ced42 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1392,10 +1392,10 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) # define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ - __attribute__((availability(macosx,strict,introduced=10.14))) \ - __attribute__((availability(ios,strict,introduced=12.0))) \ - __attribute__((availability(tvos,strict,introduced=12.0))) \ - __attribute__((availability(watchos,strict,introduced=5.0))) + __attribute__((availability(macosx,strict,introduced=10.13))) \ + __attribute__((availability(ios,strict,introduced=11.0))) \ + __attribute__((availability(tvos,strict,introduced=11.0))) \ + __attribute__((availability(watchos,strict,introduced=4.0))) # define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS \ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 22ec6c457e5694..35dac0df56824d 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -857,8 +857,8 @@ def configure_deployment(self): self.config.available_features.add('dylib-has-no-shared_mutex') self.lit_config.note("shared_mutex is not supported by the deployment target") # Throwing bad_optional_access, bad_variant_access and bad_any_cast is - # supported starting in macosx10.14. - if name == 'macosx' and version in ('10.%s' % v for v in range(9, 14)): + # supported starting in macosx10.13. 
+ if name == 'macosx' and version in ('10.%s' % v for v in range(9, 13)): self.config.available_features.add('dylib-has-no-bad_optional_access') self.lit_config.note("throwing bad_optional_access is not supported by the deployment target") From ce5fecb7d0a12c27763afe3f89d1d7e8a1893dc0 Mon Sep 17 00:00:00 2001 From: Tridacnid Date: Tue, 9 Jun 2020 19:43:48 +0100 Subject: [PATCH 10/25] Assignment and Inc/Dec operators wouldn't register as a mutation when Implicit Paren Casts were present Add ignoringParenImpCasts to assignment and inc/dec mutation checks in ExprMutationAnalyzer to fix clang-tidy bug PR45490. https://bugs.llvm.org/show_bug.cgi?id=45490 Reviewed By: njames93, aaron.ballman, gribozavr2 Differential Revision: https://reviews.llvm.org/D79912 --- .../checkers/bugprone-infinite-loop.cpp | 23 +++++++++++++++++++ clang/lib/Analysis/ExprMutationAnalyzer.cpp | 9 ++++---- .../Analysis/ExprMutationAnalyzerTest.cpp | 23 ++++++++++++++----- 3 files changed, 45 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp index 427b5f0272b94a..8bd4df7cd84450 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-infinite-loop.cpp @@ -70,11 +70,25 @@ void simple_not_infinite1() { i++; } + while ((Limit)--) { + // Not an error since 'Limit' is updated. + i++; + } + + while ((Limit) -= 1) { + // Not an error since 'Limit' is updated. + } + while (int k = Limit) { // Not an error since 'Limit' is updated. Limit--; } + while (int k = Limit) { + // Not an error since 'Limit' is updated + (Limit)--; + } + while (int k = Limit--) { // Not an error since 'Limit' is updated. 
i++; @@ -86,6 +100,15 @@ void simple_not_infinite1() { for (i = 0; i < Limit; Limit--) { } + + for (i = 0; i < Limit; (Limit) = Limit - 1) { + } + + for (i = 0; i < Limit; (Limit) -= 1) { + } + + for (i = 0; i < Limit; --(Limit)) { + } } void simple_not_infinite2() { diff --git a/clang/lib/Analysis/ExprMutationAnalyzer.cpp b/clang/lib/Analysis/ExprMutationAnalyzer.cpp index cb5cabfd3089c2..2f80285f17b4da 100644 --- a/clang/lib/Analysis/ExprMutationAnalyzer.cpp +++ b/clang/lib/Analysis/ExprMutationAnalyzer.cpp @@ -201,14 +201,15 @@ const Stmt *ExprMutationAnalyzer::findDeclPointeeMutation( const Stmt *ExprMutationAnalyzer::findDirectMutation(const Expr *Exp) { // LHS of any assignment operators. - const auto AsAssignmentLhs = - binaryOperator(isAssignmentOperator(), - hasLHS(maybeEvalCommaExpr(equalsNode(Exp)))); + const auto AsAssignmentLhs = binaryOperator( + isAssignmentOperator(), + hasLHS(maybeEvalCommaExpr(ignoringParenImpCasts(equalsNode(Exp))))); // Operand of increment/decrement operators. const auto AsIncDecOperand = unaryOperator(anyOf(hasOperatorName("++"), hasOperatorName("--")), - hasUnaryOperand(maybeEvalCommaExpr(equalsNode(Exp)))); + hasUnaryOperand(maybeEvalCommaExpr( + ignoringParenImpCasts(equalsNode(Exp))))); // Invoking non-const member function. // A member function is assumed to be non-const when it is unresolved. 
diff --git a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp index 9b0a3dbda81e41..9d26eeb6af7347 100644 --- a/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp +++ b/clang/unittests/Analysis/ExprMutationAnalyzerTest.cpp @@ -112,11 +112,21 @@ TEST(ExprMutationAnalyzerTest, Trivial) { class AssignmentTest : public ::testing::TestWithParam {}; TEST_P(AssignmentTest, AssignmentModifies) { - const std::string ModExpr = "x " + GetParam() + " 10"; - const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); - const auto Results = - match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); - EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + { + const std::string ModExpr = "x " + GetParam() + " 10"; + const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); + const auto Results = + match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + } + + { + const std::string ModExpr = "(x) " + GetParam() + " 10"; + const auto AST = buildASTFromCode("void f() { int x; " + ModExpr + "; }"); + const auto Results = + match(withEnclosingCompound(declRefTo("x")), AST->getASTContext()); + EXPECT_THAT(mutatedBy(Results, AST.get()), ElementsAre(ModExpr)); + } } INSTANTIATE_TEST_CASE_P(AllAssignmentOperators, AssignmentTest, @@ -134,7 +144,8 @@ TEST_P(IncDecTest, IncDecModifies) { } INSTANTIATE_TEST_CASE_P(AllIncDecOperators, IncDecTest, - Values("++x", "--x", "x++", "x--"), ); + Values("++x", "--x", "x++", "x--", "++(x)", "--(x)", + "(x)++", "(x)--"), ); TEST(ExprMutationAnalyzerTest, NonConstMemberFunc) { const auto AST = buildASTFromCode( From d9dec697cbb7f825aa1b8e6336027675a01a0823 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 16 Apr 2019 17:12:54 -0400 Subject: [PATCH 11/25] [libc++][CMake] Add CMake caches for commonly supported configurations This commit adds CMake 
caches for the various configurations of libc++ that are tested by our build bots. Differential Revision: https://reviews.llvm.org/D81293 --- libcxx/cmake/caches/Generic-32bits.cmake | 1 + libcxx/cmake/caches/Generic-asan.cmake | 1 + libcxx/cmake/caches/Generic-cxx03.cmake | 1 + libcxx/cmake/caches/Generic-cxx11.cmake | 1 + libcxx/cmake/caches/Generic-cxx14.cmake | 1 + libcxx/cmake/caches/Generic-cxx17.cmake | 1 + libcxx/cmake/caches/Generic-cxx2a.cmake | 1 + libcxx/cmake/caches/Generic-msan.cmake | 1 + libcxx/cmake/caches/Generic-noexceptions.cmake | 2 ++ libcxx/cmake/caches/Generic-singlethreaded.cmake | 3 +++ libcxx/cmake/caches/Generic-tsan.cmake | 1 + libcxx/cmake/caches/Generic-ubsan.cmake | 2 ++ libcxx/cmake/caches/README.md | 13 +++++++++++++ 13 files changed, 29 insertions(+) create mode 100644 libcxx/cmake/caches/Generic-32bits.cmake create mode 100644 libcxx/cmake/caches/Generic-asan.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx03.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx11.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx14.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx17.cmake create mode 100644 libcxx/cmake/caches/Generic-cxx2a.cmake create mode 100644 libcxx/cmake/caches/Generic-msan.cmake create mode 100644 libcxx/cmake/caches/Generic-noexceptions.cmake create mode 100644 libcxx/cmake/caches/Generic-singlethreaded.cmake create mode 100644 libcxx/cmake/caches/Generic-tsan.cmake create mode 100644 libcxx/cmake/caches/Generic-ubsan.cmake create mode 100644 libcxx/cmake/caches/README.md diff --git a/libcxx/cmake/caches/Generic-32bits.cmake b/libcxx/cmake/caches/Generic-32bits.cmake new file mode 100644 index 00000000000000..ae7b2ac3e8d83a --- /dev/null +++ b/libcxx/cmake/caches/Generic-32bits.cmake @@ -0,0 +1 @@ +set(LLVM_BUILD_32_BITS ON CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-asan.cmake b/libcxx/cmake/caches/Generic-asan.cmake new file mode 100644 index 00000000000000..cf919765c3a29c --- 
/dev/null +++ b/libcxx/cmake/caches/Generic-asan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "Address" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx03.cmake b/libcxx/cmake/caches/Generic-cxx03.cmake new file mode 100644 index 00000000000000..d1d67d86d74a9c --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx03.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++03" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx11.cmake b/libcxx/cmake/caches/Generic-cxx11.cmake new file mode 100644 index 00000000000000..e203c6aeaf29fc --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx11.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++11" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx14.cmake b/libcxx/cmake/caches/Generic-cxx14.cmake new file mode 100644 index 00000000000000..b1bf1244b510ab --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx14.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++14" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx17.cmake b/libcxx/cmake/caches/Generic-cxx17.cmake new file mode 100644 index 00000000000000..b23204729ced20 --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx17.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++17" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-cxx2a.cmake b/libcxx/cmake/caches/Generic-cxx2a.cmake new file mode 100644 index 00000000000000..31f1b76ab91f99 --- /dev/null +++ b/libcxx/cmake/caches/Generic-cxx2a.cmake @@ -0,0 +1 @@ +set(LLVM_LIT_ARGS "--param std=c++2a" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-msan.cmake b/libcxx/cmake/caches/Generic-msan.cmake new file mode 100644 index 00000000000000..7c948f51642dd4 --- /dev/null +++ b/libcxx/cmake/caches/Generic-msan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "MemoryWithOrigins" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-noexceptions.cmake b/libcxx/cmake/caches/Generic-noexceptions.cmake new file mode 100644 index 00000000000000..f0dffef60dba08 --- 
/dev/null +++ b/libcxx/cmake/caches/Generic-noexceptions.cmake @@ -0,0 +1,2 @@ +set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "") +set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-singlethreaded.cmake b/libcxx/cmake/caches/Generic-singlethreaded.cmake new file mode 100644 index 00000000000000..616baef1be7bef --- /dev/null +++ b/libcxx/cmake/caches/Generic-singlethreaded.cmake @@ -0,0 +1,3 @@ +set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "") +set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "") +set(LIBCXX_ENABLE_MONOTONIC_CLOCK OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-tsan.cmake b/libcxx/cmake/caches/Generic-tsan.cmake new file mode 100644 index 00000000000000..a4b599e3e5094b --- /dev/null +++ b/libcxx/cmake/caches/Generic-tsan.cmake @@ -0,0 +1 @@ +set(LLVM_USE_SANITIZER "Thread" CACHE STRING "") diff --git a/libcxx/cmake/caches/Generic-ubsan.cmake b/libcxx/cmake/caches/Generic-ubsan.cmake new file mode 100644 index 00000000000000..7ad891e4aed9e8 --- /dev/null +++ b/libcxx/cmake/caches/Generic-ubsan.cmake @@ -0,0 +1,2 @@ +set(LLVM_USE_SANITIZER "Undefined" CACHE STRING "") +set(LIBCXX_ABI_UNSTABLE ON CACHE BOOL "") diff --git a/libcxx/cmake/caches/README.md b/libcxx/cmake/caches/README.md new file mode 100644 index 00000000000000..60837ee2940177 --- /dev/null +++ b/libcxx/cmake/caches/README.md @@ -0,0 +1,13 @@ +# libc++ / libc++abi configuration caches + +This directory contains CMake caches for the supported configurations of libc++. +Some of the configurations are specific to a vendor, others are generic and not +tied to any vendor. + +While we won't explicitly work to break configurations not listed here, any +configuration not listed here is not explicitly supported. If you use or ship +libc++ under a configuration not listed here, you should work with the libc++ +maintainers to make it into a supported configuration and add it here. 
+ +Similarly, adding any new configuration that's not already covered must be +discussed with the libc++ maintainers as it entails a maintenance burden. From d31c9e5a46ee692daf2430b52626afcea1db18ab Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Fri, 27 Mar 2020 23:58:06 +0000 Subject: [PATCH 12/25] Change filecheck default to dump input on failure Having the input dumped on failure seems like a better default: I debugged FileCheck tests for a while without knowing about this option, which really helps to understand failures. Remove `-dump-input-on-failure` and the environment variable FILECHECK_DUMP_INPUT_ON_FAILURE which are now obsolete. Differential Revision: https://reviews.llvm.org/D81422 --- clang/test/CodeGenObjC/externally-retained.m | 4 +- clang/test/Driver/rocm-device-libs.cl | 34 ++++++------- compiler-rt/test/fuzzer/fork.test | 2 +- .../llvm-prettyprinters/gdb/llvm-support.gdb | 2 +- llvm/docs/CommandGuide/FileCheck.rst | 9 +--- .../AArch64/speculation-hardening-dagisel.ll | 4 +- .../AArch64/speculation-hardening-loads.ll | 2 +- .../CodeGen/AArch64/speculation-hardening.ll | 12 ++--- .../CodeGen/AArch64/speculation-hardening.mir | 2 +- llvm/test/FileCheck/comment/after-words.txt | 2 +- .../test/FileCheck/comment/blank-comments.txt | 2 +- llvm/test/FileCheck/comment/suffixes.txt | 4 +- .../FileCheck/comment/suppresses-checks.txt | 6 +-- .../comment/unused-comment-prefixes.txt | 4 +- llvm/test/FileCheck/dump-input-enable.txt | 48 +++---------------- llvm/test/FileCheck/envvar-opts.txt | 6 +-- llvm/test/FileCheck/lit.local.cfg | 4 +- llvm/test/FileCheck/match-full-lines.txt | 4 +- llvm/test/FileCheck/verbose.txt | 6 +-- .../Transforms/InstCombine/fortify-folding.ll | 2 +- llvm/utils/FileCheck/FileCheck.cpp | 11 +---- llvm/utils/lit/lit/TestingConfig.py | 5 +- llvm/utils/lit/tests/lit.cfg | 2 +- mlir/test/Analysis/test-callgraph.mlir | 2 +- mlir/test/Analysis/test-dominance.mlir | 2 +- mlir/test/Analysis/test-liveness.mlir | 2 +- 
.../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 2 +- .../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 2 +- .../SCFToGPU/no_blocks_no_threads.mlir | 4 +- .../Conversion/SCFToGPU/parallel_loop.mlir | 2 +- .../ShapeToStandard/shape-to-standard.mlir | 2 +- mlir/test/Dialect/GPU/outlining.mlir | 2 +- mlir/test/Dialect/Linalg/fusion-tensor.mlir | 2 +- mlir/test/Dialect/Linalg/fusion.mlir | 2 +- .../Linalg/fusion_indexed_generic.mlir | 2 +- mlir/test/Dialect/Linalg/parallel_loops.mlir | 2 +- .../Dialect/Linalg/tensors-to-buffers.mlir | 2 +- .../Dialect/Linalg/tile_conv_padding.mlir | 4 +- mlir/test/Dialect/Linalg/tile_parallel.mlir | 8 ++-- mlir/test/Dialect/SCF/ops.mlir | 6 +-- .../Dialect/SCF/parallel-loop-fusion.mlir | 2 +- .../SCF/parallel-loop-specialization.mlir | 2 +- .../Dialect/SCF/parallel-loop-tiling.mlir | 2 +- mlir/test/Dialect/Shape/ops.mlir | 6 +-- mlir/test/Dialect/Shape/shape-to-shape.mlir | 2 +- mlir/test/Dialect/Standard/expand-atomic.mlir | 2 +- .../Vector/vector-contract-transforms.mlir | 6 +-- .../Vector/vector-flat-transforms.mlir | 2 +- mlir/test/EDSC/builder-api-test.cpp | 2 +- mlir/test/IR/print-op-local-scope.mlir | 2 +- ...nt-preparation-allowed-memref-results.mlir | 2 +- .../buffer-placement-preparation.mlir | 2 +- mlir/test/Transforms/buffer-placement.mlir | 2 +- mlir/test/Transforms/canonicalize.mlir | 2 +- mlir/test/Transforms/sccp-callgraph.mlir | 4 +- mlir/test/mlir-tblgen/op-attribute.td | 6 +-- mlir/test/mlir-tblgen/op-decl.td | 2 +- .../mlir-tblgen/op-derived-attribute.mlir | 2 +- mlir/test/mlir-tblgen/op-format-spec.td | 2 +- mlir/test/mlir-tblgen/op-interface.td | 4 +- mlir/test/mlir-tblgen/pattern.mlir | 2 +- mlir/test/mlir-tblgen/predicate.td | 2 +- mlir/test/mlir-tblgen/return-types.mlir | 2 +- 63 files changed, 119 insertions(+), 168 deletions(-) diff --git a/clang/test/CodeGenObjC/externally-retained.m b/clang/test/CodeGenObjC/externally-retained.m index f68696879768fc..b842b8c4c68ce8 100644 --- 
a/clang/test/CodeGenObjC/externally-retained.m +++ b/clang/test/CodeGenObjC/externally-retained.m @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 %s -S -emit-llvm -o - | FileCheck %s --dump-input-on-failure -// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 -xobjective-c++ -std=c++11 %s -S -emit-llvm -o - | FileCheck %s --check-prefix CHECKXX --dump-input-on-failure +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 %s -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.13.0 -fobjc-arc -fblocks -Wno-objc-root-class -O0 -xobjective-c++ -std=c++11 %s -S -emit-llvm -o - | FileCheck %s --check-prefix CHECKXX #define EXT_RET __attribute__((objc_externally_retained)) diff --git a/clang/test/Driver/rocm-device-libs.cl b/clang/test/Driver/rocm-device-libs.cl index cdb4716bde9a83..7f45116d363065 100644 --- a/clang/test/Driver/rocm-device-libs.cl +++ b/clang/test/Driver/rocm-device-libs.cl @@ -8,7 +8,7 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s @@ -17,7 +17,7 @@ // RUN: -x cl -mcpu=gfx803 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -26,7 +26,7 @@ // RUN: -x cl -mcpu=fiji \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck 
--check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803,WAVE64 %s @@ -35,7 +35,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX900,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ @@ -43,7 +43,7 @@ // RUN: -cl-denorms-are-zero \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DAZ,GFX803,WAVE64 %s @@ -52,7 +52,7 @@ // RUN: -cl-finite-math-only \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803,WAVE64 %s @@ -61,7 +61,7 @@ // RUN: -cl-fp32-correctly-rounded-divide-sqrt \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803,WAVE64 %s @@ -70,7 +70,7 @@ // RUN: -cl-fast-relaxed-math \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803,WAVE64 %s @@ -79,45 +79,45 @@ // RUN: -cl-unsafe-math-optimizations \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-UNSAFE,GFX803,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: 
%s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1011 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1011,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1011,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1012 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1012,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1012,WAVE32 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE64 %s // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX1010,WAVE32 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX1010,WAVE32 %s // Ignore -mno-wavefrontsize64 without wave32 support // RUN: %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx803 -mno-wavefrontsize64 \ // RUN: --rocm-path=%S/Inputs/rocm \ // RUN: %s \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMMON,GFX803,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMMON,GFX803,WAVE64 %s @@ -126,13 +126,13 @@ // RUN: -x cl -mcpu=gfx900 \ // RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// 
RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s // Test environment variable HIP_DEVICE_LIB_PATH // RUN: env HIP_DEVICE_LIB_PATH=%S/Inputs/rocm/amdgcn/bitcode %clang -### -target amdgcn-amd-amdhsa \ // RUN: -x cl -mcpu=gfx900 \ // RUN: %S/opencl.cl \ -// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s +// RUN: 2>&1 | FileCheck --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900,WAVE64 %s diff --git a/compiler-rt/test/fuzzer/fork.test b/compiler-rt/test/fuzzer/fork.test index e0f348b2bff1b4..6e76fe7f2b06a5 100644 --- a/compiler-rt/test/fuzzer/fork.test +++ b/compiler-rt/test/fuzzer/fork.test @@ -18,4 +18,4 @@ RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 2>&1 | FileCheck MAX_TOTAL_TIME: INFO: fuzzed for {{.*}} seconds, wrapping up soon MAX_TOTAL_TIME: INFO: exiting: {{.*}} time: -RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --dump-input-on-failure --check-prefix=MAX_TOTAL_TIME +RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s --check-prefix=MAX_TOTAL_TIME diff --git a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb index cdd3388d6d2e23..6ae1c7016b680e 100644 --- a/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb +++ b/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb @@ -1,4 +1,4 @@ -# RUN: gdb -q -batch -n -iex 'source %llvm_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-llvm-support | FileCheck %s --dump-input-on-failure +# RUN: gdb -q -batch -n -iex 'source %llvm_src_root/utils/gdb-scripts/prettyprinters.py' -x %s %llvm_tools_dir/check-gdb-llvm-support | FileCheck %s # REQUIRES: debug-info break main diff --git a/llvm/docs/CommandGuide/FileCheck.rst 
b/llvm/docs/CommandGuide/FileCheck.rst index 0512133f2e995c..b2e3dfcf01ad64 100644 --- a/llvm/docs/CommandGuide/FileCheck.rst +++ b/llvm/docs/CommandGuide/FileCheck.rst @@ -106,13 +106,8 @@ and from the command line. .. option:: --dump-input Dump input to stderr, adding annotations representing currently enabled - diagnostics. Do this either 'always', on 'fail', or 'never'. Specify 'help' - to explain the dump format and quit. - -.. option:: --dump-input-on-failure - - When the check fails, dump all of the original input. This option is - deprecated in favor of `--dump-input=fail`. + diagnostics. Do this either 'always', on 'fail' (default), or 'never'. + Specify 'help' to explain the dump format and quit. .. option:: --enable-var-scope diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll index 72f3170fb09c89..0f16235d7c69e0 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-dagisel.ll @@ -1,5 +1,5 @@ -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH declare i64 @g(i64, i64) local_unnamed_addr define i64 @f_using_reserved_reg_x16(i64 %a, i64 %b) local_unnamed_addr SLHATTR { diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll index 
c5aae051430074..58690052183545 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --dump-input-on-failure +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s define i128 @ldp_single_csdb(i128* %p) speculative_load_hardening { entry: diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.ll b/llvm/test/CodeGen/AArch64/speculation-hardening.ll index 23b87563013fd9..d298efa94dc596 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.ll @@ -1,9 +1,9 @@ -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure -; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 
's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH +; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH +; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR { ; CHECK-LABEL: f diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening.mir b/llvm/test/CodeGen/AArch64/speculation-hardening.mir index 5991c4df0407f7..0073bedf8ffad2 100644 --- a/llvm/test/CodeGen/AArch64/speculation-hardening.mir +++ b/llvm/test/CodeGen/AArch64/speculation-hardening.mir @@ -1,6 +1,6 @@ # RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu \ # RUN: -start-before aarch64-speculation-hardening -o - %s \ -# RUN: | FileCheck %s --dump-input-on-failure +# RUN: | FileCheck %s # Check that the speculation hardening pass generates code as expected for # basic blocks ending with a variety of branch patterns: diff --git a/llvm/test/FileCheck/comment/after-words.txt b/llvm/test/FileCheck/comment/after-words.txt index 46eeb657f0157d..3650f959be3fa8 100644 --- a/llvm/test/FileCheck/comment/after-words.txt +++ b/llvm/test/FileCheck/comment/after-words.txt @@ -8,7 +8,7 @@ RUN: echo 'FOO-COM: CHECK: foo' > %t.chk RUN: echo 'RUN_COM: CHECK: bar' >> %t.chk RUN: echo 'RUN3COM: CHECK: foo' >> %t.chk RUN: echo ' COMRUN: CHECK: bar' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s 
+RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:1:17: remark: CHECK: expected string found in input CHECK: .chk:2:17: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/blank-comments.txt b/llvm/test/FileCheck/comment/blank-comments.txt index b035ddd750d09e..1bad6d2daada51 100644 --- a/llvm/test/FileCheck/comment/blank-comments.txt +++ b/llvm/test/FileCheck/comment/blank-comments.txt @@ -4,6 +4,6 @@ RUN: echo 'foo' > %t.in RUN: echo 'COM:' > %t.chk RUN: echo 'CHECK: foo' >> %t.chk RUN: echo ' COM: ' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:2:8: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/suffixes.txt b/llvm/test/FileCheck/comment/suffixes.txt index 47805b46d0c94e..85b05fb5778cf3 100644 --- a/llvm/test/FileCheck/comment/suffixes.txt +++ b/llvm/test/FileCheck/comment/suffixes.txt @@ -6,7 +6,7 @@ RUN: echo bar >> %t.in RUN: echo 'COM-NEXT: CHECK: foo' > %t.chk RUN: echo 'RUN-NOT: CHECK: bar' >> %t.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t.chk < %t.in 2>&1 | \ +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | \ RUN: FileCheck -check-prefix=CHECK1 %s CHECK1: .chk:1:18: remark: CHECK: expected string found in input @@ -15,7 +15,7 @@ CHECK1: .chk:2:17: remark: CHECK: expected string found in input # But we can define them as comment prefixes. 
RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv -comment-prefixes=COM,RUN,RUN-NOT %t.chk < %t.in 2>&1 | \ +RUN: FileCheck -dump-input=never -vv -comment-prefixes=COM,RUN,RUN-NOT %t.chk < %t.in 2>&1 | \ RUN: FileCheck -check-prefix=CHECK2 %s CHECK2: .chk:1:18: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/comment/suppresses-checks.txt b/llvm/test/FileCheck/comment/suppresses-checks.txt index 98f01811f53f4d..a58a040b5d39a7 100644 --- a/llvm/test/FileCheck/comment/suppresses-checks.txt +++ b/llvm/test/FileCheck/comment/suppresses-checks.txt @@ -7,7 +7,7 @@ RUN: echo 'foo' > %t-1.in RUN: echo 'COM: CHECK: bar' > %t-1.chk RUN: echo 'CHECK: foo' >> %t-1.chk RUN: echo 'RUN: echo "CHECK: baz"' >> %t-1.chk -RUN: %ProtectFileCheckOutput FileCheck -vv %t-1.chk < %t-1.in 2>&1 | \ +RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv %t-1.chk < %t-1.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=2 %s # Check the case of one user-specified comment prefix. @@ -16,7 +16,7 @@ RUN: echo 'foo' > %t-2.in RUN: echo 'CHECK: foo' > %t-2.chk RUN: echo 'letters then space MY-PREFIX: CHECK: bar' >> %t-2.chk RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t-2.chk -comment-prefixes=MY-PREFIX < %t-2.in 2>&1 | \ +RUN: FileCheck -dump-input=never -vv %t-2.chk -comment-prefixes=MY-PREFIX < %t-2.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=1 %s # Check the case of multiple user-specified comment prefixes. 
@@ -26,7 +26,7 @@ RUN: echo 'CHECK: foo' >> %t-3.chk RUN: echo 'Foo_1: CHECK: Foo' >> %t-3.chk RUN: echo 'Baz_3: CHECK: Baz' >> %t-3.chk RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t-3.chk -comment-prefixes=Foo_1,Bar_2 \ +RUN: FileCheck -dump-input=never -vv %t-3.chk -comment-prefixes=Foo_1,Bar_2 \ RUN: -comment-prefixes=Baz_3 < %t-3.in 2>&1 | \ RUN: FileCheck -DCHECK_LINE=2 %s diff --git a/llvm/test/FileCheck/comment/unused-comment-prefixes.txt b/llvm/test/FileCheck/comment/unused-comment-prefixes.txt index 29212ecb6aadce..5dadc8f3569d5c 100644 --- a/llvm/test/FileCheck/comment/unused-comment-prefixes.txt +++ b/llvm/test/FileCheck/comment/unused-comment-prefixes.txt @@ -5,12 +5,12 @@ RUN: echo 'CHECK: foo' > %t.chk # Check the case of default comment prefixes. RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv %t.chk < %t.in 2>&1 | FileCheck %s +RUN: FileCheck -dump-input=never -vv %t.chk < %t.in 2>&1 | FileCheck %s # Specifying non-default comment prefixes doesn't mean you have to use them. # For example, they might be applied to an entire test suite via # FILECHECK_OPTS or via a wrapper command or substitution. RUN: %ProtectFileCheckOutput \ -RUN: FileCheck -vv -comment-prefixes=FOO %t.chk < %t.in 2>&1 | FileCheck %s +RUN: FileCheck -dump-input=never -vv -comment-prefixes=FOO %t.chk < %t.in 2>&1 | FileCheck %s CHECK: .chk:1:8: remark: CHECK: expected string found in input diff --git a/llvm/test/FileCheck/dump-input-enable.txt b/llvm/test/FileCheck/dump-input-enable.txt index cf47f03dfa835b..48a6eef417154e 100644 --- a/llvm/test/FileCheck/dump-input-enable.txt +++ b/llvm/test/FileCheck/dump-input-enable.txt @@ -74,20 +74,22 @@ BADVAL: {{F|f}}ile{{C|c}}heck{{.*}}: for the --dump-input option: Cannot find op ; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,ERR,NODUMP ;-------------------------------------------------- -; Check no -dump-input, which defaults to never. +; Check no -dump-input, which defaults to fail. 
;-------------------------------------------------- -; FileCheck success, -v => no dump, trace. +; FileCheck success, -v => no dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,NODUMP +; RUN: | FileCheck %s -match-full-lines -allow-empty \ +; RUN: -check-prefixes=NOTRACE,NODUMP -; FileCheck fail, -v => no dump, trace. +; FileCheck fail, -v => dump, no trace. ; RUN: %ProtectFileCheckOutput \ ; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ ; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -check-prefixes=TRACE,ERR,NODUMP +; RUN: | FileCheck %s -match-full-lines \ +; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V ;-------------------------------------------------- ; Check -dump-input=fail. @@ -122,42 +124,6 @@ BADVAL: {{F|f}}ile{{C|c}}heck{{.*}}: for the --dump-input option: Cannot find op ; RUN: | FileCheck %s -match-full-lines \ ; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V -;-------------------------------------------------- -; Check -dump-input-on-failure. -;-------------------------------------------------- - -; Command-line option. - -; FileCheck success, -v => no dump, no trace. -; RUN: %ProtectFileCheckOutput \ -; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -allow-empty \ -; RUN: -check-prefixes=NOTRACE,NODUMP - -; FileCheck fail, -v => dump, no trace. -; RUN: %ProtectFileCheckOutput \ -; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -dump-input-on-failure -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines \ -; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V - -; FILECHECK_DUMP_INPUT_ON_FAILURE=1. - -; FileCheck success, -v => no dump, no trace. 
-; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \ -; RUN: FileCheck -input-file %t.good %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines -allow-empty \ -; RUN: -check-prefixes=NOTRACE,NODUMP - -; FileCheck fail, -v => dump, no trace. -; RUN: %ProtectFileCheckOutput FILECHECK_DUMP_INPUT_ON_FAILURE=1 \ -; RUN: not FileCheck -input-file %t.err %t.check -check-prefix=CHECK \ -; RUN: -match-full-lines -v 2>&1 \ -; RUN: | FileCheck %s -match-full-lines \ -; RUN: -check-prefixes=NOTRACE,ERR,DUMP-ERR,DUMP-ERR-V - ;-------------------------------------------------- ; Check -dump-input=always. ;-------------------------------------------------- diff --git a/llvm/test/FileCheck/envvar-opts.txt b/llvm/test/FileCheck/envvar-opts.txt index c1a9b2e1b243e6..da2b9f919a0d89 100644 --- a/llvm/test/FileCheck/envvar-opts.txt +++ b/llvm/test/FileCheck/envvar-opts.txt @@ -4,15 +4,15 @@ ; CHECK: bar ; RUN: %ProtectFileCheckOutput \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET %s ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS= \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET %s ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-v \ -; RUN: not FileCheck %s -input-file %t.in 2>&1 \ +; RUN: not FileCheck %s -dump-input=never -input-file %t.in 2>&1 \ ; RUN: | FileCheck -check-prefix VERB %s ; QUIET-NOT: remark: {{CHECK}}: expected string found in input diff --git a/llvm/test/FileCheck/lit.local.cfg b/llvm/test/FileCheck/lit.local.cfg index 65aba149e22d7c..9164f683fc1be3 100644 --- a/llvm/test/FileCheck/lit.local.cfg +++ b/llvm/test/FileCheck/lit.local.cfg @@ -39,7 +39,7 @@ config.test_format = lit.formats.ShTest(execute_external=False) # ; FILECHECK_OPTS beforehand. 
# ; # ; RUN: %ProtectFileCheckOutput FILECHECK_OPTS=-v \ -# ; RUN: FileCheck -input-file %s %s 2>&1 \ +# ; RUN: FileCheck -dump-input=never -input-file %s %s 2>&1 \ # ; RUN: | FileCheck -check-prefix TRACE %s # ; # ; CHECK: {{[0-9]+\.0}} @@ -53,4 +53,4 @@ config.test_format = lit.formats.ShTest(execute_external=False) # status (e.g., FILECHECK_OPTS=-strict-whitespace), he shouldn't be surprised # that test results throughout all test suites are affected. config.substitutions.append(('%ProtectFileCheckOutput', - 'env -u FILECHECK_OPTS -u FILECHECK_DUMP_INPUT_ON_FAILURE')) + 'env -u FILECHECK_OPTS')) diff --git a/llvm/test/FileCheck/match-full-lines.txt b/llvm/test/FileCheck/match-full-lines.txt index 114f628d8bc927..d69ebbc4c5a9b9 100644 --- a/llvm/test/FileCheck/match-full-lines.txt +++ b/llvm/test/FileCheck/match-full-lines.txt @@ -1,8 +1,8 @@ // RUN: %ProtectFileCheckOutput \ -// RUN: not FileCheck -match-full-lines -input-file %s %s 2>&1 \ +// RUN: not FileCheck -match-full-lines -dump-input=never -input-file %s %s 2>&1 \ // RUN: | FileCheck --check-prefix=ERROR --implicit-check-not=error: %s // RUN: %ProtectFileCheckOutput \ -// RUN: not FileCheck -match-full-lines -strict-whitespace -input-file %s %s \ +// RUN: not FileCheck -match-full-lines -strict-whitespace -dump-input=never -input-file %s %s \ // RUN: 2>&1 | FileCheck --check-prefix=ERROR-STRICT --check-prefix=ERROR \ // RUN: --implicit-check-not=error: %s diff --git a/llvm/test/FileCheck/verbose.txt b/llvm/test/FileCheck/verbose.txt index 66c4b1efbe06b5..f852702a9b1f8b 100644 --- a/llvm/test/FileCheck/verbose.txt +++ b/llvm/test/FileCheck/verbose.txt @@ -1,8 +1,8 @@ -; RUN: %ProtectFileCheckOutput FileCheck -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -input-file %s %s 2>&1 \ ; RUN: | FileCheck -check-prefix QUIET --allow-empty %s -; RUN: %ProtectFileCheckOutput FileCheck -v -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -v 
-input-file %s %s 2>&1 \ ; RUN: | FileCheck --strict-whitespace -check-prefix V %s -; RUN: %ProtectFileCheckOutput FileCheck -vv -input-file %s %s 2>&1 \ +; RUN: %ProtectFileCheckOutput FileCheck -dump-input=never -vv -input-file %s %s 2>&1 \ ; RUN: | FileCheck --strict-whitespace -check-prefixes V,VV %s foo diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll index b2171a44f57ef8..2602640595e65e 100644 --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s --dump-input-on-failure +; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index 3ee7c5a66a908f..8e41365de81a84 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -106,15 +106,6 @@ static cl::opt VerboseVerbose( cl::desc("Print information helpful in diagnosing internal FileCheck\n" "issues, or add it to the input dump if enabled. Implies\n" "-v.\n")); -static const char * DumpInputEnv = "FILECHECK_DUMP_INPUT_ON_FAILURE"; - -static cl::opt DumpInputOnFailure( - "dump-input-on-failure", - cl::init(std::getenv(DumpInputEnv) && *std::getenv(DumpInputEnv)), - cl::desc("Dump original input to stderr before failing.\n" - "The value can be also controlled using\n" - "FILECHECK_DUMP_INPUT_ON_FAILURE environment variable.\n" - "This option is deprecated in favor of -dump-input=fail.\n")); // The order of DumpInputValue members affects their precedence, as documented // for -dump-input below. @@ -678,7 +669,7 @@ int main(int argc, char **argv) { SMLoc()); if (DumpInput == DumpInputDefault) - DumpInput = DumpInputOnFailure ? 
DumpInputFail : DumpInputNever; + DumpInput = DumpInputFail; std::vector Diags; int ExitCode = FC.checkInput(SM, InputFileText, diff --git a/llvm/utils/lit/lit/TestingConfig.py b/llvm/utils/lit/lit/TestingConfig.py index dd2d3f52f89cb1..cfc0dab86e1105 100644 --- a/llvm/utils/lit/lit/TestingConfig.py +++ b/llvm/utils/lit/lit/TestingConfig.py @@ -26,9 +26,8 @@ def fromdefaults(litConfig): 'LSAN_OPTIONS', 'ADB', 'ANDROID_SERIAL', 'SANITIZER_IGNORE_CVE_2016_2143', 'TMPDIR', 'TMP', 'TEMP', 'TEMPDIR', 'AVRLIT_BOARD', 'AVRLIT_PORT', - 'FILECHECK_DUMP_INPUT_ON_FAILURE', 'FILECHECK_OPTS', - 'VCINSTALLDIR', 'VCToolsinstallDir', 'VSINSTALLDIR', - 'WindowsSdkDir', 'WindowsSDKLibVersion'] + 'FILECHECK_OPTS', 'VCINSTALLDIR', 'VCToolsinstallDir', + 'VSINSTALLDIR', 'WindowsSdkDir', 'WindowsSDKLibVersion'] if sys.platform == 'win32': pass_vars.append('INCLUDE') diff --git a/llvm/utils/lit/tests/lit.cfg b/llvm/utils/lit/tests/lit.cfg index ba9cb4da9cfd1f..f5686873ec891d 100644 --- a/llvm/utils/lit/tests/lit.cfg +++ b/llvm/utils/lit/tests/lit.cfg @@ -61,7 +61,7 @@ config.substitutions.append(('%{inputs}', os.path.join( config.test_source_root, 'Inputs'))) config.substitutions.append(('%{lit}', "{env} %{{python}} {lit}".format( - env="env -u FILECHECK_OPTS -u FILECHECK_DUMP_INPUT_ON_FAILURE", + env="env -u FILECHECK_OPTS", lit=os.path.join(lit_path, 'lit.py')))) config.substitutions.append(('%{python}', '"%s"' % (sys.executable))) diff --git a/mlir/test/Analysis/test-callgraph.mlir b/mlir/test/Analysis/test-callgraph.mlir index 8c295ff248e554..bb5ba60742006c 100644 --- a/mlir/test/Analysis/test-callgraph.mlir +++ b/mlir/test/Analysis/test-callgraph.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-callgraph -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-callgraph -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : "simple" module attributes {test.name = "simple"} { diff --git a/mlir/test/Analysis/test-dominance.mlir 
b/mlir/test/Analysis/test-dominance.mlir index 6366a49a62e3ef..9430038a538f96 100644 --- a/mlir/test/Analysis/test-dominance.mlir +++ b/mlir/test/Analysis/test-dominance.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-dominance -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-dominance -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_condBranch func @func_condBranch(%cond : i1) { diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir index 9e1329f3609b23..3beb2186afb55f 100644 --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-print-liveness -split-input-file 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-print-liveness -split-input-file 2>&1 | FileCheck %s // CHECK-LABEL: Testing : func_empty func @func_empty() { diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 925615c0674e7d..20d166bab05d1a 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index 4404cebec85319..61becff83c6cfd 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-gpu-to-rocdl -split-input-file | FileCheck %s gpu.module @test_module { // CHECK-LABEL: func @gpu_index_ops() diff --git 
a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir index 44f170bc43bbc9..451fcbe173da0c 100644 --- a/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir +++ b/mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=0 gpu-thread-dims=1" %s | FileCheck --check-prefix=CHECK-THREADS %s --dump-input-on-failure -// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=1 gpu-thread-dims=0" %s | FileCheck --check-prefix=CHECK-BLOCKS %s --dump-input-on-failure +// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=0 gpu-thread-dims=1" %s | FileCheck --check-prefix=CHECK-THREADS %s +// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=1 gpu-thread-dims=0" %s | FileCheck --check-prefix=CHECK-BLOCKS %s // CHECK-THREADS-LABEL: @one_d_loop // CHECK-BLOCKS-LABEL: @one_d_loop diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir index 52ed94cae567e7..de19331ce91192 100644 --- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -convert-parallel-loops-to-gpu -split-input-file -verify-diagnostics %s | FileCheck %s // 2-d parallel loop mapped to block.y and block.x diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index fd1f980c536938..7c7098d76afa86 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --convert-shape-to-std --verify-diagnostics %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt --split-input-file 
--convert-shape-to-std --verify-diagnostics %s | FileCheck %s // Convert `size` to `index` type. // CHECK-LABEL: @size_id diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir index d15f10fd75ecee..51394ab615258c 100644 --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s // CHECK: module attributes {gpu.container_module} diff --git a/mlir/test/Dialect/Linalg/fusion-tensor.mlir b/mlir/test/Dialect/Linalg/fusion-tensor.mlir index 6d6a409edbd2a8..5f1f90707a6ebf 100644 --- a/mlir/test/Dialect/Linalg/fusion-tensor.mlir +++ b/mlir/test/Dialect/Linalg/fusion-tensor.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion-for-tensor-ops -split-input-file | FileCheck %s // CHECK-DAG: [[MAP0:#[a-zA-Z0-9_]*]] = affine_map<(d0, d1) -> (d0, d1)> #map0 = affine_map<(d0, d1) -> (d0, d1)> diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir index 2f472aa6aaf2d4..db47e8eea6165c 100644 --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s func @f1(%A: memref, %B: memref, diff --git a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir index de16e4b50f33ac..c14db3bed1c4b6 100644 --- a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir +++ b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-linalg-fusion -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s #map = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> #id_2d = affine_map<(d0, d1) -> (d0, d1)> diff --git a/mlir/test/Dialect/Linalg/parallel_loops.mlir b/mlir/test/Dialect/Linalg/parallel_loops.mlir index 2174ddc3c269da..597990eac264eb 100644 --- a/mlir/test/Dialect/Linalg/parallel_loops.mlir +++ b/mlir/test/Dialect/Linalg/parallel_loops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-linalg-to-parallel-loops -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -convert-linalg-to-parallel-loops -split-input-file | FileCheck %s #map0 = affine_map<(d0, d1) -> (d0, d1)> func @linalg_generic_sum(%lhs: memref<2x2xf32>, diff --git a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir index ed82c93622dff5..a744d14af74a9e 100644 --- a/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir +++ b/mlir/test/Dialect/Linalg/tensors-to-buffers.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -convert-linalg-on-tensors-to-buffers -buffer-placement -split-input-file %s | FileCheck %s #map0 = affine_map<(d0) -> (d0)> diff --git a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir index 98cecc3e81e2e8..273f6491315973 100644 --- a/mlir/test/Dialect/Linalg/tile_conv_padding.mlir +++ b/mlir/test/Dialect/Linalg/tile_conv_padding.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000 --dump-input-on-failure +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,0,0,4" | FileCheck %s -check-prefix=TILE-23004 
+// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-20000 // TILE-23004-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> // TILE-20000-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> diff --git a/mlir/test/Dialect/Linalg/tile_parallel.mlir b/mlir/test/Dialect/Linalg/tile_parallel.mlir index 963051b7c7b308..18d9d2016b1d24 100644 --- a/mlir/test/Dialect/Linalg/tile_parallel.mlir +++ b/mlir/test/Dialect/Linalg/tile_parallel.mlir @@ -1,7 +1,7 @@ -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-2 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,2" | FileCheck %s -check-prefix=TILE-02 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,0,2" | FileCheck %s -check-prefix=TILE-002 --dump-input-on-failure -// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,3,4" | FileCheck %s -check-prefix=TILE-234 --dump-input-on-failure +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2" | FileCheck %s -check-prefix=TILE-2 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,2" | FileCheck %s -check-prefix=TILE-02 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=0,0,2" | FileCheck %s -check-prefix=TILE-002 +// RUN: mlir-opt %s -linalg-tile-to-parallel-loops="linalg-tile-sizes=2,3,4" | FileCheck %s -check-prefix=TILE-234 #id_2d = affine_map<(i, j) -> (i, j)> #pointwise_2d_trait = { diff --git a/mlir/test/Dialect/SCF/ops.mlir b/mlir/test/Dialect/SCF/ops.mlir index c21451d8cf7fdf..1058983f5fb9a4 100644 --- a/mlir/test/Dialect/SCF/ops.mlir +++ b/mlir/test/Dialect/SCF/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | FileCheck %s // Verify 
the printed output can be parsed. -// RUN: mlir-opt %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | mlir-opt | FileCheck %s // Verify the generic form can be parsed. -// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s func @std_for(%arg0 : index, %arg1 : index, %arg2 : index) { scf.for %i0 = %arg0 to %arg1 step %arg2 { diff --git a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir index 6ed5ad36819e7c..8e6769961c10c6 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-fusion.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(parallel-loop-fusion)' -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(parallel-loop-fusion)' -split-input-file | FileCheck %s func @fuse_empty_loops() { %c2 = constant 2 : index diff --git a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir index 5843eb6d4134a8..d7c0f1d3074e3a 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-specialization.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -parallel-loop-specialization -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -parallel-loop-specialization -split-input-file | FileCheck %s #map0 = affine_map<()[s0, s1] -> (1024, s0 - s1)> #map1 = affine_map<()[s0, s1] -> (64, s0 - s1)> diff --git a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir index 7b37830e8c5db9..14912436f96b27 100644 --- a/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir +++ b/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s 
-pass-pipeline='func(parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -pass-pipeline='func(parallel-loop-tiling{parallel-loop-tile-sizes=1,4})' -split-input-file | FileCheck %s func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, diff --git a/mlir/test/Dialect/Shape/ops.mlir b/mlir/test/Dialect/Shape/ops.mlir index d25a7f01535e0e..a6668187f078df 100644 --- a/mlir/test/Dialect/Shape/ops.mlir +++ b/mlir/test/Dialect/Shape/ops.mlir @@ -1,8 +1,8 @@ -// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s // Verify the printed output can be parsed. -// RUN: mlir-opt %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s | mlir-opt | FileCheck %s // Verify the generic form can be parsed. -// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s // CHECK-LABEL: shape_num_elements func @shape_num_elements(%shape : !shape.shape) -> !shape.size { diff --git a/mlir/test/Dialect/Shape/shape-to-shape.mlir b/mlir/test/Dialect/Shape/shape-to-shape.mlir index d2338cddc5e1c2..b3be4c9de3a1bd 100644 --- a/mlir/test/Dialect/Shape/shape-to-shape.mlir +++ b/mlir/test/Dialect/Shape/shape-to-shape.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -shape-to-shape-lowering -split-input-file %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -shape-to-shape-lowering -split-input-file %s | FileCheck %s // CHECK-LABEL: func @num_elements_to_reduce( // CHECK-SAME: [[ARG:%.*]]: !shape.shape) -> [[SIZE_TY:!.*]] { diff --git a/mlir/test/Dialect/Standard/expand-atomic.mlir b/mlir/test/Dialect/Standard/expand-atomic.mlir index b4e65945f58aeb..2f5cc7c179ed45 100644 --- a/mlir/test/Dialect/Standard/expand-atomic.mlir +++ 
b/mlir/test/Dialect/Standard/expand-atomic.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -expand-atomic -split-input-file | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -expand-atomic -split-input-file | FileCheck %s // CHECK-LABEL: func @atomic_rmw_to_generic // CHECK-SAME: ([[F:%.*]]: memref<10xf32>, [[f:%.*]]: f32, [[i:%.*]]: index) diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 76d7a9a0e7df0b..da784205224a6e 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s --dump-input-on-failure -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX --dump-input-on-failure -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion | FileCheck %s +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT #dotp_accesses = [ affine_map<(i) -> (i)>, diff --git a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir index e715755738de86..6a1e6ee85a7d47 100644 --- a/mlir/test/Dialect/Vector/vector-flat-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-flat-transforms.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-flat-transpose=1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-vector-contraction-conversion=vector-flat-transpose=1 | FileCheck %s // Tests for lowering 2-D vector.transpose into 
vector.flat_transpose. // diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index b48fd99c8f7d54..4d0888e55312ab 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// RUN: mlir-edsc-builder-api-test | FileCheck %s -dump-input-on-failure +// RUN: mlir-edsc-builder-api-test | FileCheck %s #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" #include "mlir/Dialect/Linalg/EDSC/Builders.h" diff --git a/mlir/test/IR/print-op-local-scope.mlir b/mlir/test/IR/print-op-local-scope.mlir index 93b25fca943ebc..2ff201cf6debbf 100644 --- a/mlir/test/IR/print-op-local-scope.mlir +++ b/mlir/test/IR/print-op-local-scope.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -mlir-print-local-scope | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -mlir-print-local-scope | FileCheck %s // CHECK: "foo.op"() : () -> memref (d0 * 2)>> "foo.op"() : () -> (memref (2*d0)>>) diff --git a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir index adf6e30fe6c6c8..97c96008f26910 100644 --- a/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation-allowed-memref-results.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -test-buffer-placement-preparation-with-allowed-memref-results -split-input-file %s | FileCheck %s // Since allowMemrefEscaping is on for Buffer Placement in this test pass, all // tensor typed function results are converted to memref and remain as function diff --git a/mlir/test/Transforms/buffer-placement-preparation.mlir 
b/mlir/test/Transforms/buffer-placement-preparation.mlir index cae2829ead1757..9b0755aad18009 100644 --- a/mlir/test/Transforms/buffer-placement-preparation.mlir +++ b/mlir/test/Transforms/buffer-placement-preparation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -test-buffer-placement-preparation -split-input-file %s | FileCheck %s // CHECK-LABEL: func @func_signature_conversion func @func_signature_conversion(%arg0: tensor<4x8xf32>) { diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir index 4b401cc841afee..176e063a700be0 100644 --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -buffer-placement -split-input-file %s | FileCheck %s // This file checks the behaviour of BufferPlacement pass for moving Alloc and Dealloc // operations and inserting the missing the DeallocOps in their correct positions. 
diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 6e24bb3b2d832a..f1ad305d5c87fd 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='func(canonicalize)' -split-input-file | FileCheck %s // CHECK-LABEL: func @test_subi_zero func @test_subi_zero(%arg0: i32) -> i32 { diff --git a/mlir/test/Transforms/sccp-callgraph.mlir b/mlir/test/Transforms/sccp-callgraph.mlir index add65d9e33c5ae..c30cdf7bfb97de 100644 --- a/mlir/test/Transforms/sccp-callgraph.mlir +++ b/mlir/test/Transforms/sccp-callgraph.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s -dump-input-on-failure -// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED -dump-input-on-failure +// RUN: mlir-opt -allow-unregistered-dialect %s -sccp -split-input-file | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="module(sccp)" -split-input-file | FileCheck %s --check-prefix=NESTED /// Check that a constant is properly propagated through the arguments and /// results of a private function. 
diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index b4c850269a1d26..fc10d4c2d66e57 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -1,6 +1,6 @@ -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL --dump-input-on-failure -// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --check-prefix=DEF --dump-input-on-failure -// RUN: mlir-tblgen -print-records -I %S/../../include %s | FileCheck %s --check-prefix=RECORD --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL +// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --check-prefix=DEF +// RUN: mlir-tblgen -print-records -I %S/../../include %s | FileCheck %s --check-prefix=RECORD include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index a101103b08fc0f..655d49cbd3a7cf 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck --dump-input-on-failure %s +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" diff --git a/mlir/test/mlir-tblgen/op-derived-attribute.mlir b/mlir/test/mlir-tblgen/op-derived-attribute.mlir index b11df48a319c88..ec4f4dcf7dae42 100644 --- a/mlir/test/mlir-tblgen/op-derived-attribute.mlir +++ b/mlir/test/mlir-tblgen/op-derived-attribute.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-derived-attr -verify-diagnostics %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -test-derived-attr -verify-diagnostics %s | FileCheck %s // CHECK-LABEL: verifyDerivedAttributes func @verifyDerivedAttributes() { diff --git a/mlir/test/mlir-tblgen/op-format-spec.td b/mlir/test/mlir-tblgen/op-format-spec.td index 
613f3d1d482963..47255d47f8a761 100644 --- a/mlir/test/mlir-tblgen/op-format-spec.td +++ b/mlir/test/mlir-tblgen/op-format-spec.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-decls -asmformat-error-is-fatal=false -I %S/../../include %s -o=%t 2>&1 | FileCheck %s --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-decls -asmformat-error-is-fatal=false -I %S/../../include %s -o=%t 2>&1 | FileCheck %s // This file contains tests for the specification of the declarative op format. diff --git a/mlir/test/mlir-tblgen/op-interface.td b/mlir/test/mlir-tblgen/op-interface.td index cb53a77ac0cb36..8e5167e6fe539c 100644 --- a/mlir/test/mlir-tblgen/op-interface.td +++ b/mlir/test/mlir-tblgen/op-interface.td @@ -1,5 +1,5 @@ -// RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL --dump-input-on-failure -// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP_DECL --dump-input-on-failure +// RUN: mlir-tblgen -gen-op-interface-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=OP_DECL include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/pattern.mlir b/mlir/test/mlir-tblgen/pattern.mlir index 50ec1688ddcce4..6154e6bc4c4579 100644 --- a/mlir/test/mlir-tblgen/pattern.mlir +++ b/mlir/test/mlir-tblgen/pattern.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -test-patterns -mlir-print-debuginfo %s | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt -test-patterns -mlir-print-debuginfo %s | FileCheck %s // CHECK-LABEL: verifyFusedLocs func @verifyFusedLocs(%arg0 : i32) -> i32 { diff --git a/mlir/test/mlir-tblgen/predicate.td b/mlir/test/mlir-tblgen/predicate.td index a617208d157a0d..040d2b6de3935e 100644 --- a/mlir/test/mlir-tblgen/predicate.td +++ b/mlir/test/mlir-tblgen/predicate.td @@ -1,4 +1,4 @@ -// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s --dump-input-on-failure +// RUN: 
mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s include "mlir/IR/OpBase.td" diff --git a/mlir/test/mlir-tblgen/return-types.mlir b/mlir/test/mlir-tblgen/return-types.mlir index d0eb364a6a9d78..01e6294564d157 100644 --- a/mlir/test/mlir-tblgen/return-types.mlir +++ b/mlir/test/mlir-tblgen/return-types.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-return-type -split-input-file -verify-diagnostics | FileCheck %s --dump-input-on-failure +// RUN: mlir-opt %s -test-return-type -split-input-file -verify-diagnostics | FileCheck %s // CHECK-LABEL: testCreateFunctions // This function tests invoking the create method with different inference From 6c5c4a2a50e1fcdd94c0288008af65c544a96452 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 9 Jun 2020 11:58:22 -0700 Subject: [PATCH 13/25] [lldb/Reproducers] Also collect ::open and ::fopen Report files opened trough ::open and ::fopen to the FileCollector. --- lldb/source/Host/posix/FileSystemPosix.cpp | 2 ++ lldb/source/Host/windows/FileSystem.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/lldb/source/Host/posix/FileSystemPosix.cpp b/lldb/source/Host/posix/FileSystemPosix.cpp index 3660f67895a4f7..0aa34bc5943596 100644 --- a/lldb/source/Host/posix/FileSystemPosix.cpp +++ b/lldb/source/Host/posix/FileSystemPosix.cpp @@ -72,9 +72,11 @@ Status FileSystem::ResolveSymbolicLink(const FileSpec &src, FileSpec &dst) { } FILE *FileSystem::Fopen(const char *path, const char *mode) { + Collect(path); return llvm::sys::RetryAfterSignal(nullptr, ::fopen, path, mode); } int FileSystem::Open(const char *path, int flags, int mode) { + Collect(path); return llvm::sys::RetryAfterSignal(-1, ::open, path, flags, mode); } diff --git a/lldb/source/Host/windows/FileSystem.cpp b/lldb/source/Host/windows/FileSystem.cpp index cbd1915bdb448c..94872c99b15ecb 100644 --- a/lldb/source/Host/windows/FileSystem.cpp +++ b/lldb/source/Host/windows/FileSystem.cpp @@ -86,6 +86,7 @@ Status FileSystem::ResolveSymbolicLink(const 
FileSpec &src, FileSpec &dst) { } FILE *FileSystem::Fopen(const char *path, const char *mode) { + Collect(path); std::wstring wpath, wmode; if (!llvm::ConvertUTF8toWide(path, wpath)) return nullptr; @@ -98,6 +99,7 @@ FILE *FileSystem::Fopen(const char *path, const char *mode) { } int FileSystem::Open(const char *path, int flags, int mode) { + Collect(path); std::wstring wpath; if (!llvm::ConvertUTF8toWide(path, wpath)) return -1; From 6eeac6ae33046f022f2d2c857ef38d2329acfc88 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 9 Jun 2020 08:21:03 -0400 Subject: [PATCH 14/25] GlobalISel: Fix double printing new instructions in legalizer New instructions were getting printed both in createdInstr, and in the final printNewInstrs, so it made it look like the same instructions were created twice. This overall made reading the debug output harder. Stop printing the initial construction and only print new instructions in the summary at the end. This avoids printing the less useful case where instructions are sometimes initially created with no operands. I'm not sure this is the correct instance to remove; now the visible ordering is different. Now you will typically see the one erased instruction message before all the new instructions in order. I think this is the more logical view of typical legalization changes, although it's mechanically backwards from the normal insert-new-erase-old pattern. --- llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 0a2d71c275d582..a9bfc11d0aa68d 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -133,7 +133,6 @@ class LegalizerWorkListManager : public GISelChangeObserver { } void createdInstr(MachineInstr &MI) override { - LLVM_DEBUG(dbgs() << ".. .. 
New MI: " << MI); LLVM_DEBUG(NewMIs.push_back(&MI)); createdOrChangedInstr(MI); } From bb6cb6bfe413e1f3e368f1bf0550a7517d7c8d66 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 9 Jun 2020 09:20:57 -0400 Subject: [PATCH 15/25] GlobalISel: Remove redundant check in verifier This was already checked earlier for all instructions. --- llvm/lib/CodeGen/MachineVerifier.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index df23ccf4e195c4..c477626172450a 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -915,9 +915,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { switch (MI->getOpcode()) { case TargetOpcode::G_CONSTANT: case TargetOpcode::G_FCONSTANT: { - if (MI->getNumOperands() < MCID.getNumOperands()) - break; - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); if (DstTy.isVector()) report("Instruction cannot use a vector result type", MI); From babbf4441b6022a2d76f831316b7c3588ade9e15 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 7 Jun 2020 21:24:34 -0400 Subject: [PATCH 16/25] GlobalISel: Move some trivial MIRBuilder methods into the header The construction APIs for MachineIRBuilder don't make much sense, and it's been annoying to sort through it with these trivial functions separate from the declaration. 
--- .../CodeGen/GlobalISel/MachineIRBuilder.h | 40 +++++++++++++++---- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 38 ------------------ 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 118a177f88dfb6..44eac6bb2ba331 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -228,7 +228,11 @@ class MachineIRBuilder { void validateSelectOp(const LLT ResTy, const LLT TstTy, const LLT Op0Ty, const LLT Op1Ty); - void recordInsertion(MachineInstr *MI) const; + + void recordInsertion(MachineInstr *InsertedInstr) const { + if (State.Observer) + State.Observer->createdInstr(*InsertedInstr); + } public: /// Some constructors for easy use. @@ -292,10 +296,16 @@ class MachineIRBuilder { /// Set the insertion point before the specified position. /// \pre MBB must be in getMF(). /// \pre II must be a valid iterator in MBB. - void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II); + void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II) { + assert(MBB.getParent() == &getMF() && + "Basic block is in a different function"); + State.MBB = &MBB; + State.II = II; + } + /// @} - void setCSEInfo(GISelCSEInfo *Info); + void setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } /// \name Setters for the insertion point. /// @{ @@ -304,11 +314,20 @@ class MachineIRBuilder { /// Set the insertion point to the end of \p MBB. /// \pre \p MBB must be contained by getMF(). - void setMBB(MachineBasicBlock &MBB); + void setMBB(MachineBasicBlock &MBB) { + State.MBB = &MBB; + State.II = MBB.end(); + assert(&getMF() == MBB.getParent() && + "Basic block is in a different function"); + } /// Set the insertion point to before MI. /// \pre MI must be in getMF(). 
- void setInstr(MachineInstr &MI); + void setInstr(MachineInstr &MI) { + assert(MI.getParent() && "Instruction is not part of a basic block"); + setMBB(*MI.getParent()); + State.II = MI.getIterator(); + } /// @} /// Set the insertion point to before MI, and set the debug loc to MI's loc. @@ -318,8 +337,11 @@ class MachineIRBuilder { setDebugLoc(MI.getDebugLoc()); } - void setChangeObserver(GISelChangeObserver &Observer); - void stopObservingChanges(); + void setChangeObserver(GISelChangeObserver &Observer) { + State.Observer = &Observer; + } + + void stopObservingChanges() { State.Observer = nullptr; } /// @} /// Set the debug location to \p DL for all the next build instructions. @@ -335,7 +357,9 @@ class MachineIRBuilder { /// \pre setBasicBlock or setMI must have been called. /// /// \return a MachineInstrBuilder for the newly created instruction. - MachineInstrBuilder buildInstr(unsigned Opcode); + MachineInstrBuilder buildInstr(unsigned Opcode) { + return insertInstr(buildInstrNoInsert(Opcode)); + } /// Build but don't insert = \p Opcode . 
/// diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 4236fdd8208425..ea98233beb0eca 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -33,48 +33,10 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { State.Observer = nullptr; } -void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) { - State.MBB = &MBB; - State.II = MBB.end(); - assert(&getMF() == MBB.getParent() && - "Basic block is in a different function"); -} - -void MachineIRBuilder::setInstr(MachineInstr &MI) { - assert(MI.getParent() && "Instruction is not part of a basic block"); - setMBB(*MI.getParent()); - State.II = MI.getIterator(); -} - -void MachineIRBuilder::setCSEInfo(GISelCSEInfo *Info) { State.CSEInfo = Info; } - -void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator II) { - assert(MBB.getParent() == &getMF() && - "Basic block is in a different function"); - State.MBB = &MBB; - State.II = II; -} - -void MachineIRBuilder::recordInsertion(MachineInstr *InsertedInstr) const { - if (State.Observer) - State.Observer->createdInstr(*InsertedInstr); -} - -void MachineIRBuilder::setChangeObserver(GISelChangeObserver &Observer) { - State.Observer = &Observer; -} - -void MachineIRBuilder::stopObservingChanges() { State.Observer = nullptr; } - //------------------------------------------------------------------------------ // Build instruction variants. 
//------------------------------------------------------------------------------ -MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) { - return insertInstr(buildInstrNoInsert(Opcode)); -} - MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) { MachineInstrBuilder MIB = BuildMI(getMF(), getDL(), getTII().get(Opcode)); return MIB; From b94c9e3b55ab97f6646018dec2c1d3647c04cda3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 7 Jun 2020 21:37:29 -0400 Subject: [PATCH 17/25] GlobalISel: Improve MachineIRBuilder construction The current relationship between LegalizerHelper and MachineIRBuilder confuses me, because the LegalizerHelper modifies the MachineIRBuilder which it does not own. Constructing a LegalizerHelper destroys the insert point, since the constructor calls setMF, which clears all the fields. Try to separate these functions, so it's possible to construct a LegalizerHelper from an existing MachineIRBuilder without losing the insert point/debug loc. --- .../CodeGen/GlobalISel/MachineIRBuilder.h | 22 +++++++++++++------ llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 1 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 2 -- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 44eac6bb2ba331..d6498345f25c85 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -35,23 +35,23 @@ class GISelChangeObserver; /// to transfer BuilderState between different kinds of MachineIRBuilders. struct MachineIRBuilderState { /// MachineFunction under construction. - MachineFunction *MF; + MachineFunction *MF = nullptr; /// Information used to access the description of the opcodes. - const TargetInstrInfo *TII; + const TargetInstrInfo *TII = nullptr; /// Information used to verify types are consistent and to create virtual registers. 
- MachineRegisterInfo *MRI; + MachineRegisterInfo *MRI = nullptr; /// Debug location to be set to any instruction we create. DebugLoc DL; /// \name Fields describing the insertion point. /// @{ - MachineBasicBlock *MBB; + MachineBasicBlock *MBB = nullptr; MachineBasicBlock::iterator II; /// @} - GISelChangeObserver *Observer; + GISelChangeObserver *Observer = nullptr; - GISelCSEInfo *CSEInfo; + GISelCSEInfo *CSEInfo = nullptr; }; class DstOp { @@ -238,8 +238,16 @@ class MachineIRBuilder { /// Some constructors for easy use. MachineIRBuilder() = default; MachineIRBuilder(MachineFunction &MF) { setMF(MF); } - MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { + + MachineIRBuilder(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt) { + setMF(*MBB.getParent()); + setInsertPt(MBB, InsPt); + } + + MachineIRBuilder(MachineInstr &MI) : + MachineIRBuilder(*MI.getParent(), MI.getIterator()) { setInstr(MI); + setDebugLoc(MI.getDebugLoc()); } virtual ~MachineIRBuilder() = default; diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index a9bfc11d0aa68d..1d7be54de3b045 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -169,6 +169,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, ArrayRef AuxObservers, LostDebugLocObserver &LocObserver, MachineIRBuilder &MIRBuilder) { + MIRBuilder.setMF(MF); MachineRegisterInfo &MRI = MF.getRegInfo(); // Populate worklists. 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index c38d08f41e685d..3a6d499c9cde59 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -87,7 +87,6 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, MachineIRBuilder &Builder) : MIRBuilder(Builder), MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) { - MIRBuilder.setMF(MF); MIRBuilder.setChangeObserver(Observer); } @@ -95,7 +94,6 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI, GISelChangeObserver &Observer, MachineIRBuilder &B) : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) { - MIRBuilder.setMF(MF); MIRBuilder.setChangeObserver(Observer); } LegalizerHelper::LegalizeResult From 113b0d7d0bd637743efb050ad619dd0c6d306e96 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Tue, 9 Jun 2020 12:19:35 -0700 Subject: [PATCH 18/25] PR46255: Fix field diagnostics for C records with anonymous members. The ParseStructUnionBody function was separately keeping track of the field decls for historical reasons, however the "ActOn" functions add the field to the RecordDecl anyway. The "ParseStructDeclaration" function, which handles parsing fields didn't have a way of handling what happens on an anonymous field, and changing it would alter a large amount of objc code, so I chose instead to implement this by just filling the FieldDecls vector with the actual FieldDecls that were successfully added to the RecordDecl. 
--- .../clang/Basic/DiagnosticSemaKinds.td | 5 ++- clang/include/clang/Parse/Parser.h | 2 +- clang/lib/Parse/ParseDecl.cpp | 9 ++-- clang/lib/Parse/ParseDeclCXX.cpp | 2 +- clang/test/Sema/struct-decl.c | 41 +++++++++++++++++++ 5 files changed, 50 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 84bcf66a148e5c..e1adf199a12b93 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5713,8 +5713,9 @@ def ext_flexible_array_union_gnu : Extension< def err_flexible_array_not_at_end : Error< "flexible array member %0 with type %1 is not at the end of" " %select{struct|interface|union|class|enum}2">; -def err_objc_variable_sized_type_not_at_end : Error< - "field %0 with variable sized type %1 is not at the end of class">; +def err_objc_variable_sized_type_not_at_end + : Error<"%select{field %1|unnamed field}0 with variable sized type %2 is " + "not at the end of class">; def note_next_field_declaration : Note< "next field declaration is here">; def note_next_ivar_declaration : Note< diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index b6b161e482ac15..1ae219781c696c 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2333,7 +2333,7 @@ class Parser : public CodeCompletionHandler { AccessSpecifier AS, DeclSpecContext DSC); void ParseEnumBody(SourceLocation StartLoc, Decl *TagDecl); void ParseStructUnionBody(SourceLocation StartLoc, DeclSpec::TST TagType, - Decl *TagDecl); + RecordDecl *TagDecl); void ParseStructDeclaration( ParsingDeclSpec &DS, diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 7e761978455731..79a3b19bac5766 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4249,7 +4249,7 @@ void Parser::ParseStructDeclaration( /// [OBC] '@' 'defs' '(' class-name ')' /// 
void Parser::ParseStructUnionBody(SourceLocation RecordLoc, - DeclSpec::TST TagType, Decl *TagDecl) { + DeclSpec::TST TagType, RecordDecl *TagDecl) { PrettyDeclStackTraceEntry CrashInfo(Actions.Context, TagDecl, RecordLoc, "parsing struct/union body"); assert(!getLangOpts().CPlusPlus && "C++ declarations not supported"); @@ -4261,8 +4261,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, ParseScope StructScope(this, Scope::ClassScope|Scope::DeclScope); Actions.ActOnTagStartDefinition(getCurScope(), TagDecl); - SmallVector FieldDecls; - // While we still have something to read, read the declarations in the struct. while (!tryParseMisplacedModuleImport() && Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { @@ -4314,7 +4312,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, Actions.ActOnField(getCurScope(), TagDecl, FD.D.getDeclSpec().getSourceRange().getBegin(), FD.D, FD.BitfieldSize); - FieldDecls.push_back(Field); FD.complete(Field); }; @@ -4338,7 +4335,6 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, SmallVector Fields; Actions.ActOnDefs(getCurScope(), TagDecl, Tok.getLocation(), Tok.getIdentifierInfo(), Fields); - FieldDecls.insert(FieldDecls.end(), Fields.begin(), Fields.end()); ConsumeToken(); ExpectAndConsume(tok::r_paren); } @@ -4364,6 +4360,9 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, // If attributes exist after struct contents, parse them. 
MaybeParseGNUAttributes(attrs); + SmallVector FieldDecls(TagDecl->field_begin(), + TagDecl->field_end()); + Actions.ActOnFields(getCurScope(), RecordLoc, TagDecl, FieldDecls, T.getOpenLocation(), T.getCloseLocation(), attrs); StructScope.Exit(); diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 1a82475117baac..8753c929287512 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1964,7 +1964,7 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, Decl *D = SkipBody.CheckSameAsPrevious ? SkipBody.New : TagOrTempResult.get(); // Parse the definition body. - ParseStructUnionBody(StartLoc, TagType, D); + ParseStructUnionBody(StartLoc, TagType, cast(D)); if (SkipBody.CheckSameAsPrevious && !Actions.ActOnDuplicateDefinition(DS, TagOrTempResult.get(), SkipBody)) { diff --git a/clang/test/Sema/struct-decl.c b/clang/test/Sema/struct-decl.c index 80cac0e0d145cf..ee3e79182eaa7d 100644 --- a/clang/test/Sema/struct-decl.c +++ b/clang/test/Sema/struct-decl.c @@ -69,3 +69,44 @@ void test_hiding() { struct PreserveAttributes {}; typedef struct __attribute__((noreturn)) PreserveAttributes PreserveAttributes_t; // expected-warning {{'noreturn' attribute only applies to functions and methods}} + +// PR46255 +struct FlexibleArrayMem { + int a; + int b[]; +}; + +struct FollowedByNamed { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + int i; +}; + +struct FollowedByUnNamed { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + struct { + int i; + }; +}; + +struct InAnonymous { + struct { // expected-warning-re {{field '' with variable sized type 'struct InAnonymous::(anonymous at {{.+}})' not at the end of a struct or class is a GNU extension}} + + struct FlexibleArrayMem 
a; + }; + int i; +}; +struct InAnonymousFollowedByAnon { + struct { // expected-warning-re {{field '' with variable sized type 'struct InAnonymousFollowedByAnon::(anonymous at {{.+}})' not at the end of a struct or class is a GNU extension}} + + struct FlexibleArrayMem a; + }; + struct { + int i; + }; +}; + +// This is the behavior in C++ as well, so making sure we reproduce it here. +struct InAnonymousFollowedByEmpty { + struct FlexibleArrayMem a; // expected-warning {{field 'a' with variable sized type 'struct FlexibleArrayMem' not at the end of a struct or class is a GNU extension}} + struct {}; +}; From f71a3b54f0c5c300440c5ce21c76b5f7f41fc626 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 9 Jun 2020 12:23:36 -0400 Subject: [PATCH 19/25] [InstCombine] add tests for diff-of-sums; NFC --- .../InstCombine/vector-reductions.ll | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/vector-reductions.ll diff --git a/llvm/test/Transforms/InstCombine/vector-reductions.ll b/llvm/test/Transforms/InstCombine/vector-reductions.ll new file mode 100644 index 00000000000000..5eac0e09414ca5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/vector-reductions.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>) +declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float, <8 x float>) +declare void @use_f32(float) + +define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_v4f32( +; CHECK-NEXT: [[R0:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = 
fsub reassoc nsz float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub reassoc nsz float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_v4f32_fmf( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub ninf nnan nsz float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_extra_use1( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[R0]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + call void @use_f32(float %r0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + %r = fsub fast float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_extra_use2(float %a0, 
<4 x float> %v0, float %a1, <4 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_extra_use2( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]]) +; CHECK-NEXT: call void @use_f32(float [[R1]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a1, <4 x float> %v1) + call void @use_f32(float %r1) + %r = fsub fast float %r0, %r1 + ret float %r +} + +define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) { +; CHECK-LABEL: @diff_of_sums_type_mismatch( +; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]]) +; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]] +; CHECK-NEXT: ret float [[R]] +; + %r0 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float %a0, <4 x float> %v0) + %r1 = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v8f32(float %a1, <8 x float> %v1) + %r = fsub fast float %r0, %r1 + ret float %r +} From 0e04342ae0399876f3488464d12f5a4da5085456 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 9 Jun 2020 15:14:13 -0400 Subject: [PATCH 20/25] [NFCI] Clean up exceptions related CMake and Lit options in libc++abi and libunwind First, libc++abi doesn't need to add the no-exceptions Lit feature itself, since that is already done in the config.py for libc++, which it reuses. 
Specifically, config.enable_exceptions is set based on @LIBCXXABI_ENABLE_EXCEPTIONS@ in libc++abi's lit.cfg.in, and libc++'s config.py handles that correctly. Secondly, libunwind's LIBUNWIND_ENABLE_EXCEPTIONS is never set (it's probably a remnant of copy-pasting code between the runtime libraries), so the library is always built with exceptions disabled (which makes sense since it implements the runtime support for exceptions). Conversely, the test suite is always run with exceptions enabled (not sure why), but that is preserved by the default behavior of libc++'s config.py. --- libcxxabi/test/libcxxabi/test/config.py | 2 -- libunwind/test/CMakeLists.txt | 1 - libunwind/test/libunwind/test/config.py | 4 ---- libunwind/test/lit.cfg | 3 --- libunwind/test/lit.site.cfg.in | 1 - 5 files changed, 11 deletions(-) diff --git a/libcxxabi/test/libcxxabi/test/config.py b/libcxxabi/test/libcxxabi/test/config.py index fe76d193e79ed2..f1eb453e09f313 100644 --- a/libcxxabi/test/libcxxabi/test/config.py +++ b/libcxxabi/test/libcxxabi/test/config.py @@ -38,8 +38,6 @@ def has_cpp_feature(self, feature, required_value): def configure_features(self): super(Configuration, self).configure_features() - if not self.get_lit_bool('enable_exceptions', True): - self.config.available_features.add('no-exceptions') if not self.has_cpp_feature('noexcept_function_type', 201510): self.config.available_features.add('libcxxabi-no-noexcept-function-type') if not self.get_lit_bool('llvm_unwinder', False): diff --git a/libunwind/test/CMakeLists.txt b/libunwind/test/CMakeLists.txt index 40d4acd4e8c2a9..e608c1708b8abb 100644 --- a/libunwind/test/CMakeLists.txt +++ b/libunwind/test/CMakeLists.txt @@ -15,7 +15,6 @@ pythonize_bool(LIBUNWIND_BUILD_32_BITS) pythonize_bool(LIBCXX_ENABLE_SHARED) pythonize_bool(LIBUNWIND_ENABLE_SHARED) pythonize_bool(LIBUNWIND_ENABLE_THREADS) -pythonize_bool(LIBUNWIND_ENABLE_EXCEPTIONS) pythonize_bool(LIBUNWIND_USES_ARM_EHABI) pythonize_bool(LIBUNWIND_USE_COMPILER_RT) 
pythonize_bool(LIBUNWIND_BUILD_EXTERNAL_THREAD_LIBRARY) diff --git a/libunwind/test/libunwind/test/config.py b/libunwind/test/libunwind/test/config.py index 36501f230272cd..7e4f230d821c75 100644 --- a/libunwind/test/libunwind/test/config.py +++ b/libunwind/test/libunwind/test/config.py @@ -35,15 +35,11 @@ def has_cpp_feature(self, feature, required_value): def configure_features(self): super(Configuration, self).configure_features() - if not self.get_lit_bool('enable_exceptions', True): - self.config.available_features.add('no-exceptions') if self.get_lit_bool('arm_ehabi', False): self.config.available_features.add('libunwind-arm-ehabi') def configure_compile_flags(self): self.cxx.compile_flags += ['-DLIBUNWIND_NO_TIMER'] - if not self.get_lit_bool('enable_exceptions', True): - self.cxx.compile_flags += ['-fno-exceptions', '-DLIBUNWIND_HAS_NO_EXCEPTIONS'] # Stack unwinding tests need unwinding tables and these are not # generated by default on all Targets. self.cxx.compile_flags += ['-funwind-tables'] diff --git a/libunwind/test/lit.cfg b/libunwind/test/lit.cfg index 262f25af0d70a4..7f74bd6e4afb4c 100644 --- a/libunwind/test/lit.cfg +++ b/libunwind/test/lit.cfg @@ -23,9 +23,6 @@ config.suffixes = ['.cpp', '.s'] # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) -# needed to test libunwind with code that throws exceptions -config.enable_exceptions = True - # Infer the libcxx_test_source_root for configuration import. # If libcxx_source_root isn't specified in the config, assume that the libcxx # and libunwind source directories are sibling directories. 
diff --git a/libunwind/test/lit.site.cfg.in b/libunwind/test/lit.site.cfg.in index 37f90a90efdb43..809ad1009f4bdc 100644 --- a/libunwind/test/lit.site.cfg.in +++ b/libunwind/test/lit.site.cfg.in @@ -18,7 +18,6 @@ config.test_compiler_flags = "@LIBUNWIND_TEST_COMPILER_FLAGS@" config.executor = "@LIBUNWIND_EXECUTOR@" config.libunwind_shared = @LIBUNWIND_ENABLE_SHARED@ config.enable_shared = @LIBCXX_ENABLE_SHARED@ -config.enable_exceptions = @LIBUNWIND_ENABLE_EXCEPTIONS@ config.arm_ehabi = @LIBUNWIND_USES_ARM_EHABI@ config.host_triple = "@LLVM_HOST_TRIPLE@" config.target_triple = "@TARGET_TRIPLE@" From 32823091c36cfa2b27b717246f15d4f12591e6f4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 7 Jun 2020 20:57:28 -0400 Subject: [PATCH 21/25] GlobalISel: Set instr/debugloc before any legalizer action It was annoying enough that every custom lowering needed to set the insert point, but this was made worse since now these all needed to be updated to setInstrAndDebugLoc. Consolidate these so every legalization action has the right insert position by default. This should fix dropping debug info in every custom AMDGPU legalization. 
--- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 13 +---- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 4 -- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 58 ------------------- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 6 +- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp | 2 - .../GlobalISel/LegalizerHelperTest.cpp | 37 ++++++++++++ 6 files changed, 43 insertions(+), 77 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3a6d499c9cde59..6c6ef78816b8d9 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -100,6 +100,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: " << MI); + MIRBuilder.setInstrAndDebugLoc(MI); + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) return LI.legalizeIntrinsic(MI, MIRBuilder, Observer) ? 
Legalized @@ -634,8 +636,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); - MIRBuilder.setInstrAndDebugLoc(MI); - switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -731,8 +731,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - MIRBuilder.setInstrAndDebugLoc(MI); - uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); uint64_t NarrowSize = NarrowTy.getSizeInBits(); @@ -1644,8 +1642,6 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { - MIRBuilder.setInstrAndDebugLoc(MI); - switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -2195,8 +2191,6 @@ LegalizerHelper::lowerBitcast(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { - MIRBuilder.setInstr(MI); - switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { if (TypeIdx != 0) @@ -2251,7 +2245,6 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) { LegalizerHelper::LegalizeResult LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { using namespace TargetOpcode; - MIRBuilder.setInstrAndDebugLoc(MI); switch(MI.getOpcode()) { default: @@ -3325,7 +3318,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { using namespace TargetOpcode; - MIRBuilder.setInstrAndDebugLoc(MI); switch (MI.getOpcode()) { case G_IMPLICIT_DEF: return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); @@ -3648,7 +3640,6 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy) 
{ - MIRBuilder.setInstrAndDebugLoc(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { case TargetOpcode::G_IMPLICIT_DEF: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 26dcde47ccfe39..c02f8dd0bb62e3 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -661,7 +661,6 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(MachineInstr &MI, if (GV->isThreadLocal()) return true; // Don't want to modify TLS vars. - MIRBuilder.setInstrAndDebugLoc(MI); auto &TM = ST->getTargetLowering()->getTargetMachine(); unsigned OpFlags = ST->ClassifyGlobalReference(GV, TM); @@ -717,7 +716,6 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr( if (Amount > 31) return true; // This will have to remain a register variant. assert(MRI.getType(AmtReg).getSizeInBits() == 32); - MIRBuilder.setInstrAndDebugLoc(MI); auto ExtCst = MIRBuilder.buildZExt(LLT::scalar(64), AmtReg); MI.getOperand(2).setReg(ExtCst.getReg(0)); return true; @@ -746,7 +744,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore( return false; } - MIRBuilder.setInstrAndDebugLoc(MI); unsigned PtrSize = ValTy.getElementType().getSizeInBits(); const LLT NewTy = LLT::vector(ValTy.getNumElements(), PtrSize); auto &MMO = **MI.memoperands_begin(); @@ -764,7 +761,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore( bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { - MIRBuilder.setInstrAndDebugLoc(MI); MachineFunction &MF = MIRBuilder.getMF(); Align Alignment(MI.getOperand(2).getImm()); Register Dst = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index c1e9e225469259..6d383d409aa89d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1571,8 +1571,6 @@ bool 
AMDGPULegalizerInfo::legalizeAddrSpaceCast( MachineIRBuilder &B) const { MachineFunction &MF = B.getMF(); - B.setInstr(MI); - const LLT S32 = LLT::scalar(32); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1668,8 +1666,6 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( bool AMDGPULegalizerInfo::legalizeFrint( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - Register Src = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(Src); assert(Ty.isScalar() && Ty.getSizeInBits() == 64); @@ -1695,7 +1691,6 @@ bool AMDGPULegalizerInfo::legalizeFrint( bool AMDGPULegalizerInfo::legalizeFceil( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S1 = LLT::scalar(1); const LLT S64 = LLT::scalar(64); @@ -1740,8 +1735,6 @@ static MachineInstrBuilder extractF64Exponent(unsigned Hi, bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - const LLT S1 = LLT::scalar(1); const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); @@ -1786,7 +1779,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc( bool AMDGPULegalizerInfo::legalizeITOFP( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, bool Signed) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1820,7 +1812,6 @@ bool AMDGPULegalizerInfo::legalizeITOFP( bool AMDGPULegalizerInfo::legalizeFPTOI( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, bool Signed) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); @@ -1871,7 +1862,6 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return 
Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized; } @@ -1897,8 +1887,6 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Dst)); - B.setInstr(MI); - if (IdxVal->Value < VecTy.getNumElements()) B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits()); else @@ -1931,8 +1919,6 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt( LLT EltTy = VecTy.getElementType(); assert(EltTy == MRI.getType(Ins)); - B.setInstr(MI); - if (IdxVal->Value < VecTy.getNumElements()) B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits()); else @@ -1959,14 +1945,12 @@ bool AMDGPULegalizerInfo::legalizeShuffleVector( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper DummyObserver; LegalizerHelper Helper(B.getMF(), DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return Helper.lowerShuffleVector(MI) == LegalizerHelper::Legalized; } bool AMDGPULegalizerInfo::legalizeSinCos( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); @@ -2058,7 +2042,6 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( const GlobalValue *GV = MI.getOperand(1).getGlobal(); MachineFunction &MF = B.getMF(); SIMachineFunctionInfo *MFI = MF.getInfo(); - B.setInstr(MI); if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) { if (!MFI->isEntryFunction()) { @@ -2138,7 +2121,6 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( bool AMDGPULegalizerInfo::legalizeLoad( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, GISelChangeObserver &Observer) const { - B.setInstr(MI); LLT ConstPtr = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64); auto Cast = B.buildAddrSpaceCast(ConstPtr, MI.getOperand(1).getReg()); Observer.changingInstr(MI); @@ -2166,7 +2148,6 @@ bool AMDGPULegalizerInfo::legalizeFMad( MachineIRBuilder HelperBuilder(MI); GISelObserverWrapper 
DummyObserver; LegalizerHelper Helper(MF, DummyObserver, HelperBuilder); - HelperBuilder.setInstr(MI); return Helper.lowerFMad(MI) == LegalizerHelper::Legalized; } @@ -2184,7 +2165,6 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg( LLT ValTy = MRI.getType(CmpVal); LLT VecTy = LLT::vector(2, ValTy); - B.setInstr(MI); Register PackedVal = B.buildBuildVector(VecTy, { NewVal, CmpVal }).getReg(0); B.buildInstr(AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG) @@ -2203,7 +2183,6 @@ bool AMDGPULegalizerInfo::legalizeFlog( Register Src = MI.getOperand(1).getReg(); LLT Ty = B.getMRI()->getType(Dst); unsigned Flags = MI.getFlags(); - B.setInstr(MI); auto Log2Operand = B.buildFLog2(Ty, Src, Flags); auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted); @@ -2219,7 +2198,6 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI, Register Src = MI.getOperand(1).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - B.setInstr(MI); auto K = B.buildFConstant(Ty, numbers::log2e); auto Mul = B.buildFMul(Ty, Src, K, Flags); @@ -2235,7 +2213,6 @@ bool AMDGPULegalizerInfo::legalizeFPow(MachineInstr &MI, Register Src1 = MI.getOperand(2).getReg(); unsigned Flags = MI.getFlags(); LLT Ty = B.getMRI()->getType(Dst); - B.setInstr(MI); const LLT S16 = LLT::scalar(16); const LLT S32 = LLT::scalar(32); @@ -2279,7 +2256,6 @@ static Register stripAnySourceMods(Register OrigSrc, MachineRegisterInfo &MRI) { bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S1 = LLT::scalar(1); const LLT S64 = LLT::scalar(64); @@ -2345,7 +2321,6 @@ bool AMDGPULegalizerInfo::legalizeBuildVector( Register Src1 = MI.getOperand(2).getReg(); assert(MRI.getType(Src0) == LLT::scalar(16)); - B.setInstr(MI); auto Merge = B.buildMerge(S32, {Src0, Src1}); B.buildBitcast(Dst, Merge); @@ -2483,7 +2458,6 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, bool 
AMDGPULegalizerInfo::legalizePreloadedArgIntrin( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const { - B.setInstr(MI); const ArgDescriptor *Arg = getArgDescriptor(B, ArgType); if (!Arg) @@ -2499,7 +2473,6 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin( bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Dst = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst); LLT S16 = LLT::scalar(16); @@ -2622,7 +2595,6 @@ void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B, bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const bool IsRem = MI.getOpcode() == AMDGPU::G_UREM; Register DstReg = MI.getOperand(0).getReg(); Register Num = MI.getOperand(1).getReg(); @@ -2678,8 +2650,6 @@ static std::pair emitReciprocalU64(MachineIRBuilder &B, bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - const bool IsDiv = MI.getOpcode() == TargetOpcode::G_UDIV; const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); @@ -2808,7 +2778,6 @@ bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); const LLT S32 = LLT::scalar(32); const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM; @@ -2915,7 +2884,6 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -2978,7 +2946,6 @@ static void toggleSPDenormMode(bool Enable, bool 
AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3045,7 +3012,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); @@ -3124,7 +3090,6 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); Register Res = MI.getOperand(0).getReg(); Register LHS = MI.getOperand(2).getReg(); Register RHS = MI.getOperand(3).getReg(); @@ -3166,8 +3131,6 @@ bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI, AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR); } - B.setInstr(MI); - uint64_t Offset = ST.getTargetLowering()->getImplicitParameterOffset( B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT); @@ -3195,7 +3158,6 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, unsigned AddrSpace) const { - B.setInstr(MI); Register ApertureReg = getSegmentAperture(AddrSpace, MRI, B); auto Hi32 = B.buildExtract(LLT::scalar(32), MI.getOperand(2).getReg(), 32); B.buildICmp(ICmpInst::ICMP_EQ, MI.getOperand(0), Hi32, ApertureReg); @@ -3303,8 +3265,6 @@ bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI, MachineIRBuilder &B, bool IsTyped, bool IsFormat) const { - B.setInstr(MI); - Register VData = MI.getOperand(1).getReg(); LLT Ty = MRI.getType(VData); LLT EltTy = Ty.getScalarType(); @@ -3395,8 +3355,6 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, MachineIRBuilder &B, bool IsFormat, bool 
IsTyped) const { - B.setInstr(MI); - // FIXME: Verifier should enforce 1 MMO for these intrinsics. MachineMemOperand *MMO = *MI.memoperands_begin(); const int MemSize = MMO->getSize(); @@ -3515,7 +3473,6 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B, bool IsInc) const { - B.setInstr(MI); unsigned Opc = IsInc ? AMDGPU::G_AMDGPU_ATOMIC_INC : AMDGPU::G_AMDGPU_ATOMIC_DEC; B.buildInstr(Opc) @@ -3576,8 +3533,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) { bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B, Intrinsic::ID IID) const { - B.setInstr(MI); - const bool IsCmpSwap = IID == Intrinsic::amdgcn_raw_buffer_atomic_cmpswap || IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap; @@ -3733,7 +3688,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const { - B.setInstr(MI); const int NumDefs = MI.getNumExplicitDefs(); bool IsTFE = NumDefs == 2; @@ -3913,8 +3867,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (!Ty.isVector() || Ty.getElementType() != S16) return true; - B.setInstr(MI); - Register RepackedReg = handleD16VData(B, *MRI, VData); if (RepackedReg != VData) { MI.getOperand(1).setReg(RepackedReg); @@ -4118,7 +4070,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( // out this needs to be converted to a vector load during RegBankSelect. if (!isPowerOf2_32(Size)) { LegalizerHelper Helper(MF, *this, Observer, B); - B.setInstr(MI); if (Ty.isVector()) Helper.moreElementsVectorDst(MI, getPow2VectorType(Ty), 0); @@ -4133,8 +4084,6 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad( bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - // Is non-HSA path or trap-handler disabled? 
then, insert s_endpgm instruction if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || !ST.isTrapHandlerEnabled()) { @@ -4165,8 +4114,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI, bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic( MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - B.setInstr(MI); - // Is non-HSA path or trap-handler disabled? then, report a warning // accordingly if (ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbiHsa || @@ -4201,7 +4148,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const SIRegisterInfo *TRI = static_cast(MRI.getTargetRegisterInfo()); - B.setInstr(*BrCond); Register Def = MI.getOperand(1).getReg(); Register Use = MI.getOperand(3).getReg(); @@ -4244,8 +4190,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const SIRegisterInfo *TRI = static_cast(MRI.getTargetRegisterInfo()); - B.setInstr(*BrCond); - MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB(); Register Reg = MI.getOperand(2).getReg(); B.buildInstr(AMDGPU::SI_LOOP) @@ -4267,7 +4211,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, } case Intrinsic::amdgcn_kernarg_segment_ptr: if (!AMDGPU::isKernel(B.getMF().getFunction().getCallingConv())) { - B.setInstr(MI); // This only makes sense to call in a kernel, so just lower to null. 
B.buildConstant(MI.getOperand(0).getReg(), 0); MI.eraseFromParent(); @@ -4315,7 +4258,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, case Intrinsic::amdgcn_is_private: return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::PRIVATE_ADDRESS); case Intrinsic::amdgcn_wavefrontsize: { - B.setInstr(MI); B.buildConstant(MI.getOperand(0), ST.getWavefrontSize()); MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index bec5e0ea082930..040c0ead66db01 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2209,7 +2209,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl( break; const LLT S32 = LLT::scalar(32); - MachineFunction *MF = MI.getParent()->getParent(); + MachineBasicBlock *MBB = MI.getParent(); + MachineFunction *MF = MBB->getParent(); MachineIRBuilder B(MI); ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank); GISelObserverWrapper Observer(&ApplySALU); @@ -2234,9 +2235,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl( if (Helper.widenScalar(MI, 0, S32) != LegalizerHelper::Legalized) llvm_unreachable("widen scalar should have succeeded"); - // FIXME: s16 shift amounts should be lgeal. + // FIXME: s16 shift amounts should be legal. 
if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR || Opc == AMDGPU::G_ASHR) { + B.setInsertPt(*MBB, MI.getIterator()); if (Helper.widenScalar(MI, 1, S32) != LegalizerHelper::Legalized) llvm_unreachable("widen scalar should have succeeded"); } diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp index 6388e8d2d65771..0afc152744136e 100644 --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -333,7 +333,6 @@ bool MipsLegalizerInfo::legalizeCustom(MachineInstr &MI, using namespace TargetOpcode; - MIRBuilder.setInstr(MI); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); @@ -507,7 +506,6 @@ bool MipsLegalizerInfo::legalizeIntrinsic(MachineInstr &MI, const MipsInstrInfo &TII = *ST.getInstrInfo(); const MipsRegisterInfo &TRI = *ST.getRegisterInfo(); const RegisterBankInfo &RBI = *ST.getRegBankInfo(); - MIRBuilder.setInstr(MI); switch (MI.getIntrinsicID()) { case Intrinsic::memcpy: diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 93f4f703d239b7..2cfab39d456228 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -176,6 +176,8 @@ TEST_F(AArch64GISelMITest, LowerBitCountingCTTZ2) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + + B.setInsertPt(*EntryMBB, MIBCTTZ->getIterator()); EXPECT_TRUE(Helper.lower(*MIBCTTZ, 0, LLT::scalar(64)) == LegalizerHelper::LegalizeResult::Legalized); @@ -2583,6 +2585,7 @@ TEST_F(AArch64GISelMITest, BitcastLoad) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; + B.setInsertPt(*EntryMBB, Load->getIterator()); LegalizerHelper Helper(*MF, Info, Observer, B); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Load, 0, S32)); @@ -2618,6 +2621,7 @@ TEST_F(AArch64GISelMITest, 
BitcastStore) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, Store->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Store, 0, S32)); @@ -2651,6 +2655,7 @@ TEST_F(AArch64GISelMITest, BitcastSelect) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, Select->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Select, 0, S32)); @@ -2669,6 +2674,8 @@ TEST_F(AArch64GISelMITest, BitcastSelect) { // Doesn't make sense auto VCond = B.buildUndef(LLT::vector(4, 1)); auto VSelect = B.buildSelect(V4S8, VCond, Val0, Val1); + + B.setInsertPt(*EntryMBB, VSelect->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize, Helper.bitcast(*VSelect, 0, S32)); EXPECT_EQ(LegalizerHelper::LegalizeResult::UnableToLegalize, @@ -2694,10 +2701,15 @@ TEST_F(AArch64GISelMITest, BitcastBitOps) { AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; LegalizerHelper Helper(*MF, Info, Observer, B); + B.setInsertPt(*EntryMBB, And->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*And, 0, S32)); + + B.setInsertPt(*EntryMBB, Or->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Or, 0, S32)); + + B.setInsertPt(*EntryMBB, Xor->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.bitcast(*Xor, 0, S32)); @@ -2773,12 +2785,20 @@ TEST_F(AArch64GISelMITest, NarrowImplicitDef) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, Implicit1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit1, 0, S48)); + + B.setInsertPt(*EntryMBB, Implicit2->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit2, 0, 
S32)); + + B.setInsertPt(*EntryMBB, Implicit3->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit3, 0, S48)); + + B.setInsertPt(*EntryMBB, Implicit4->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*Implicit4, 0, S32)); @@ -2828,8 +2848,12 @@ TEST_F(AArch64GISelMITest, WidenFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeScalar->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.widenScalar(*FreezeScalar, 0, S128)); + + B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.widenScalar(*FreezeVector, 0, V2S64)); @@ -2879,12 +2903,20 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeScalar->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeScalar, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeOdd->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeOdd, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeVector, 0, V2S16)); + + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeVector1, 0, S16)); @@ -2954,8 +2986,12 @@ TEST_F(AArch64GISelMITest, FewerElementsFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.fewerElementsVector(*FreezeVector1, 0, S32)); + + B.setInsertPt(*EntryMBB, FreezeVector2->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, 
Helper.fewerElementsVector(*FreezeVector2, 0, V2S16)); @@ -2998,6 +3034,7 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { LegalizerHelper Helper(*MF, Info, Observer, B); // Perform Legalization + B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.moreElementsVector(*FreezeVector1, 0, V4S32)); From 90ee8cf63613df525c4dab5e715fbae5f7310a78 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Tue, 9 Jun 2020 12:40:37 -0700 Subject: [PATCH 22/25] Undo change inadvertently added in 113b0d7d --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e1adf199a12b93..84bcf66a148e5c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5713,9 +5713,8 @@ def ext_flexible_array_union_gnu : Extension< def err_flexible_array_not_at_end : Error< "flexible array member %0 with type %1 is not at the end of" " %select{struct|interface|union|class|enum}2">; -def err_objc_variable_sized_type_not_at_end - : Error<"%select{field %1|unnamed field}0 with variable sized type %2 is " - "not at the end of class">; +def err_objc_variable_sized_type_not_at_end : Error< + "field %0 with variable sized type %1 is not at the end of class">; def note_next_field_declaration : Note< "next field declaration is here">; def note_next_ivar_declaration : Note< From 6f6d2d238360883039cd17986c9ef598d04995a3 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 9 Jun 2020 15:43:34 -0400 Subject: [PATCH 23/25] [x86] refine conditions for immediate hoisting to save code-size As shown in PR46237: https://bugs.llvm.org/show_bug.cgi?id=46237 The size-savings win for hoisting an 8-bit ALU immediate (intentionally excluding store constants) requires extreme conditions; it may not even be possible when including 
REX prefix bytes on x86-64. I did draft a version of this patch that included use counts after the loop, but I suspect that accounting is not working as expected. I think that is because the number of constant uses are changing as we select instructions (for example as we transform shl/add into LEA). Differential Revision: https://reviews.llvm.org/D81468 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 7 +++--- llvm/test/CodeGen/X86/immediate_merging.ll | 26 +++++++++----------- llvm/test/CodeGen/X86/immediate_merging64.ll | 13 +++++----- llvm/test/CodeGen/X86/pr27202.ll | 15 +++++------ 4 files changed, 30 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 2171be293914e5..fadcb173cd4b9a 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -364,9 +364,10 @@ namespace { if (User->getNumOperands() != 2) continue; - // If this can match to INC/DEC, don't count it as a use. - if (User->getOpcode() == ISD::ADD && - (isOneConstant(SDValue(N, 0)) || isAllOnesConstant(SDValue(N, 0)))) + // If this is a sign-extended 8-bit integer immediate used in an ALU + // instruction, there is probably an opcode encoding to save space. + auto *C = dyn_cast(N); + if (C && isInt<8>(C->getSExtValue())) continue; // Immediates that are used for offsets as part of stack diff --git a/llvm/test/CodeGen/X86/immediate_merging.ll b/llvm/test/CodeGen/X86/immediate_merging.ll index 1bed1014f94e39..038c56f6dd5dd5 100644 --- a/llvm/test/CodeGen/X86/immediate_merging.ll +++ b/llvm/test/CodeGen/X86/immediate_merging.ll @@ -12,16 +12,16 @@ @i = common global i32 0, align 4 ; Test -Os to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). 
+ define i32 @foo() optsize { ; X86-LABEL: foo: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB0_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -38,9 +38,8 @@ define i32 @foo() optsize { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB0_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) @@ -74,16 +73,16 @@ if.end: ; preds = %if.then, %entry } ; Test PGSO to make sure immediates with multiple users don't get pulled in to -; instructions. +; instructions (8-bit immediates are exceptions). 
+ define i32 @foo_pgso() !prof !14 { ; X86-LABEL: foo_pgso: ; X86: # %bb.0: # %entry ; X86-NEXT: movl $1234, %eax # imm = 0x4D2 ; X86-NEXT: movl %eax, a ; X86-NEXT: movl %eax, b -; X86-NEXT: movl $12, %eax -; X86-NEXT: movl %eax, c -; X86-NEXT: cmpl %eax, e +; X86-NEXT: movl $12, c +; X86-NEXT: cmpl $12, e ; X86-NEXT: jne .LBB1_2 ; X86-NEXT: # %bb.1: # %if.then ; X86-NEXT: movl $1, x @@ -100,9 +99,8 @@ define i32 @foo_pgso() !prof !14 { ; X64-NEXT: movl $1234, %eax # imm = 0x4D2 ; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: movl $12, %eax -; X64-NEXT: movl %eax, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, {{.*}}(%rip) +; X64-NEXT: movl $12, {{.*}}(%rip) +; X64-NEXT: cmpl $12, {{.*}}(%rip) ; X64-NEXT: jne .LBB1_2 ; X64-NEXT: # %bb.1: # %if.then ; X64-NEXT: movl $1, {{.*}}(%rip) diff --git a/llvm/test/CodeGen/X86/immediate_merging64.ll b/llvm/test/CodeGen/X86/immediate_merging64.ll index a807a119e89353..d355bea1603a5e 100644 --- a/llvm/test/CodeGen/X86/immediate_merging64.ll +++ b/llvm/test/CodeGen/X86/immediate_merging64.ll @@ -5,13 +5,13 @@ ; 32-bit immediates are merged for code size savings. ; Immediates with multiple users should not be pulled into instructions when -; optimizing for code size. +; optimizing for code size (but 8-bit immediates are exceptions). 
+ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { ; CHECK-LABEL: imm_multiple_users: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 @@ -22,9 +22,8 @@ define i1 @imm_multiple_users(i64 %a, i64* %b) optsize { define i1 @imm_multiple_users_pgso(i64 %a, i64* %b) !prof !14 { ; CHECK-LABEL: imm_multiple_users_pgso: ; CHECK: # %bb.0: -; CHECK-NEXT: movq $-1, %rax -; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: cmpq %rax, %rdi +; CHECK-NEXT: movq $-1, (%rsi) +; CHECK-NEXT: cmpq $-1, %rdi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq store i64 -1, i64* %b, align 8 diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll index ea5781ed8c5fcd..bb6be1d1685da5 100644 --- a/llvm/test/CodeGen/X86/pr27202.ll +++ b/llvm/test/CodeGen/X86/pr27202.ll @@ -14,12 +14,14 @@ define i1 @foo(i32 %i) optsize { ret i1 %cmp } +; 8-bit ALU immediates probably have small encodings. +; We do not want to hoist the constant into a register here. + define zeroext i1 @g(i32 %x) optsize { ; CHECK-LABEL: g: ; CHECK: # %bb.0: -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: orl %eax, %edi -; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: orl $1, %edi +; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %t0 = or i32 %x, 1 @@ -27,7 +29,7 @@ define zeroext i1 @g(i32 %x) optsize { ret i1 %t1 } -; 8-bit immediates probably have small encodings. +; 8-bit ALU immediates probably have small encodings. ; We do not want to hoist the constant into a register here. 
define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { @@ -36,9 +38,8 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll $6, %eax ; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: movl $7, %eax -; CHECK-NEXT: andq %rax, %rsi -; CHECK-NEXT: andq %rax, %rdx +; CHECK-NEXT: andl $7, %esi +; CHECK-NEXT: andl $7, %edx ; CHECK-NEXT: leaq (%rdx,%rsi,8), %rax ; CHECK-NEXT: orq %rcx, %rax ; CHECK-NEXT: retq From 44b355f34b8b0c705909da94fdcdacbe3b00900a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 9 Jun 2020 15:46:31 -0400 Subject: [PATCH 24/25] AMDGPU/GlobalISel: Add new baseline tests for bitcast legalization --- .../AMDGPU/GlobalISel/legalize-bitcast.mir | 1038 ++++++++++++++++- 1 file changed, 1037 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 4cff1a1d1a2f61..98183b01ce364d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s --- name: test_bitcast_s32_to_v2s16 @@ -283,6 +283,36 @@ body: | $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 ... 
+--- +name: test_bitcast_v32s32_to_v16s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + + ; CHECK-LABEL: name: test_bitcast_v32s32_to_v16s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) + %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<16 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v16s64_to_v32s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + + ; CHECK-LABEL: name: test_bitcast_v16s64_to_v32s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) + %0:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(<32 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_bitcast_s24_to_v3s8 body: | @@ -481,3 +511,1009 @@ body: | %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 ... 
+ +--- + +name: test_bitcast_v2s16_to_v4s8 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_bitcast_v2s16_to_v4s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<4 x s8>) + ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT8]](<4 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<4 x s8>) = G_BITCAST %0 + %2:_(<4 x s8>) = G_ADD %1, %1 + %3:_(<4 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 +... 
+ +--- +name: test_bitcast_v4s8_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[COPY]](<4 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<4 x s8>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ADD]](<4 x s8>) + ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s8>) = G_TRUNC %0 + %2:_(<4 x s8>) = G_ADD %1, %1 + %3:_(<2 x s16>) = G_BITCAST %2 + $vgpr0 = COPY %3 +... + +--- +name: test_bitcast_v2s16_to_v8s4 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_bitcast_v2s16_to_v8s4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s4>) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s4), [[UV1:%[0-9]+]]:_(s4), [[UV2:%[0-9]+]]:_(s4), [[UV3:%[0-9]+]]:_(s4), [[UV4:%[0-9]+]]:_(s4), [[UV5:%[0-9]+]]:_(s4), [[UV6:%[0-9]+]]:_(s4), [[UV7:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<8 x s4>) + ; CHECK: [[UV8:%[0-9]+]]:_(s4), [[UV9:%[0-9]+]]:_(s4), [[UV10:%[0-9]+]]:_(s4), [[UV11:%[0-9]+]]:_(s4), [[UV12:%[0-9]+]]:_(s4), [[UV13:%[0-9]+]]:_(s4), [[UV14:%[0-9]+]]:_(s4), [[UV15:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<8 x s4>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s4) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s4) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s4) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s4) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s4) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s4) 
+ ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s4) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s4) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s4) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s4) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s4) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s4) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s4) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s4) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s4) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s4) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s4>) = G_BUILD_VECTOR [[TRUNC]](s4), [[TRUNC1]](s4), [[TRUNC2]](s4), [[TRUNC3]](s4), [[TRUNC4]](s4), [[TRUNC5]](s4), [[TRUNC6]](s4), [[TRUNC7]](s4) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(<8 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s4>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[ANYEXT16]](<8 x s32>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<8 x s4>) = G_BITCAST %0 + %2:_(<8 x s4>) = G_ADD %1, %1 + %3:_(<8 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %3 +... 
+ +--- +name: test_bitcast_v8s4_to_v2s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s4>) = G_TRUNC [[COPY]](<8 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<8 x s4>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[ADD]](<8 x s4>) + ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s4>) = G_TRUNC %0 + %2:_(<8 x s4>) = G_ADD %1, %1 + %3:_(<2 x s16>) = G_BITCAST %2 + $vgpr0 = COPY %3 +... + +--- +name: test_bitcast_v4s16_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v4s16_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v2s32_to_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; 
CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = G_BITCAST %0 + %2:_(<4 x s16>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v2s32_to_v8s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<8 x s8>) + ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<8 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + 
; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8) + ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<8 x s8>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<8 x s8>) = G_BITCAST %0 + %2:_(<8 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... 
+ +--- +name: test_bitcast_v8s8_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[COPY]](<8 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](<8 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s8>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v8s8_to_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v8s8_to_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK: 
[[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s8>) = G_TRUNC %0 + %2:_(s64) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_bitcast_v2s32_to_v16s4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_v2s32_to_v16s4 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s4>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s4), [[UV1:%[0-9]+]]:_(s4), [[UV2:%[0-9]+]]:_(s4), [[UV3:%[0-9]+]]:_(s4), [[UV4:%[0-9]+]]:_(s4), [[UV5:%[0-9]+]]:_(s4), [[UV6:%[0-9]+]]:_(s4), [[UV7:%[0-9]+]]:_(s4), [[UV8:%[0-9]+]]:_(s4), [[UV9:%[0-9]+]]:_(s4), [[UV10:%[0-9]+]]:_(s4), [[UV11:%[0-9]+]]:_(s4), [[UV12:%[0-9]+]]:_(s4), [[UV13:%[0-9]+]]:_(s4), [[UV14:%[0-9]+]]:_(s4), [[UV15:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<16 x s4>) + ; CHECK: [[UV16:%[0-9]+]]:_(s4), [[UV17:%[0-9]+]]:_(s4), [[UV18:%[0-9]+]]:_(s4), [[UV19:%[0-9]+]]:_(s4), [[UV20:%[0-9]+]]:_(s4), [[UV21:%[0-9]+]]:_(s4), [[UV22:%[0-9]+]]:_(s4), [[UV23:%[0-9]+]]:_(s4), [[UV24:%[0-9]+]]:_(s4), [[UV25:%[0-9]+]]:_(s4), [[UV26:%[0-9]+]]:_(s4), [[UV27:%[0-9]+]]:_(s4), [[UV28:%[0-9]+]]:_(s4), [[UV29:%[0-9]+]]:_(s4), [[UV30:%[0-9]+]]:_(s4), [[UV31:%[0-9]+]]:_(s4) = G_UNMERGE_VALUES [[BITCAST]](<16 x s4>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s4) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s4) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s4) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s4) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s4) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s4) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s4) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT 
[[UV19]](s4) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s4) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s4) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s4) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s4) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s4) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s4) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s4) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s4) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s4) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s4) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s4) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s4) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s4) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s4) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s4) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s4) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s4) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s4) + ; CHECK: 
[[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s4) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s4) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s4) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s4) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s4) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s4) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s4) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s4) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s4) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s4) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s4) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s4) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s4) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s4) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s4>) = G_BUILD_VECTOR [[TRUNC]](s4), [[TRUNC1]](s4), [[TRUNC2]](s4), [[TRUNC3]](s4), [[TRUNC4]](s4), [[TRUNC5]](s4), [[TRUNC6]](s4), [[TRUNC7]](s4), [[TRUNC8]](s4), [[TRUNC9]](s4), [[TRUNC10]](s4), [[TRUNC11]](s4), [[TRUNC12]](s4), [[TRUNC13]](s4), [[TRUNC14]](s4), [[TRUNC15]](s4) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s4>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s16>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<16 x s4>) = G_BITCAST %0 + %2:_(<16 x s4>) = G_ADD %1, %1 + %3:_(<16 x s16>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 + +... 
+ +--- +name: test_bitcast_v16s4_to_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + + ; CHECK-LABEL: name: test_bitcast_v16s4_to_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s4>) = G_TRUNC [[COPY]](<16 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](<16 x s4>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + %0:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<16 x s4>) = G_TRUNC %0 + %2:_(<2 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_s64_to_v8s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_bitcast_s64_to_v8s8 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[COPY2]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[LSHR2]](s16) + ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[LSHR2]](s16) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = 
G_ADD [[COPY3]], [[COPY4]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[COPY6]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[LSHR3]](s16) + ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[LSHR3]](s16) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[COPY8]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY9]], [[COPY10]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[LSHR4]](s16) + ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[LSHR4]](s16) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY11]], [[COPY12]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY13]], [[COPY14]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[LSHR5]](s16) + ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[LSHR5]](s16) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY15]], [[COPY16]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) + %0:_(s64) = COPY 
$vgpr0_vgpr1 + %1:_(<8 x s8>) = G_BITCAST %0 + %2:_(<8 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v3s32_to_v12s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_bitcast_v3s32_to_v12s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s8>) = G_BITCAST [[COPY]](<3 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<12 x s8>) + ; CHECK: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<12 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], 
[[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: 
[[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8) + ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<12 x s8>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<12 x s8>) = G_BITCAST %0 + %2:_(<12 x s8>) = G_ADD %1, %1 + S_ENDPGM 0, implicit %2 + +... + +--- +name: test_bitcast_v12s8_to_v3s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11 + + ; CHECK-LABEL: name: test_bitcast_v12s8_to_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>), [[COPY2]](<4 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[TRUNC]](<12 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + %3:_(<12 x s32>) = G_CONCAT_VECTORS %0, %1, %2 + %4:_(<12 x s8>) = G_TRUNC %3 + %5:_(<3 x s32>) = G_BITCAST %4 + S_ENDPGM 0, implicit %5 +... 
+ +--- +name: test_bitcast_v6s8_to_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<6 x s8>) = G_TRUNC [[COPY]](<6 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<6 x s8>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[ADD]](<6 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s16>) + %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x s8>) = G_TRUNC %0 + %2:_(<6 x s8>) = G_ADD %1, %1 + %3:_(<3 x s16>) = G_BITCAST %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_bitcast_v3s16_to_v6s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; CHECK-LABEL: name: test_bitcast_v3s16_to_v6s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s16>) = G_TRUNC [[COPY]](<3 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(<3 x s16>) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s8>) = G_BITCAST [[ADD]](<3 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s8>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(<3 x s16>) = G_ADD %1, %1 + %3:_(<6 x s8>) = G_BITCAST %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v2s64_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v2s64_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: 
[[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: 
[[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v16s8_to_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s64>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<2 x s64>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v4s32_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-LABEL: name: test_bitcast_v4s32_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) 
= G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: 
[[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; 
CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... + +--- +name: test_bitcast_v16s8_to_v4s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<4 x s32>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... 
+ +--- +name: test_bitcast_v8s16_to_v16s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; CHECK-LABEL: name: test_bitcast_v8s16_to_v16s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[COPY]](<8 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8), [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV16]](s8) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s16) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV17]](s8) + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT2]], [[ANYEXT3]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ADD1]](s16) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV18]](s8) + ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT4]], [[ANYEXT5]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD2]](s16) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: 
[[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV19]](s8) + ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT6]], [[ANYEXT7]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[ADD3]](s16) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV20]](s8) + ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT8]], [[ANYEXT9]] + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[ADD4]](s16) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV21]](s8) + ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT10]], [[ANYEXT11]] + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[ADD5]](s16) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV22]](s8) + ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT12]], [[ANYEXT13]] + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[ADD6]](s16) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV23]](s8) + ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT14]], [[ANYEXT15]] + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[ADD7]](s16) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s16) = G_ANYEXT [[UV24]](s8) + ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT16]], [[ANYEXT17]] + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[ADD8]](s16) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s16) = G_ANYEXT [[UV25]](s8) + ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT18]], [[ANYEXT19]] + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[ADD9]](s16) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s16) = G_ANYEXT [[UV26]](s8) + ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT20]], [[ANYEXT21]] + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[ADD10]](s16) + ; CHECK: 
[[ANYEXT22:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s16) = G_ANYEXT [[UV27]](s8) + ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT22]], [[ANYEXT23]] + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[ADD11]](s16) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s16) = G_ANYEXT [[UV28]](s8) + ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT24]], [[ANYEXT25]] + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[ADD12]](s16) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s16) = G_ANYEXT [[UV29]](s8) + ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT26]], [[ANYEXT27]] + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[ADD13]](s16) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s16) = G_ANYEXT [[UV30]](s8) + ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT28]], [[ANYEXT29]] + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s8) = G_TRUNC [[ADD14]](s16) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s16) = G_ANYEXT [[UV31]](s8) + ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[ANYEXT30]], [[ANYEXT31]] + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s8) = G_TRUNC [[ADD15]](s16) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC9]](s8), [[TRUNC10]](s8), [[TRUNC11]](s8), [[TRUNC12]](s8), [[TRUNC13]](s8), [[TRUNC14]](s8), [[TRUNC15]](s8) + ; CHECK: [[ANYEXT32:%[0-9]+]]:_(<16 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[ANYEXT32]](<16 x s32>) + %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<16 x s8>) = G_BITCAST %0 + %2:_(<16 x s8>) = G_ADD %1, %1 + %3:_(<16 x s32>) = G_ANYEXT %2 + S_ENDPGM 0, implicit %3 +... 
+ +--- +name: test_bitcast_v16s8_to_v8s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + + ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[COPY]](<16 x s32>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[TRUNC]](<16 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<8 x s16>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s8>) = G_TRUNC %0 + %2:_(<8 x s16>) = G_BITCAST %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: test_bitcast_v3s64_to_v6s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v6s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<6 x s32>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v6s32_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v6s32_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v3s64_to_v12s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v12s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<12 x s16>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v12s16_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v12s16_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_bitcast_v3s64_to_v24s8 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v3s64_to_v24s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<24 x s8>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<24 x s8>) + %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<24 x s8>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_bitcast_v24s8_to_v3s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + + ; CHECK-LABEL: name: test_bitcast_v24s8_to_v3s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<24 x s8>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<24 x s8>) + ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + %0:_(<24 x s8>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + %1:_(<3 x s64>) = G_BITCAST %0 + S_ENDPGM 0, implicit %1 +... From d5c28c4094324e94f6eee403022ca21c8d76998e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 9 Jun 2020 12:18:08 -0700 Subject: [PATCH 25/25] [X86] Move CPUKind enum from clang to llvm/lib/Support. NFCI Similar to what some other targets have done. This information could be reused by other frontends so doesn't make sense to live in clang. -Rename CK_Generic to CK_None to better reflect its illegalness. -Move function for translating from string to enum into llvm. -Call checkCPUKind directly from the string to enum translation and update CPU kind to CK_None accordingly. Caller will use CK_None as sentinel for bad CPU. I'm planning to move all the CPU to feature mapping out next. As part of that I want to devise a better way to express CPUs inheriting features from an earlier CPU. Allowing this to be expressed in a less rigid way than just falling through a switch. Or using gotos as we've had to do lately.
Differential Revision: https://reviews.llvm.org/D81439 --- clang/include/clang/Basic/X86Target.def | 240 ----------------- clang/lib/Basic/Targets/X86.cpp | 55 ++-- clang/lib/Basic/Targets/X86.h | 22 +- llvm/include/llvm/Support/X86TargetParser.def | 242 ++++++++++++++++++ llvm/include/llvm/Support/X86TargetParser.h | 41 +++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/X86TargetParser.cpp | 58 +++++ 7 files changed, 364 insertions(+), 295 deletions(-) create mode 100644 llvm/include/llvm/Support/X86TargetParser.h create mode 100644 llvm/lib/Support/X86TargetParser.cpp diff --git a/clang/include/clang/Basic/X86Target.def b/clang/include/clang/Basic/X86Target.def index ba4e5981e7dcca..70f3879f33a140 100644 --- a/clang/include/clang/Basic/X86Target.def +++ b/clang/include/clang/Basic/X86Target.def @@ -11,19 +11,6 @@ // //===----------------------------------------------------------------------===// -#ifndef PROC_WITH_FEAT -#define PROC_WITH_FEAT(ENUM, STRING, IS64BIT, KEYFEATURE) \ - PROC(ENUM, STRING, IS64BIT) -#endif - -#ifndef PROC -#define PROC(ENUM, STRING, IS64BIT) -#endif - -#ifndef PROC_ALIAS -#define PROC_ALIAS(ENUM, ALIAS) -#endif - #ifndef FEATURE #define FEATURE(ENUM) #endif @@ -36,230 +23,6 @@ #define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) #endif -#define PROC_64_BIT true -#define PROC_32_BIT false - -/// \name i386 -/// i386-generation processors. -//@{ -PROC(i386, "i386", PROC_32_BIT) -//@} - -/// \name i486 -/// i486-generation processors. -//@{ -PROC(i486, "i486", PROC_32_BIT) -PROC(WinChipC6, "winchip-c6", PROC_32_BIT) -PROC(WinChip2, "winchip2", PROC_32_BIT) -PROC(C3, "c3", PROC_32_BIT) -//@} - -/// \name i586 -/// i586-generation processors, P5 microarchitecture based. -//@{ -PROC(i586, "i586", PROC_32_BIT) -PROC(Pentium, "pentium", PROC_32_BIT) -PROC(PentiumMMX, "pentium-mmx", PROC_32_BIT) -//@} - -/// \name i686 -/// i686-generation processors, P6 / Pentium M microarchitecture based. 
-//@{ -PROC(PentiumPro, "pentiumpro", PROC_32_BIT) -PROC(i686, "i686", PROC_32_BIT) -PROC(Pentium2, "pentium2", PROC_32_BIT) -PROC(Pentium3, "pentium3", PROC_32_BIT) -PROC_ALIAS(Pentium3, "pentium3m") -PROC(PentiumM, "pentium-m", PROC_32_BIT) -PROC(C3_2, "c3-2", PROC_32_BIT) - -/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah. -/// Clang however has some logic to support this. -// FIXME: Warn, deprecate, and potentially remove this. -PROC(Yonah, "yonah", PROC_32_BIT) -//@} - -/// \name Netburst -/// Netburst microarchitecture based processors. -//@{ -PROC(Pentium4, "pentium4", PROC_32_BIT) -PROC_ALIAS(Pentium4, "pentium4m") - -PROC(Prescott, "prescott", PROC_32_BIT) -PROC(Nocona, "nocona", PROC_64_BIT) -//@} - -/// \name Core -/// Core microarchitecture based processors. -//@{ -PROC_WITH_FEAT(Core2, "core2", PROC_64_BIT, FEATURE_SSSE3) - -/// This enumerator, like Yonah, is a bit odd. It is another -/// codename which GCC no longer accepts as an option to -march, but Clang -/// has some logic for recognizing it. -// FIXME: Warn, deprecate, and potentially remove this. -PROC(Penryn, "penryn", PROC_64_BIT) -//@} - -/// \name Atom -/// Atom processors -//@{ -PROC_WITH_FEAT(Bonnell, "bonnell", PROC_64_BIT, FEATURE_SSSE3) -PROC_ALIAS(Bonnell, "atom") - -PROC_WITH_FEAT(Silvermont, "silvermont", PROC_64_BIT, FEATURE_SSE4_2) -PROC_ALIAS(Silvermont, "slm") - -PROC(Goldmont, "goldmont", PROC_64_BIT) -PROC(GoldmontPlus, "goldmont-plus", PROC_64_BIT) - -PROC(Tremont, "tremont", PROC_64_BIT) -//@} - -/// \name Nehalem -/// Nehalem microarchitecture based processors. -PROC_WITH_FEAT(Nehalem, "nehalem", PROC_64_BIT, FEATURE_SSE4_2) -PROC_ALIAS(Nehalem, "corei7") - -/// \name Westmere -/// Westmere microarchitecture based processors. -PROC_WITH_FEAT(Westmere, "westmere", PROC_64_BIT, FEATURE_PCLMUL) - -/// \name Sandy Bridge -/// Sandy Bridge microarchitecture based processors. 
-PROC_WITH_FEAT(SandyBridge, "sandybridge", PROC_64_BIT, FEATURE_AVX) -PROC_ALIAS(SandyBridge, "corei7-avx") - -/// \name Ivy Bridge -/// Ivy Bridge microarchitecture based processors. -PROC_WITH_FEAT(IvyBridge, "ivybridge", PROC_64_BIT, FEATURE_AVX) -PROC_ALIAS(IvyBridge, "core-avx-i") - -/// \name Haswell -/// Haswell microarchitecture based processors. -PROC_WITH_FEAT(Haswell, "haswell", PROC_64_BIT, FEATURE_AVX2) -PROC_ALIAS(Haswell, "core-avx2") - -/// \name Broadwell -/// Broadwell microarchitecture based processors. -PROC_WITH_FEAT(Broadwell, "broadwell", PROC_64_BIT, FEATURE_AVX2) - -/// \name Skylake Client -/// Skylake client microarchitecture based processors. -PROC_WITH_FEAT(SkylakeClient, "skylake", PROC_64_BIT, FEATURE_AVX2) - -/// \name Skylake Server -/// Skylake server microarchitecture based processors. -PROC_WITH_FEAT(SkylakeServer, "skylake-avx512", PROC_64_BIT, FEATURE_AVX512F) -PROC_ALIAS(SkylakeServer, "skx") - -/// \name Cascadelake Server -/// Cascadelake Server microarchitecture based processors. -PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI) - -/// \name Cooperlake Server -/// Cooperlake Server microarchitecture based processors. -PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16) - -/// \name Cannonlake Client -/// Cannonlake client microarchitecture based processors. -PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI) - -/// \name Icelake Client -/// Icelake client microarchitecture based processors. -PROC(IcelakeClient, "icelake-client", PROC_64_BIT) - -/// \name Icelake Server -/// Icelake server microarchitecture based processors. -PROC(IcelakeServer, "icelake-server", PROC_64_BIT) - -/// \name Tigerlake -/// Tigerlake microarchitecture based processors. -PROC(Tigerlake, "tigerlake", PROC_64_BIT) - -/// \name Knights Landing -/// Knights Landing processor. 
-PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) - -/// \name Knights Mill -/// Knights Mill processor. -PROC_WITH_FEAT(KNM, "knm", PROC_64_BIT, FEATURE_AVX5124FMAPS) - -/// \name Lakemont -/// Lakemont microarchitecture based processors. -PROC(Lakemont, "lakemont", PROC_32_BIT) - -/// \name K6 -/// K6 architecture processors. -//@{ -PROC(K6, "k6", PROC_32_BIT) -PROC(K6_2, "k6-2", PROC_32_BIT) -PROC(K6_3, "k6-3", PROC_32_BIT) -//@} - -/// \name K7 -/// K7 architecture processors. -//@{ -PROC(Athlon, "athlon", PROC_32_BIT) -PROC_ALIAS(Athlon, "athlon-tbird") - -PROC(AthlonXP, "athlon-xp", PROC_32_BIT) -PROC_ALIAS(AthlonXP, "athlon-mp") -PROC_ALIAS(AthlonXP, "athlon-4") -//@} - -/// \name K8 -/// K8 architecture processors. -//@{ -PROC(K8, "k8", PROC_64_BIT) -PROC_ALIAS(K8, "athlon64") -PROC_ALIAS(K8, "athlon-fx") -PROC_ALIAS(K8, "opteron") - -PROC(K8SSE3, "k8-sse3", PROC_64_BIT) -PROC_ALIAS(K8SSE3, "athlon64-sse3") -PROC_ALIAS(K8SSE3, "opteron-sse3") - -PROC_WITH_FEAT(AMDFAM10, "amdfam10", PROC_64_BIT, FEATURE_SSE4_A) -PROC_ALIAS(AMDFAM10, "barcelona") -//@} - -/// \name Bobcat -/// Bobcat architecture processors. -//@{ -PROC_WITH_FEAT(BTVER1, "btver1", PROC_64_BIT, FEATURE_SSE4_A) -PROC_WITH_FEAT(BTVER2, "btver2", PROC_64_BIT, FEATURE_BMI) -//@} - -/// \name Bulldozer -/// Bulldozer architecture processors. -//@{ -PROC_WITH_FEAT(BDVER1, "bdver1", PROC_64_BIT, FEATURE_XOP) -PROC_WITH_FEAT(BDVER2, "bdver2", PROC_64_BIT, FEATURE_FMA) -PROC_WITH_FEAT(BDVER3, "bdver3", PROC_64_BIT, FEATURE_FMA) -PROC_WITH_FEAT(BDVER4, "bdver4", PROC_64_BIT, FEATURE_AVX2) -//@} - -/// \name zen -/// Zen architecture processors. -//@{ -PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2) -PROC_WITH_FEAT(ZNVER2, "znver2", PROC_64_BIT, FEATURE_AVX2) -//@} - -/// This specification is deprecated and will be removed in the future. -/// Users should prefer K8. -// FIXME: Warn on this when the CPU is set to it. 
-//@{ -PROC(x86_64, "x86-64", PROC_64_BIT) -//@} - -/// \name Geode -/// Geode processors. -//@{ -PROC(Geode, "geode", PROC_32_BIT) -//@} - // List of CPU Supports features in order. These need to remain in the order // required by attribute 'target' checking. Note that not all are supported/ // prioritized by GCC, so synchronization with GCC's implementation may require @@ -345,6 +108,3 @@ CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+mo #undef PROC_64_BIT #undef PROC_32_BIT #undef FEATURE -#undef PROC -#undef PROC_ALIAS -#undef PROC_WITH_FEAT diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index b87490a6a85898..05c6ec22af3a89 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -109,7 +109,8 @@ bool X86TargetInfo::initFeatureMap( if (getTriple().getArch() == llvm::Triple::x86_64) setFeatureEnabledImpl(Features, "sse2", true); - const CPUKind Kind = getCPUKind(CPU); + using namespace llvm::X86; + const enum CPUKind Kind = parseArchX86(CPU); // Enable X87 for all X86 processors but Lakemont. if (Kind != CK_Lakemont) @@ -117,11 +118,11 @@ bool X86TargetInfo::initFeatureMap( // Enable cmpxchg8 for i586 and greater CPUs. Include generic for backwards // compatibility. - if (Kind >= CK_i586 || Kind == CK_Generic) + if (Kind >= CK_i586 || Kind == CK_None) setFeatureEnabledImpl(Features, "cx8", true); switch (Kind) { - case CK_Generic: + case CK_None: case CK_i386: case CK_i486: case CK_i586: @@ -936,8 +937,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, // Subtarget options. // FIXME: We are hard-coding the tune parameters based on the CPU, but they // truly should be based on -mtune options. + using namespace llvm::X86; switch (CPU) { - case CK_Generic: + case CK_None: break; case CK_i386: // The rest are coming from the i386 define above. 
@@ -1324,7 +1326,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, break; } - if (CPU >= CK_i486 || CPU == CK_Generic) { + if (CPU >= CK_i486 || CPU == CK_None) { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); @@ -1548,8 +1550,9 @@ static unsigned getFeaturePriority(llvm::X86::ProcessorFeatures Feat) { unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { // Valid CPUs have a 'key feature' that compares just better than its key // feature. - CPUKind Kind = getCPUKind(Name); - if (Kind != CK_Generic) { + using namespace llvm::X86; + CPUKind Kind = parseArchX86(Name); + if (Kind != CK_None) { switch (Kind) { default: llvm_unreachable( @@ -1557,7 +1560,7 @@ unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { #define PROC_WITH_FEAT(ENUM, STR, IS64, KEY_FEAT) \ case CK_##ENUM: \ return (getFeaturePriority(llvm::X86::KEY_FEAT) << 1) + 1; -#include "clang/Basic/X86Target.def" +#include "llvm/Support/X86TargetParser.def" } } @@ -1761,6 +1764,7 @@ bool X86TargetInfo::validateAsmConstraint( // | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " | // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ Optional X86TargetInfo::getCPUCacheLineSize() const { + using namespace llvm::X86; switch (CPU) { // i386 case CK_i386: @@ -1846,7 +1850,7 @@ Optional X86TargetInfo::getCPUCacheLineSize() const { // The following currently have unknown cache line sizes (but they are probably all 64): // Core - case CK_Generic: + case CK_None: return None; } llvm_unreachable("Unknown CPU kind"); @@ -1977,38 +1981,9 @@ 
std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { } } -bool X86TargetInfo::checkCPUKind(CPUKind Kind) const { - // Perform any per-CPU checks necessary to determine if this CPU is - // acceptable. - switch (Kind) { - case CK_Generic: - // No processor selected! - return false; -#define PROC(ENUM, STRING, IS64BIT) \ - case CK_##ENUM: \ - return IS64BIT || getTriple().getArch() == llvm::Triple::x86; -#include "clang/Basic/X86Target.def" - } - llvm_unreachable("Unhandled CPU kind"); -} - void X86TargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { -#define PROC(ENUM, STRING, IS64BIT) \ - if (IS64BIT || getTriple().getArch() == llvm::Triple::x86) \ - Values.emplace_back(STRING); - // For aliases we need to lookup the CPUKind to check get the 64-bit ness. -#define PROC_ALIAS(ENUM, ALIAS) \ - if (checkCPUKind(CK_##ENUM)) \ - Values.emplace_back(ALIAS); -#include "clang/Basic/X86Target.def" -} - -X86TargetInfo::CPUKind X86TargetInfo::getCPUKind(StringRef CPU) const { - return llvm::StringSwitch(CPU) -#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) -#define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) -#include "clang/Basic/X86Target.def" - .Default(CK_Generic); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + llvm::X86::fillValidCPUArchList(Values, Only64Bit); } ArrayRef X86TargetInfo::getGCCRegNames() const { diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 39ccac96a49d8a..c33c608e27c843 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -18,6 +18,7 @@ #include "clang/Basic/TargetOptions.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/X86TargetParser.h" namespace clang { namespace targets { @@ -128,19 +129,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasTSXLDTRK = false; protected: - /// Enumeration of all of the X86 CPUs supported by Clang. 
- /// - /// Each enumeration represents a particular CPU supported by Clang. These - /// loosely correspond to the options passed to '-march' or '-mtune' flags. - enum CPUKind { - CK_Generic, -#define PROC(ENUM, STRING, IS64BIT) CK_##ENUM, -#include "clang/Basic/X86Target.def" - } CPU = CK_Generic; - - bool checkCPUKind(CPUKind Kind) const; - - CPUKind getCPUKind(StringRef CPU) const; + llvm::X86::CPUKind CPU = llvm::X86::CK_None; enum FPMathKind { FP_Default, FP_SSE, FP_387 } FPMath = FP_Default; @@ -313,13 +302,16 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { } bool isValidCPUName(StringRef Name) const override { - return checkCPUKind(getCPUKind(Name)); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + return llvm::X86::parseArchX86(Name, Only64Bit) != llvm::X86::CK_None; } void fillValidCPUList(SmallVectorImpl &Values) const override; bool setCPU(const std::string &Name) override { - return checkCPUKind(CPU = getCPUKind(Name)); + bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; + CPU = llvm::X86::parseArchX86(Name, Only64Bit); + return CPU != llvm::X86::CK_None; } unsigned multiVersionSortPriority(StringRef Name) const override; diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index aef189a562a5b0..4d2b615e9d3de3 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -177,3 +177,245 @@ X86_FEATURE (67, FEATURE_CLFLUSHOPT) X86_FEATURE (68, FEATURE_SHA) #undef X86_FEATURE_COMPAT #undef X86_FEATURE + + +#ifndef PROC_WITH_FEAT +#define PROC_WITH_FEAT(ENUM, STRING, IS64BIT, KEYFEATURE) \ + PROC(ENUM, STRING, IS64BIT) +#endif + +#ifndef PROC +#define PROC(ENUM, STRING, IS64BIT) +#endif + +#ifndef PROC_ALIAS +#define PROC_ALIAS(ENUM, ALIAS) +#endif + +#define PROC_64_BIT true +#define PROC_32_BIT false + +/// \name i386 +/// i386-generation processors. 
+//@{ +PROC(i386, "i386", PROC_32_BIT) +//@} + +/// \name i486 +/// i486-generation processors. +//@{ +PROC(i486, "i486", PROC_32_BIT) +PROC(WinChipC6, "winchip-c6", PROC_32_BIT) +PROC(WinChip2, "winchip2", PROC_32_BIT) +PROC(C3, "c3", PROC_32_BIT) +//@} + +/// \name i586 +/// i586-generation processors, P5 microarchitecture based. +//@{ +PROC(i586, "i586", PROC_32_BIT) +PROC(Pentium, "pentium", PROC_32_BIT) +PROC(PentiumMMX, "pentium-mmx", PROC_32_BIT) +//@} + +/// \name i686 +/// i686-generation processors, P6 / Pentium M microarchitecture based. +//@{ +PROC(PentiumPro, "pentiumpro", PROC_32_BIT) +PROC(i686, "i686", PROC_32_BIT) +PROC(Pentium2, "pentium2", PROC_32_BIT) +PROC(Pentium3, "pentium3", PROC_32_BIT) +PROC_ALIAS(Pentium3, "pentium3m") +PROC(PentiumM, "pentium-m", PROC_32_BIT) +PROC(C3_2, "c3-2", PROC_32_BIT) + +/// This enumerator is a bit odd, as GCC no longer accepts -march=yonah. +/// Clang however has some logic to support this. +// FIXME: Warn, deprecate, and potentially remove this. +PROC(Yonah, "yonah", PROC_32_BIT) +//@} + +/// \name Netburst +/// Netburst microarchitecture based processors. +//@{ +PROC(Pentium4, "pentium4", PROC_32_BIT) +PROC_ALIAS(Pentium4, "pentium4m") + +PROC(Prescott, "prescott", PROC_32_BIT) +PROC(Nocona, "nocona", PROC_64_BIT) +//@} + +/// \name Core +/// Core microarchitecture based processors. +//@{ +PROC_WITH_FEAT(Core2, "core2", PROC_64_BIT, FEATURE_SSSE3) + +/// This enumerator, like Yonah, is a bit odd. It is another +/// codename which GCC no longer accepts as an option to -march, but Clang +/// has some logic for recognizing it. +// FIXME: Warn, deprecate, and potentially remove this. 
+PROC(Penryn, "penryn", PROC_64_BIT) +//@} + +/// \name Atom +/// Atom processors +//@{ +PROC_WITH_FEAT(Bonnell, "bonnell", PROC_64_BIT, FEATURE_SSSE3) +PROC_ALIAS(Bonnell, "atom") + +PROC_WITH_FEAT(Silvermont, "silvermont", PROC_64_BIT, FEATURE_SSE4_2) +PROC_ALIAS(Silvermont, "slm") + +PROC(Goldmont, "goldmont", PROC_64_BIT) +PROC(GoldmontPlus, "goldmont-plus", PROC_64_BIT) + +PROC(Tremont, "tremont", PROC_64_BIT) +//@} + +/// \name Nehalem +/// Nehalem microarchitecture based processors. +PROC_WITH_FEAT(Nehalem, "nehalem", PROC_64_BIT, FEATURE_SSE4_2) +PROC_ALIAS(Nehalem, "corei7") + +/// \name Westmere +/// Westmere microarchitecture based processors. +PROC_WITH_FEAT(Westmere, "westmere", PROC_64_BIT, FEATURE_PCLMUL) + +/// \name Sandy Bridge +/// Sandy Bridge microarchitecture based processors. +PROC_WITH_FEAT(SandyBridge, "sandybridge", PROC_64_BIT, FEATURE_AVX) +PROC_ALIAS(SandyBridge, "corei7-avx") + +/// \name Ivy Bridge +/// Ivy Bridge microarchitecture based processors. +PROC_WITH_FEAT(IvyBridge, "ivybridge", PROC_64_BIT, FEATURE_AVX) +PROC_ALIAS(IvyBridge, "core-avx-i") + +/// \name Haswell +/// Haswell microarchitecture based processors. +PROC_WITH_FEAT(Haswell, "haswell", PROC_64_BIT, FEATURE_AVX2) +PROC_ALIAS(Haswell, "core-avx2") + +/// \name Broadwell +/// Broadwell microarchitecture based processors. +PROC_WITH_FEAT(Broadwell, "broadwell", PROC_64_BIT, FEATURE_AVX2) + +/// \name Skylake Client +/// Skylake client microarchitecture based processors. +PROC_WITH_FEAT(SkylakeClient, "skylake", PROC_64_BIT, FEATURE_AVX2) + +/// \name Skylake Server +/// Skylake server microarchitecture based processors. +PROC_WITH_FEAT(SkylakeServer, "skylake-avx512", PROC_64_BIT, FEATURE_AVX512F) +PROC_ALIAS(SkylakeServer, "skx") + +/// \name Cascadelake Server +/// Cascadelake Server microarchitecture based processors. 
+PROC_WITH_FEAT(Cascadelake, "cascadelake", PROC_64_BIT, FEATURE_AVX512VNNI) + +/// \name Cooperlake Server +/// Cooperlake Server microarchitecture based processors. +PROC_WITH_FEAT(Cooperlake, "cooperlake", PROC_64_BIT, FEATURE_AVX512BF16) + +/// \name Cannonlake Client +/// Cannonlake client microarchitecture based processors. +PROC_WITH_FEAT(Cannonlake, "cannonlake", PROC_64_BIT, FEATURE_AVX512VBMI) + +/// \name Icelake Client +/// Icelake client microarchitecture based processors. +PROC(IcelakeClient, "icelake-client", PROC_64_BIT) + +/// \name Icelake Server +/// Icelake server microarchitecture based processors. +PROC(IcelakeServer, "icelake-server", PROC_64_BIT) + +/// \name Tigerlake +/// Tigerlake microarchitecture based processors. +PROC(Tigerlake, "tigerlake", PROC_64_BIT) + +/// \name Knights Landing +/// Knights Landing processor. +PROC_WITH_FEAT(KNL, "knl", PROC_64_BIT, FEATURE_AVX512F) + +/// \name Knights Mill +/// Knights Mill processor. +PROC_WITH_FEAT(KNM, "knm", PROC_64_BIT, FEATURE_AVX5124FMAPS) + +/// \name Lakemont +/// Lakemont microarchitecture based processors. +PROC(Lakemont, "lakemont", PROC_32_BIT) + +/// \name K6 +/// K6 architecture processors. +//@{ +PROC(K6, "k6", PROC_32_BIT) +PROC(K6_2, "k6-2", PROC_32_BIT) +PROC(K6_3, "k6-3", PROC_32_BIT) +//@} + +/// \name K7 +/// K7 architecture processors. +//@{ +PROC(Athlon, "athlon", PROC_32_BIT) +PROC_ALIAS(Athlon, "athlon-tbird") + +PROC(AthlonXP, "athlon-xp", PROC_32_BIT) +PROC_ALIAS(AthlonXP, "athlon-mp") +PROC_ALIAS(AthlonXP, "athlon-4") +//@} + +/// \name K8 +/// K8 architecture processors. +//@{ +PROC(K8, "k8", PROC_64_BIT) +PROC_ALIAS(K8, "athlon64") +PROC_ALIAS(K8, "athlon-fx") +PROC_ALIAS(K8, "opteron") + +PROC(K8SSE3, "k8-sse3", PROC_64_BIT) +PROC_ALIAS(K8SSE3, "athlon64-sse3") +PROC_ALIAS(K8SSE3, "opteron-sse3") + +PROC_WITH_FEAT(AMDFAM10, "amdfam10", PROC_64_BIT, FEATURE_SSE4_A) +PROC_ALIAS(AMDFAM10, "barcelona") +//@} + +/// \name Bobcat +/// Bobcat architecture processors. 
+//@{ +PROC_WITH_FEAT(BTVER1, "btver1", PROC_64_BIT, FEATURE_SSE4_A) +PROC_WITH_FEAT(BTVER2, "btver2", PROC_64_BIT, FEATURE_BMI) +//@} + +/// \name Bulldozer +/// Bulldozer architecture processors. +//@{ +PROC_WITH_FEAT(BDVER1, "bdver1", PROC_64_BIT, FEATURE_XOP) +PROC_WITH_FEAT(BDVER2, "bdver2", PROC_64_BIT, FEATURE_FMA) +PROC_WITH_FEAT(BDVER3, "bdver3", PROC_64_BIT, FEATURE_FMA) +PROC_WITH_FEAT(BDVER4, "bdver4", PROC_64_BIT, FEATURE_AVX2) +//@} + +/// \name zen +/// Zen architecture processors. +//@{ +PROC_WITH_FEAT(ZNVER1, "znver1", PROC_64_BIT, FEATURE_AVX2) +PROC_WITH_FEAT(ZNVER2, "znver2", PROC_64_BIT, FEATURE_AVX2) +//@} + +/// This specification is deprecated and will be removed in the future. +/// Users should prefer K8. +// FIXME: Warn on this when the CPU is set to it. +//@{ +PROC(x86_64, "x86-64", PROC_64_BIT) +//@} + +/// \name Geode +/// Geode processors. +//@{ +PROC(Geode, "geode", PROC_32_BIT) +//@} + +#undef PROC +#undef PROC_ALIAS +#undef PROC_WITH_FEAT diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h new file mode 100644 index 00000000000000..1c9ad03cde8135 --- /dev/null +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -0,0 +1,41 @@ +//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise X86 hardware features. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_X86TARGETPARSERCOMMON_H +#define LLVM_SUPPORT_X86TARGETPARSERCOMMON_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { +class StringRef; + +namespace X86 { + +enum CPUKind { + CK_None, +#define PROC(ENUM, STRING, IS64BIT) CK_##ENUM, +#include "llvm/Support/X86TargetParser.def" +}; + +/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if +/// \p Only64Bit is true. +CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false); + +/// Provide a list of valid CPU names. If \p Only64Bit is true, the list will +/// only contain 64-bit capable CPUs. +void fillValidCPUArchList(SmallVectorImpl &Values, + bool Only64Bit); + +} // namespace X86 +} // namespace llvm + +#endif diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 6a3448dc3f8566..17bef023078976 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -158,6 +158,7 @@ add_llvm_component_library(LLVMSupport VersionTuple.cpp VirtualFileSystem.cpp WithColor.cpp + X86TargetParser.cpp YAMLParser.cpp YAMLTraits.cpp raw_os_ostream.cpp diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp new file mode 100644 index 00000000000000..ba85ed7ee6260c --- /dev/null +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -0,0 +1,58 @@ +//===-- X86TargetParser - Parser for X86 features ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a target parser to recognise X86 hardware features. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/X86TargetParser.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" + +using namespace llvm; + +bool checkCPUKind(llvm::X86::CPUKind Kind, bool Only64Bit) { + using namespace X86; + // Perform any per-CPU checks necessary to determine if this CPU is + // acceptable. + switch (Kind) { + case CK_None: + // No processor selected! + return false; +#define PROC(ENUM, STRING, IS64BIT) \ + case CK_##ENUM: \ + return IS64BIT || !Only64Bit; +#include "llvm/Support/X86TargetParser.def" + } + llvm_unreachable("Unhandled CPU kind"); +} + +X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) { + X86::CPUKind Kind = llvm::StringSwitch(CPU) +#define PROC(ENUM, STRING, IS64BIT) .Case(STRING, CK_##ENUM) +#define PROC_ALIAS(ENUM, ALIAS) .Case(ALIAS, CK_##ENUM) +#include "llvm/Support/X86TargetParser.def" + .Default(CK_None); + + if (!checkCPUKind(Kind, Only64Bit)) + Kind = CK_None; + + return Kind; +} + +void llvm::X86::fillValidCPUArchList(SmallVectorImpl &Values, + bool Only64Bit) { +#define PROC(ENUM, STRING, IS64BIT) \ + if (IS64BIT || !Only64Bit) \ + Values.emplace_back(STRING); + // For aliases we need to lookup the CPUKind to get the 64-bit ness. +#define PROC_ALIAS(ENUM, ALIAS) \ + if (checkCPUKind(CK_##ENUM, Only64Bit)) \ + Values.emplace_back(ALIAS); +#include "llvm/Support/X86TargetParser.def" +}