From ad5541045a63fe3049fc910d843bcbb78f7c7056 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 12 Oct 2020 16:13:26 +0100 Subject: [PATCH 001/123] [LoopDeletion] Remove over-eager SCEV verification. 60b852092c98dbdc6248d60109d90ae6f8ad841c introduced SCEV verification to deleteDeadLoop, but it appears this check is currently a bit over-eager and some users of deleteDeadLoop appear to only patch up SE after calling it (e.g. PR47753). Remove the extra check for now. We can consider adding it back after we tracked down the source of the inconsistency for PR47753. --- llvm/lib/Transforms/Utils/LoopUtils.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index ccb9b6d0bdb4c2..d7cd9b19b8d51d 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -724,11 +724,6 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } LI->destroy(L); } - -#ifndef NDEBUG - if (SE) - SE->verify(); -#endif } /// Checks if \p L has single exit through latch block except possibly From c2216d796aab7659771c05303f9d78bad4aeca07 Mon Sep 17 00:00:00 2001 From: Sebastian Neubauer Date: Mon, 12 Oct 2020 16:38:14 +0200 Subject: [PATCH 002/123] [AMDGPU] Print metadata on error If the metadata is valid yaml, we can print it, even if it failed validation. That makes it easier to debug any wrong metadata. Differential Revision: https://reviews.llvm.org/D89243 --- llvm/tools/llvm-readobj/ELFDumper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index e7ee0793b903ab..fcc5c002c62480 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -5108,10 +5108,10 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef Desc) { return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"}; AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true); + std::string HSAMetadataString; if (!Verifier.verify(MsgPackDoc.getRoot())) - return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"}; + HSAMetadataString = "Invalid AMDGPU Metadata\n"; - std::string HSAMetadataString; raw_string_ostream StrOS(HSAMetadataString); MsgPackDoc.toYAML(StrOS); From 596a9f6b89d0d3e3f2897132ef1283941bd3607b Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 9 Oct 2020 16:38:42 -0700 Subject: [PATCH 003/123] [NFC][Regalloc] Pass VirtRegMap by reference. It's never null - the reason it's modeled as a pointer is because the pass can't init it in its ctor. Passing by ref simplifies the code, too, as the null checks were unnecessary complexity. 
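To make the simplification concrete, here is a minimal standalone sketch
(illustrative only -- `MiniVRM`, `AuxInfoPtr` and `AuxInfoRef` are invented
stand-ins for this note, not the LLVM classes) of how switching a member from
pointer to reference encodes the never-null invariant in the type and lets
the defensive checks be deleted:

  #include <cassert>

  // Stand-in for VirtRegMap, invented for this sketch.
  struct MiniVRM {
    unsigned getOriginal(unsigned Reg) const { return Reg; }
  };

  // Before: a pointer member invites a null check at every use site,
  // even though callers always pass a valid object.
  class AuxInfoPtr {
    const MiniVRM *const VRM;
  public:
    explicit AuxInfoPtr(const MiniVRM *V) : VRM(V) {}
    unsigned original(unsigned Reg) const {
      return VRM ? VRM->getOriginal(Reg) : 0; // dead branch in practice
    }
  };

  // After: a reference member states the invariant once, in the type.
  class AuxInfoRef {
    const MiniVRM &VRM;
  public:
    explicit AuxInfoRef(const MiniVRM &V) : VRM(V) {}
    unsigned original(unsigned Reg) const {
      return VRM.getOriginal(Reg); // no null check needed, none possible
    }
  };

  int main() {
    MiniVRM M;
    assert(AuxInfoPtr(&M).original(42) == AuxInfoRef(M).original(42));
    return 0;
  }

The same pattern applies to VirtRegAuxInfo below: callers that previously
passed `VRM` now pass `*VRM`, asserting the invariant once at construction
time instead of re-checking it on every query.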
Differential Revision: https://reviews.llvm.org/D89171 --- llvm/include/llvm/CodeGen/CalcSpillWeights.h | 6 +- llvm/lib/CodeGen/CalcSpillWeights.cpp | 58 +++++++++----------- llvm/lib/CodeGen/LiveRangeEdit.cpp | 2 +- llvm/lib/CodeGen/RegAllocBasic.cpp | 2 +- llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +- llvm/lib/CodeGen/RegAllocPBQP.cpp | 4 +- 6 files changed, 35 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index c345c42c777451..78dae81f596e1a 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -46,13 +46,13 @@ class VirtRegMap; class VirtRegAuxInfo { MachineFunction &MF; LiveIntervals &LIS; - VirtRegMap *const VRM; + const VirtRegMap &VRM; const MachineLoopInfo &Loops; const MachineBlockFrequencyInfo &MBFI; public: - VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap *VRM, - const MachineLoopInfo &Loops, + VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, + const VirtRegMap &VRM, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {} diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 03490643339f23..0a268a20d365f9 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -76,12 +76,11 @@ static Register copyHint(const MachineInstr *MI, unsigned Reg, } // Check if all values in LI are rematerializable -static bool isRematerializable(const LiveInterval &LI, - const LiveIntervals &LIS, - VirtRegMap *VRM, +static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, + const VirtRegMap &VRM, const TargetInstrInfo &TII) { unsigned Reg = LI.reg(); - unsigned Original = VRM ? VRM->getOriginal(Reg) : 0; + unsigned Original = VRM.getOriginal(Reg); for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { const VNInfo *VNI = *I; @@ -96,31 +95,28 @@ static bool isRematerializable(const LiveInterval &LI, // Trace copies introduced by live range splitting. The inline // spiller can rematerialize through these copies, so the spill // weight must reflect this. - if (VRM) { - while (MI->isFullCopy()) { - // The copy destination must match the interval register. - if (MI->getOperand(0).getReg() != Reg) - return false; - - // Get the source register. - Reg = MI->getOperand(1).getReg(); - - // If the original (pre-splitting) registers match this - // copy came from a split. - if (!Register::isVirtualRegister(Reg) || - VRM->getOriginal(Reg) != Original) - return false; - - // Follow the copy live-in value. - const LiveInterval &SrcLI = LIS.getInterval(Reg); - LiveQueryResult SrcQ = SrcLI.Query(VNI->def); - VNI = SrcQ.valueIn(); - assert(VNI && "Copy from non-existing value"); - if (VNI->isPHIDef()) - return false; - MI = LIS.getInstructionFromIndex(VNI->def); - assert(MI && "Dead valno in interval"); - } + while (MI->isFullCopy()) { + // The copy destination must match the interval register. + if (MI->getOperand(0).getReg() != Reg) + return false; + + // Get the source register. + Reg = MI->getOperand(1).getReg(); + + // If the original (pre-splitting) registers match this + // copy came from a split. + if (!Register::isVirtualRegister(Reg) || VRM.getOriginal(Reg) != Original) + return false; + + // Follow the copy live-in value. 
+ const LiveInterval &SrcLI = LIS.getInterval(Reg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + VNI = SrcQ.valueIn(); + assert(VNI && "Copy from non-existing value"); + if (VNI->isPHIDef()) + return false; + MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); } if (!TII.isTriviallyReMaterializable(*MI, LIS.getAliasAnalysis())) @@ -155,9 +151,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, std::pair TargetHint = MRI.getRegAllocationHint(LI.reg()); - if (LI.isSpillable() && VRM) { + if (LI.isSpillable()) { Register Reg = LI.reg(); - Register Original = VRM->getOriginal(Reg); + Register Original = VRM.getOriginal(Reg); const LiveInterval &OrigInt = LIS.getInterval(Original); // li comes from a split of OrigInt. If OrigInt was marked // as not spillable, make sure the new interval is marked diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp index dc62c1377ce2c4..b77c4c414e8a2f 100644 --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -463,7 +463,7 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) { - VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); + VirtRegAuxInfo VRAI(MF, LIS, *VRM, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg())) diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 83b5a05f92e9be..05c8c4e34c692e 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -312,7 +312,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis(), getAnalysis(), getAnalysis()); - VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis(), + VirtRegAuxInfo VRAI(*MF, *LIS, *VRM, getAnalysis(), getAnalysis()); VRAI.calculateSpillWeightsAndHints(); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 03dab75b33e056..cdc1422797fea0 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -3234,7 +3234,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - VRAI = std::make_unique(*MF, *LIS, VRM, *Loops, *MBFI); + VRAI = std::make_unique(*MF, *LIS, *VRM, *Loops, *MBFI); VRAI->calculateSpillWeightsAndHints(); diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp index 2c6e01376b0c2c..1d9923c38ce26f 100644 --- a/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -527,7 +527,7 @@ class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo { } public: - PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap *VRM, + PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {} @@ -799,7 +799,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { VirtRegMap &VRM = getAnalysis(); - PBQPVirtRegAuxInfo VRAI(MF, LIS, &VRM, getAnalysis(), MBFI); + PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis(), MBFI); VRAI.calculateSpillWeightsAndHints(); std::unique_ptr VRegSpiller(createInlineSpiller(*this, MF, VRM)); From ea058d289cbf54e5b33aac7f7a13d0d58625f1b9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 2 Oct 2020 19:02:22 +0100 Subject: [PATCH 004/123] [VPlan] Use operands for 
printing of VPWidenMemoryInstructionRecipe. Now that operands of the recipe are managed through VPUser, we can simplify the printing by just using the operands. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 17 +++++++++-------- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index a009393d029cc4..054920645a9af0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -790,7 +790,7 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { dumpEdges(Region); } -void VPlanPrinter::printAsIngredient(raw_ostream &O, Value *V) { +void VPlanPrinter::printAsIngredient(raw_ostream &O, const Value *V) { std::string IngredientString; raw_string_ostream RSO(IngredientString); if (auto *Inst = dyn_cast(V)) { @@ -903,13 +903,14 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"WIDEN " << VPlanIngredient(&Instr); - O << ", "; - getAddr()->printAsOperand(O, SlotTracker); - VPValue *Mask = getMask(); - if (Mask) { - O << ", "; - Mask->printAsOperand(O, SlotTracker); + O << "\"WIDEN " << Instruction::getOpcodeName(Instr.getOpcode()) << " "; + + bool First = true; + for (VPValue *Op : operands()) { + if (!First) + O << ", "; + Op->printAsOperand(O, SlotTracker); + First = false; } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index aa59904ea78e46..30f984fd39d768 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1778,7 +1778,7 @@ class VPlanPrinter { void dump(); - static void printAsIngredient(raw_ostream &O, Value *V); + static void printAsIngredient(raw_ostream &O, const Value *V); }; struct VPlanIngredient { From 43d347995c33a5f48f0b4d9cf3d541a1f6ba66c6 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Fri, 9 Oct 2020 10:04:29 -0700 Subject: [PATCH 005/123] [NFC][MC] Use MCRegister in LiveRangeMatrix The change starts from LiveRangeMatrix and also checks the users of the APIs are typed accordingly. Differential Revision: https://reviews.llvm.org/D89145 --- llvm/include/llvm/CodeGen/LiveRegMatrix.h | 15 +++---- llvm/lib/CodeGen/LiveRegMatrix.cpp | 22 +++++----- llvm/lib/CodeGen/RegAllocBase.cpp | 2 +- llvm/lib/CodeGen/RegAllocBase.h | 4 +- llvm/lib/CodeGen/RegAllocBasic.cpp | 15 +++---- llvm/lib/CodeGen/RegAllocGreedy.cpp | 41 ++++++++++--------- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 6 +-- llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp | 17 ++++---- .../Target/AMDGPU/SIPreAllocateWWMRegs.cpp | 2 +- 9 files changed, 64 insertions(+), 60 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h index ab4d44f9a61176..a3f8f88e810b99 100644 --- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h +++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h @@ -104,19 +104,19 @@ class LiveRegMatrix : public MachineFunctionPass { /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg). /// When there is more than one kind of interference, the InterferenceKind /// with the highest enum value is returned. 
- InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); + InterferenceKind checkInterference(LiveInterval &VirtReg, MCRegister PhysReg); /// Check for interference in the segment [Start, End) that may prevent /// assignment to PhysReg. If this function returns true, there is /// interference in the segment [Start, End) of some other interval already /// assigned to PhysReg. If this function returns false, PhysReg is free at /// the segment [Start, End). - bool checkInterference(SlotIndex Start, SlotIndex End, unsigned PhysReg); + bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg); /// Assign VirtReg to PhysReg. /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and /// update VirtRegMap. The live range is expected to be available in PhysReg. - void assign(LiveInterval &VirtReg, unsigned PhysReg); + void assign(LiveInterval &VirtReg, MCRegister PhysReg); /// Unassign VirtReg from its PhysReg. /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes @@ -124,7 +124,7 @@ class LiveRegMatrix : public MachineFunctionPass { void unassign(LiveInterval &VirtReg); /// Returns true if the given \p PhysReg has any live intervals assigned. - bool isPhysRegUsed(unsigned PhysReg) const; + bool isPhysRegUsed(MCRegister PhysReg) const; //===--------------------------------------------------------------------===// // Low-level interface. @@ -136,18 +136,19 @@ class LiveRegMatrix : public MachineFunctionPass { /// Check for regmask interference only. /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg. /// If PhysReg is null, check if VirtReg crosses any regmask operands. - bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0); + bool checkRegMaskInterference(LiveInterval &VirtReg, + MCRegister PhysReg = MCRegister::NoRegister); /// Check for regunit interference only. /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's /// register units. - bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg); + bool checkRegUnitInterference(LiveInterval &VirtReg, MCRegister PhysReg); /// Query a line of the assigned virtual register matrix directly. /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg. /// This returns a reference to an internal Query data structure that is only /// valid until the next query() call. - LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit); + LiveIntervalUnion::Query &query(const LiveRange &LR, MCRegister RegUnit); /// Directly access the live interval unions per regunit. /// This returns an array indexed by the regunit number. 
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 6b1775f28c045e..59c7f93fd915c2 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -78,7 +78,7 @@ void LiveRegMatrix::releaseMemory() { template static bool foreachUnit(const TargetRegisterInfo *TRI, - LiveInterval &VRegInterval, unsigned PhysReg, + LiveInterval &VRegInterval, MCRegister PhysReg, Callable Func) { if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { @@ -101,7 +101,7 @@ static bool foreachUnit(const TargetRegisterInfo *TRI, return false; } -void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { +void LiveRegMatrix::assign(LiveInterval &VirtReg, MCRegister PhysReg) { LLVM_DEBUG(dbgs() << "assigning " << printReg(VirtReg.reg(), TRI) << " to " << printReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg()) && "Duplicate VirtReg assignment"); @@ -135,7 +135,7 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) { LLVM_DEBUG(dbgs() << '\n'); } -bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { +bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const { for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { if (!Matrix[*Unit].empty()) return true; @@ -144,7 +144,7 @@ bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { } bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, - unsigned PhysReg) { + MCRegister PhysReg) { // Check if the cached information is valid. // The same BitVector can be reused for all PhysRegs. // We could cache multiple VirtRegs if it becomes necessary. @@ -162,7 +162,7 @@ bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, } bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, - unsigned PhysReg) { + MCRegister PhysReg) { if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg(), PhysReg, *TRI); @@ -176,14 +176,14 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, } LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR, - unsigned RegUnit) { + MCRegister RegUnit) { LiveIntervalUnion::Query &Q = Queries[RegUnit]; Q.init(UserTag, LR, Matrix[RegUnit]); return Q; } LiveRegMatrix::InterferenceKind -LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { +LiveRegMatrix::checkInterference(LiveInterval &VirtReg, MCRegister PhysReg) { if (VirtReg.empty()) return IK_Free; @@ -197,9 +197,9 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { // Check the matrix for virtual register interference. bool Interference = foreachUnit(TRI, VirtReg, PhysReg, - [&](unsigned Unit, const LiveRange &LR) { - return query(LR, Unit).checkInterference(); - }); + [&](MCRegister Unit, const LiveRange &LR) { + return query(LR, Unit).checkInterference(); + }); if (Interference) return IK_VirtReg; @@ -207,7 +207,7 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { } bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, - unsigned PhysReg) { + MCRegister PhysReg) { // Construct artificial live range containing only one segment [Start, End). 
VNInfo valno(0, Start); LiveRange::Segment Seg(Start, End, &valno); diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index f7fe1063afeaea..d49a64b3f141bb 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -110,7 +110,7 @@ void RegAllocBase::allocatePhysRegs() { using VirtRegVec = SmallVector; VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + MCRegister AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 8e931eaae99a0e..3144605345e998 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -101,8 +101,8 @@ class RegAllocBase { // Each call must guarantee forward progess by returning an available PhysReg // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. - virtual Register selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &splitLVRs) = 0; + virtual MCRegister selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &splitLVRs) = 0; // Use this group name for NamedRegionTimer. static const char TimerGroupName[]; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index 05c8c4e34c692e..58e1e364d285da 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -100,8 +100,8 @@ class RABasic : public MachineFunctionPass, return LI; } - Register selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &SplitVRegs) override; + MCRegister selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &SplitVRegs) override; /// Perform register allocation. bool runOnMachineFunction(MachineFunction &mf) override; @@ -253,10 +253,10 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, Register PhysReg, // |vregs| * |machineregs|. And since the number of interference tests is // minimal, there is no value in caching them outside the scope of // selectOrSplit(). -Register RABasic::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &SplitVRegs) { +MCRegister RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &SplitVRegs) { // Populate a list of physical register spill candidates. - SmallVector PhysRegSpillCands; + SmallVector PhysRegSpillCands; // Check for an available register in this class. auto Order = @@ -281,8 +281,9 @@ Register RABasic::selectOrSplit(LiveInterval &VirtReg, } // Try to spill another interfering reg with less spill weight. 
- for (SmallVectorImpl::iterator PhysRegI = PhysRegSpillCands.begin(), - PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + for (auto PhysRegI = PhysRegSpillCands.begin(), + PhysRegE = PhysRegSpillCands.end(); + PhysRegI != PhysRegE; ++PhysRegI) { if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index cdc1422797fea0..6a804d96c04bf5 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -418,7 +418,8 @@ class RAGreedy : public MachineFunctionPass, Spiller &spiller() override { return *SpillerInstance; } void enqueue(LiveInterval *LI) override; LiveInterval *dequeue() override; - Register selectOrSplit(LiveInterval&, SmallVectorImpl&) override; + MCRegister selectOrSplit(LiveInterval &, + SmallVectorImpl &) override; void aboutToRemoveInterval(LiveInterval &) override; /// Perform register allocation. @@ -432,8 +433,8 @@ class RAGreedy : public MachineFunctionPass, static char ID; private: - Register selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, - SmallVirtRegSet &, unsigned = 0); + MCRegister selectOrSplitImpl(LiveInterval &, SmallVectorImpl &, + SmallVirtRegSet &, unsigned = 0); bool LRE_CanEraseVirtReg(unsigned) override; void LRE_WillShrinkVirtReg(unsigned) override; @@ -459,8 +460,8 @@ class RAGreedy : public MachineFunctionPass, void calcGapWeights(unsigned, SmallVectorImpl&); Register canReassign(LiveInterval &VirtReg, Register PrevReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); - bool canEvictInterference(LiveInterval&, Register, bool, EvictionCost&, - const SmallVirtRegSet&); + bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &, + const SmallVirtRegSet &); bool canEvictInterferenceInRange(LiveInterval &VirtReg, Register oPhysReg, SlotIndex Start, SlotIndex End, EvictionCost &MaxCost); @@ -869,7 +870,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, /// @param MaxCost Only look for cheaper candidates and update with new cost /// when returning true. /// @returns True when interference can be evicted cheaper than MaxCost. -bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, Register PhysReg, +bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint, EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) { // It is only possible to evict virtual register interference. @@ -2606,7 +2607,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, SmallLISet RecoloringCandidates; // Record the original mapping virtual register to physical register in case // the recoloring fails. - DenseMap VirtRegToPhysReg; + DenseMap VirtRegToPhysReg; // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in // this recoloring "session". 
assert(!FixedRegisters.count(VirtReg.reg())); @@ -2701,7 +2702,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, Register ItVirtReg = (*It)->reg(); if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); - Register ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + MCRegister ItPhysReg = VirtRegToPhysReg[ItVirtReg]; Matrix->assign(**It, ItPhysReg); } } @@ -2725,8 +2726,8 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, while (!RecoloringQueue.empty()) { LiveInterval *LI = dequeue(RecoloringQueue); LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); - Register PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, - Depth + 1); + MCRegister PhysReg = + selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); // When splitting happens, the live-range may actually be empty. // In that case, this is okay to continue the recoloring even // if we did not find an alternative color for it. Indeed, @@ -2753,12 +2754,12 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, // Main Entry Point //===----------------------------------------------------------------------===// -Register RAGreedy::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &NewVRegs) { +MCRegister RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs) { CutOffInfo = CO_None; LLVMContext &Ctx = MF->getFunction().getContext(); SmallVirtRegSet FixedRegisters; - Register Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + MCRegister Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); if (Reg == ~0U && (CutOffInfo != CO_None)) { uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); if (CutOffEncountered == CO_Depth) @@ -2902,7 +2903,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { SmallVector RecoloringCandidates; HintsInfo Info; unsigned Reg = VirtReg.reg(); - Register PhysReg = VRM->getPhys(Reg); + MCRegister PhysReg = VRM->getPhys(Reg); // Start the recoloring algorithm from the input live-interval, then // it will propagate to the ones that are copy-related with it. Visited.insert(Reg); @@ -3014,10 +3015,10 @@ void RAGreedy::tryHintsRecoloring() { } } -Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, - SmallVectorImpl &NewVRegs, - SmallVirtRegSet &FixedRegisters, - unsigned Depth) { +MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. auto Order = @@ -3030,8 +3031,8 @@ Register RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // register. if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) { - Register CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, - CostPerUseLimit, NewVRegs); + MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) // Return now if we decide to use a CSR or create new vregs due to // pre-splitting. 
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp index 1df86e7ca6b20b..200b2d36848da8 100644 --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -118,11 +118,11 @@ GCNNSAReassign::tryAssignRegisters(SmallVectorImpl &Intervals, LRM->unassign(*Intervals[N]); for (unsigned N = 0; N < NumRegs; ++N) - if (LRM->checkInterference(*Intervals[N], StartReg + N)) + if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N))) return false; for (unsigned N = 0; N < NumRegs; ++N) - LRM->assign(*Intervals[N], StartReg + N); + LRM->assign(*Intervals[N], MCRegister::from(StartReg + N)); return true; } @@ -273,7 +273,7 @@ bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0); SmallVector Intervals; - SmallVector OrigRegs; + SmallVector OrigRegs; SlotIndex MinInd, MaxInd; for (unsigned I = 0; I < Info->VAddrDwords; ++I) { const MachineOperand &Op = MI->getOperand(VAddr0Idx + I); diff --git a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp index 92d4a646247933..9a27b23ce419a7 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp @@ -239,7 +239,8 @@ class GCNRegBankReassign : public MachineFunctionPass { // Search for a register in Bank unused within LI. // Returns phys reg or NoRegister. - unsigned scavengeReg(LiveInterval &LI, unsigned Bank, unsigned SubReg) const; + MCRegister scavengeReg(LiveInterval &LI, unsigned Bank, + unsigned SubReg) const; // Try to reassign candidate. Returns number or stall cycles saved. unsigned tryReassign(Candidate &C); @@ -648,15 +649,15 @@ unsigned GCNRegBankReassign::computeStallCycles(Register SrcReg, Register Reg, return TotalStallCycles; } -unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, - unsigned SubReg) const { +MCRegister GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, + unsigned SubReg) const { const TargetRegisterClass *RC = MRI->getRegClass(LI.reg()); unsigned MaxNumRegs = (Bank < NUM_VGPR_BANKS) ? MaxNumVGPRs : MaxNumSGPRs; unsigned MaxReg = MaxNumRegs + (Bank < NUM_VGPR_BANKS ? AMDGPU::VGPR0 : AMDGPU::SGPR0); - for (Register Reg : RC->getRegisters()) { + for (MCRegister Reg : RC->getRegisters()) { // Check occupancy limit. 
if (TRI->isSubRegisterEq(Reg, MaxReg)) break; @@ -667,7 +668,7 @@ unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, for (unsigned I = 0; CSRegs[I]; ++I) if (TRI->isSubRegisterEq(Reg, CSRegs[I]) && !LRM->isPhysRegUsed(CSRegs[I])) - return AMDGPU::NoRegister; + return MCRegister::from(AMDGPU::NoRegister); LLVM_DEBUG(dbgs() << "Trying register " << printReg(Reg) << '\n'); @@ -675,7 +676,7 @@ unsigned GCNRegBankReassign::scavengeReg(LiveInterval &LI, unsigned Bank, return Reg; } - return AMDGPU::NoRegister; + return MCRegister::from(AMDGPU::NoRegister); } unsigned GCNRegBankReassign::tryReassign(Candidate &C) { @@ -720,11 +721,11 @@ unsigned GCNRegBankReassign::tryReassign(Candidate &C) { } llvm::sort(BankStalls); - Register OrigReg = VRM->getPhys(C.Reg); + MCRegister OrigReg = VRM->getPhys(C.Reg); LRM->unassign(LI); while (!BankStalls.empty()) { BankStall BS = BankStalls.pop_back_val(); - Register Reg = scavengeReg(LI, BS.Bank, C.SubReg); + MCRegister Reg = scavengeReg(LI, BS.Bank, C.SubReg); if (Reg == AMDGPU::NoRegister) { LLVM_DEBUG(dbgs() << "No free registers in bank " << printBank(BS.Bank) << '\n'); diff --git a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp index b6e5235c310a65..21348f89c19d95 100644 --- a/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp @@ -104,7 +104,7 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { LiveInterval &LI = LIS->getInterval(Reg); - for (unsigned PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { + for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { if (!MRI->isPhysRegUsed(PhysReg) && Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { Matrix->assign(LI, PhysReg); From 2f66bfac280f9ae9299dccc357ae10e8a48525ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Mon, 12 Oct 2020 17:54:16 +0200 Subject: [PATCH 006/123] [Tests] Regenerate test checks; NFC --- .../test/Transforms/InstCombine/cabs-array.ll | 70 +++++++++++-------- .../Transforms/InstCombine/cabs-discrete.ll | 58 ++++++++------- .../Transforms/InstCombine/fabs-libcall.ll | 9 +-- llvm/test/Transforms/InstCombine/objsize.ll | 20 +++--- 4 files changed, 89 insertions(+), 68 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/cabs-array.ll b/llvm/test/Transforms/InstCombine/cabs-array.ll index 1c15dc1c5457fb..198badf5ac72a3 100644 --- a/llvm/test/Transforms/InstCombine/cabs-array.ll +++ b/llvm/test/Transforms/InstCombine/cabs-array.ll @@ -1,61 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define double @std_cabs([2 x double] %z) { -; CHECK-LABEL: define double @std_cabs( -; CHECK: tail call double @cabs( +; CHECK-LABEL: @std_cabs( +; CHECK-NEXT: [[CALL:%.*]] = tail call double @cabs([2 x double] [[Z:%.*]]) +; CHECK-NEXT: ret double [[CALL]] +; %call = tail call double @cabs([2 x double] %z) ret double %call } define float @std_cabsf([2 x float] %z) { -; CHECK-LABEL: define float @std_cabsf( -; CHECK: tail call float @cabsf( +; CHECK-LABEL: @std_cabsf( +; CHECK-NEXT: [[CALL:%.*]] = tail call float @cabsf([2 x float] [[Z:%.*]]) +; CHECK-NEXT: ret float [[CALL]] +; %call = tail call float @cabsf([2 x float] %z) ret float %call } define fp128 @std_cabsl([2 x fp128] %z) { -; CHECK-LABEL: define fp128 @std_cabsl( -; CHECK: tail call fp128 @cabsl( +; CHECK-LABEL: @std_cabsl( +; CHECK-NEXT: [[CALL:%.*]] = tail 
call fp128 @cabsl([2 x fp128] [[Z:%.*]]) +; CHECK-NEXT: ret fp128 [[CALL]] +; %call = tail call fp128 @cabsl([2 x fp128] %z) ret fp128 %call } define double @fast_cabs([2 x double] %z) { -; CHECK-LABEL: define double @fast_cabs( -; CHECK: %real = extractvalue [2 x double] %z, 0 -; CHECK: %imag = extractvalue [2 x double] %z, 1 -; CHECK: %1 = fmul fast double %real, %real -; CHECK: %2 = fmul fast double %imag, %imag -; CHECK: %3 = fadd fast double %1, %2 -; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3) -; CHECK: ret double %cabs +; CHECK-LABEL: @fast_cabs( +; CHECK-NEXT: [[REAL:%.*]] = extractvalue [2 x double] [[Z:%.*]], 0 +; CHECK-NEXT: [[IMAG:%.*]] = extractvalue [2 x double] [[Z]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: ret double [[CABS]] +; %call = tail call fast double @cabs([2 x double] %z) ret double %call } define float @fast_cabsf([2 x float] %z) { -; CHECK-LABEL: define float @fast_cabsf( -; CHECK: %real = extractvalue [2 x float] %z, 0 -; CHECK: %imag = extractvalue [2 x float] %z, 1 -; CHECK: %1 = fmul fast float %real, %real -; CHECK: %2 = fmul fast float %imag, %imag -; CHECK: %3 = fadd fast float %1, %2 -; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3) -; CHECK: ret float %cabs +; CHECK-LABEL: @fast_cabsf( +; CHECK-NEXT: [[REAL:%.*]] = extractvalue [2 x float] [[Z:%.*]], 0 +; CHECK-NEXT: [[IMAG:%.*]] = extractvalue [2 x float] [[Z]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: ret float [[CABS]] +; %call = tail call fast float @cabsf([2 x float] %z) ret float %call } define fp128 @fast_cabsl([2 x fp128] %z) { -; CHECK-LABEL: define fp128 @fast_cabsl( -; CHECK: %real = extractvalue [2 x fp128] %z, 0 -; CHECK: %imag = extractvalue [2 x fp128] %z, 1 -; CHECK: %1 = fmul fast fp128 %real, %real -; CHECK: %2 = fmul fast fp128 %imag, %imag -; CHECK: %3 = fadd fast fp128 %1, %2 -; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3) -; CHECK: ret fp128 %cabs +; CHECK-LABEL: @fast_cabsl( +; CHECK-NEXT: [[REAL:%.*]] = extractvalue [2 x fp128] [[Z:%.*]], 0 +; CHECK-NEXT: [[IMAG:%.*]] = extractvalue [2 x fp128] [[Z]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: ret fp128 [[CABS]] +; %call = tail call fast fp128 @cabsl([2 x fp128] %z) ret fp128 %call } diff --git a/llvm/test/Transforms/InstCombine/cabs-discrete.ll b/llvm/test/Transforms/InstCombine/cabs-discrete.ll index 405c073c194f00..70e456430b40ee 100644 --- a/llvm/test/Transforms/InstCombine/cabs-discrete.ll +++ b/llvm/test/Transforms/InstCombine/cabs-discrete.ll @@ -1,55 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define double @std_cabs(double %real, double %imag) { -; CHECK-LABEL: define double @std_cabs( -; CHECK: tail call double @cabs( +; CHECK-LABEL: @std_cabs( +; CHECK-NEXT: 
[[CALL:%.*]] = tail call double @cabs(double [[REAL:%.*]], double [[IMAG:%.*]]) +; CHECK-NEXT: ret double [[CALL]] +; %call = tail call double @cabs(double %real, double %imag) ret double %call } define float @std_cabsf(float %real, float %imag) { -; CHECK-LABEL: define float @std_cabsf( -; CHECK: tail call float @cabsf( +; CHECK-LABEL: @std_cabsf( +; CHECK-NEXT: [[CALL:%.*]] = tail call float @cabsf(float [[REAL:%.*]], float [[IMAG:%.*]]) +; CHECK-NEXT: ret float [[CALL]] +; %call = tail call float @cabsf(float %real, float %imag) ret float %call } define fp128 @std_cabsl(fp128 %real, fp128 %imag) { -; CHECK-LABEL: define fp128 @std_cabsl( -; CHECK: tail call fp128 @cabsl( +; CHECK-LABEL: @std_cabsl( +; CHECK-NEXT: [[CALL:%.*]] = tail call fp128 @cabsl(fp128 [[REAL:%.*]], fp128 [[IMAG:%.*]]) +; CHECK-NEXT: ret fp128 [[CALL]] +; %call = tail call fp128 @cabsl(fp128 %real, fp128 %imag) ret fp128 %call } define double @fast_cabs(double %real, double %imag) { -; CHECK-LABEL: define double @fast_cabs( -; CHECK: %1 = fmul fast double %real, %real -; CHECK: %2 = fmul fast double %imag, %imag -; CHECK: %3 = fadd fast double %1, %2 -; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3) -; CHECK: ret double %cabs +; CHECK-LABEL: @fast_cabs( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[REAL:%.*]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast double [[IMAG:%.*]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast double [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast double @llvm.sqrt.f64(double [[TMP3]]) +; CHECK-NEXT: ret double [[CABS]] +; %call = tail call fast double @cabs(double %real, double %imag) ret double %call } define float @fast_cabsf(float %real, float %imag) { -; CHECK-LABEL: define float @fast_cabsf( -; CHECK: %1 = fmul fast float %real, %real -; CHECK: %2 = fmul fast float %imag, %imag -; CHECK: %3 = fadd fast float %1, %2 -; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3) -; CHECK: ret float %cabs +; CHECK-LABEL: @fast_cabsf( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[REAL:%.*]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[IMAG:%.*]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast float [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast float @llvm.sqrt.f32(float [[TMP3]]) +; CHECK-NEXT: ret float [[CABS]] +; %call = tail call fast float @cabsf(float %real, float %imag) ret float %call } define fp128 @fast_cabsl(fp128 %real, fp128 %imag) { -; CHECK-LABEL: define fp128 @fast_cabsl( -; CHECK: %1 = fmul fast fp128 %real, %real -; CHECK: %2 = fmul fast fp128 %imag, %imag -; CHECK: %3 = fadd fast fp128 %1, %2 -; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3) -; CHECK: ret fp128 %cabs +; CHECK-LABEL: @fast_cabsl( +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast fp128 [[REAL:%.*]], [[REAL]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast fp128 [[IMAG:%.*]], [[IMAG]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd fast fp128 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[CABS:%.*]] = call fast fp128 @llvm.sqrt.f128(fp128 [[TMP3]]) +; CHECK-NEXT: ret fp128 [[CABS]] +; %call = tail call fast fp128 @cabsl(fp128 %real, fp128 %imag) ret fp128 %call } diff --git a/llvm/test/Transforms/InstCombine/fabs-libcall.ll b/llvm/test/Transforms/InstCombine/fabs-libcall.ll index 90902bb2fd044e..609529ed3a1b20 100644 --- a/llvm/test/Transforms/InstCombine/fabs-libcall.ll +++ b/llvm/test/Transforms/InstCombine/fabs-libcall.ll @@ -1,11 +1,12 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -mtriple=i686-apple-macosx -instcombine %s | 
FileCheck %s declare x86_fp80 @fabsl(x86_fp80) define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @replace_fabs_call_f80( -; CHECK-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 %x) -; CHECK-NEXT: ret x86_fp80 [[TMP1]] +; CHECK-NEXT: [[FABSL:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call x86_fp80 @fabsl(x86_fp80 %x) ret x86_fp80 %fabsl @@ -13,8 +14,8 @@ define x86_fp80 @replace_fabs_call_f80(x86_fp80 %x) { define x86_fp80 @fmf_replace_fabs_call_f80(x86_fp80 %x) { ; CHECK-LABEL: @fmf_replace_fabs_call_f80( -; CHECK-NEXT: [[TMP1:%.*]] = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 %x) -; CHECK-NEXT: ret x86_fp80 [[TMP1]] +; CHECK-NEXT: [[FABSL:%.*]] = call nnan x86_fp80 @llvm.fabs.f80(x86_fp80 [[X:%.*]]) +; CHECK-NEXT: ret x86_fp80 [[FABSL]] ; %fabsl = tail call nnan x86_fp80 @fabsl(x86_fp80 %x) ret x86_fp80 %fabsl diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll index 15f6b44a3a0ea9..ad37fa2a0860a2 100644 --- a/llvm/test/Transforms/InstCombine/objsize.ll +++ b/llvm/test/Transforms/InstCombine/objsize.ll @@ -112,7 +112,7 @@ define void @test3() nounwind { ; CHECK: bb11: ; CHECK-NEXT: unreachable ; CHECK: bb12: -; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__inline_memcpy_chk(i8* bitcast (float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 1) to i8*), i8* undef, i32 512) #3 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__inline_memcpy_chk(i8* bitcast (float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 1) to i8*), i8* undef, i32 512) [[ATTR3:#.*]] ; CHECK-NEXT: unreachable ; entry: @@ -141,7 +141,7 @@ define i32 @test4(i8** %esc) nounwind ssp { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_DATA:%.*]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.data* [[TMP0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull align 8 dereferenceable(1824) [[TMP1]], i8 0, i32 1824, i1 false) #0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull align 8 dereferenceable(1824) [[TMP1]], i8 0, i32 1824, i1 false) [[ATTR0:#.*]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[ESC:%.*]] to %struct.data** ; CHECK-NEXT: store %struct.data* [[TMP0]], %struct.data** [[TMP2]], align 4 ; CHECK-NEXT: ret i32 0 @@ -161,9 +161,9 @@ entry: define i8* @test5(i32 %n) nounwind ssp { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) #0 +; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) [[ATTR0]] ; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @s, align 8 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 1 dereferenceable(10) [[TMP0]], i8* nonnull align 1 dereferenceable(10) [[TMP1]], i32 10, i1 false) #0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* nonnull align 1 dereferenceable(10) [[TMP0]], i8* nonnull align 1 dereferenceable(10) [[TMP1]], i32 10, i1 false) [[ATTR0]] ; CHECK-NEXT: ret i8* [[TMP0]] ; entry: @@ -177,9 +177,9 @@ entry: define void @test6(i32 %n) nounwind ssp { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) #0 +; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) [[ATTR0]] ; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @s, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* 
@__memcpy_chk(i8* [[TMP0]], i8* [[TMP1]], i32 30, i32 20) #0 +; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* @__memcpy_chk(i8* [[TMP0]], i8* [[TMP1]], i32 30, i32 20) [[ATTR0]] ; CHECK-NEXT: ret void ; entry: @@ -196,7 +196,7 @@ declare noalias i8* @malloc(i32) nounwind define i32 @test7(i8** %esc) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(48) i8* @malloc(i32 48) #0 +; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(48) i8* @malloc(i32 48) [[ATTR0]] ; CHECK-NEXT: store i8* [[ALLOC]], i8** [[ESC:%.*]], align 4 ; CHECK-NEXT: ret i32 32 ; @@ -211,7 +211,7 @@ declare noalias i8* @calloc(i32, i32) nounwind define i32 @test8(i8** %esc) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(35) i8* @calloc(i32 5, i32 7) #0 +; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(35) i8* @calloc(i32 5, i32 7) [[ATTR0]] ; CHECK-NEXT: store i8* [[ALLOC]], i8** [[ESC:%.*]], align 4 ; CHECK-NEXT: ret i32 30 ; @@ -227,7 +227,7 @@ declare noalias i8* @strndup(i8* nocapture, i32) nounwind define i32 @test9(i8** %esc) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) #0 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) [[ATTR0]] ; CHECK-NEXT: store i8* [[CALL]], i8** [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 8 ; @@ -239,7 +239,7 @@ define i32 @test9(i8** %esc) { define i32 @test10(i8** %esc) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(4) i8* @strndup(i8* dereferenceable(8) getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 3) #0 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(4) i8* @strndup(i8* dereferenceable(8) getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 3) [[ATTR0]] ; CHECK-NEXT: store i8* [[CALL]], i8** [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 4 ; From 734112343917a011676c2915c5e5d29803a51ba6 Mon Sep 17 00:00:00 2001 From: Konstantin Schwarz Date: Mon, 12 Oct 2020 11:45:33 +0200 Subject: [PATCH 007/123] [GlobalISel][KnownBits] Early return on out of bound shift amounts If the known shift amount is bigger than or equal to the bitwidth of the type of the value to be shifted, the result is target dependent, so don't try to infer any bits. This fixes a crash we've seen in one of our internal test suites. 
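As a self-contained illustration of the guard (a sketch only -- `ToyKnownBits`
and `knownBitsOfShl` are invented for this note, not the GISelKnownBits API),
shifting an s32 value by 32 or more, as in `G_SHL %src, 32`, must yield "no
bits known" rather than feeding the oversized amount into the bit math:

  #include <cassert>
  #include <cstdint>

  // Toy known-bits pair: each bit is proven 0, proven 1, or unknown.
  struct ToyKnownBits {
    uint64_t Zero; // bits proven to be 0
    uint64_t One;  // bits proven to be 1
  };

  // Known bits of (Val << ShiftAmt) for a BitWidth-wide value.
  ToyKnownBits knownBitsOfShl(ToyKnownBits Val, uint64_t ShiftAmt,
                              unsigned BitWidth) {
    ToyKnownBits Res{0, 0}; // all bits unknown by default
    // Guard against oversized shift amounts: the result is not defined,
    // so infer nothing. Without this early return, the code below would
    // wrongly claim the low ShiftAmt bits are known zero (and for
    // ShiftAmt >= 64 the C++ shift itself would be undefined).
    if (ShiftAmt >= BitWidth)
      return Res;
    uint64_t Mask = BitWidth == 64 ? ~0ULL : ((1ULL << BitWidth) - 1);
    Res.One = (Val.One << ShiftAmt) & Mask;
    // The shifted-in low bits are known zero.
    Res.Zero = ((Val.Zero << ShiftAmt) | ((1ULL << ShiftAmt) - 1)) & Mask;
    return Res;
  }

  int main() {
    ToyKnownBits X{0xFE, 0x01}; // bit 0 known one, bits 1-7 known zero
    ToyKnownBits R = knownBitsOfShl(X, 32, /*BitWidth=*/32);
    assert(R.Zero == 0 && R.One == 0); // nothing inferred, and no crash
    return 0;
  }

The new KnownBitsTest cases below check exactly this behavior for both the
equal-sized (32) and larger (33) shift amounts.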
Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D89232 --- .../lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 4 +++ .../CodeGen/GlobalISel/KnownBitsTest.cpp | 35 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index 3ebbac9fd659aa..81a89a6eb0b78f 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -397,6 +397,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, uint64_t Shift = RHSKnown.getConstant().getZExtValue(); LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n'); + // Guard against oversized shift amounts + if (Shift >= MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()) + break; + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, Depth + 1); diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp index faf6f7087ac0cd..5f1d24b1078b65 100644 --- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp @@ -725,3 +725,38 @@ TEST_F(AArch64GISelMITest, TestKnownBitsUMax) { EXPECT_EQ(0xffu, KnownUmax.Zero.getZExtValue()); EXPECT_EQ(0xffffffffffffff00, KnownUmax.One.getZExtValue()); } + +TEST_F(AArch64GISelMITest, TestInvalidQueries) { + StringRef MIRString = R"( + %src:_(s32) = COPY $w0 + %thirty2:_(s32) = G_CONSTANT i32 32 + %equalSized:_(s32) = G_SHL %src, %thirty2 + %copy1:_(s32) = COPY %equalSized + %thirty3:_(s32) = G_CONSTANT i32 33 + %biggerSized:_(s32) = G_SHL %src, %thirty3 + %copy2:_(s32) = COPY %biggerSized +)"; + setUp(MIRString); + if (!TM) + return; + + Register EqSizedCopyReg = Copies[Copies.size() - 2]; + MachineInstr *EqSizedCopy = MRI->getVRegDef(EqSizedCopyReg); + Register EqSizedShl = EqSizedCopy->getOperand(1).getReg(); + + Register BiggerSizedCopyReg = Copies[Copies.size() - 1]; + MachineInstr *BiggerSizedCopy = MRI->getVRegDef(BiggerSizedCopyReg); + Register BiggerSizedShl = BiggerSizedCopy->getOperand(1).getReg(); + + GISelKnownBits Info(*MF); + KnownBits EqSizeRes = Info.getKnownBits(EqSizedShl); + KnownBits BiggerSizeRes = Info.getKnownBits(BiggerSizedShl); + + + // We don't know what the result of the shift is, but we should not crash + EXPECT_TRUE(EqSizeRes.One.isNullValue()); + EXPECT_TRUE(EqSizeRes.Zero.isNullValue()); + + EXPECT_TRUE(BiggerSizeRes.One.isNullValue()); + EXPECT_TRUE(BiggerSizeRes.Zero.isNullValue()); +} From 17cec6a11a12f815052d56a17ef738cf246a2d9a Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 12 Oct 2020 18:32:25 +0200 Subject: [PATCH 008/123] Revert 1c021c64c "[SCEV] Model ptrtoint(SCEVUnknown) cast not as unknown, but as zext/trunc/self of SCEVUnknown" > While we indeed can't treat them as no-ops, i believe we can/should > do better than just modelling them as `unknown`. `inttoptr` story > is complicated, but for `ptrtoint`, it seems straight-forward > to model it just as a zext-or-trunc of unknown. 
> > This may be important now that we track towards > making inttoptr/ptrtoint casts not no-op, > and towards preventing folding them into loads/etc > (see D88979/D88789/D88788) > > Reviewed By: mkazantsev > > Differential Revision: https://reviews.llvm.org/D88806 It caused the following assert during Chromium builds: llvm/lib/IR/Constants.cpp:1868: static llvm::Constant *llvm::ConstantExpr::getTrunc(llvm::Constant *, llvm::Type *, bool): Assertion `C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer"' failed. See code review for a link to a reproducer. This reverts commit 1c021c64caef83cccb719c9bf0a2554faa6563af. --- llvm/lib/Analysis/ScalarEvolution.cpp | 43 +++---------- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 2 +- .../add-expr-pointer-operand-sorting.ll | 4 +- .../ScalarEvolution/no-wrap-add-exprs.ll | 4 +- .../test/Analysis/ScalarEvolution/ptrtoint.ll | 60 +++++++++---------- llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll | 4 +- llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 4 +- .../IndVarSimplify/2011-11-01-lftrptr.ll | 16 +++-- .../CodeGen/scev_looking_through_bitcasts.ll | 3 +- 9 files changed, 53 insertions(+), 87 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 036d80649110aa..1d3e26b93cb6aa 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -3505,15 +3505,15 @@ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { + // We can bypass creating a target-independent + // constant expression and then folding it back into a ConstantInt. + // This is just a compile-time optimization. if (isa(AllocTy)) { Constant *NullPtr = Constant::getNullValue(AllocTy->getPointerTo()); Constant *One = ConstantInt::get(IntTy, 1); Constant *GEP = ConstantExpr::getGetElementPtr(AllocTy, NullPtr, One); - return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy)); + return getSCEV(ConstantExpr::getPtrToInt(GEP, IntTy)); } - // We can bypass creating a target-independent - // constant expression and then folding it back into a ConstantInt. - // This is just a compile-time optimization. return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } @@ -6301,36 +6301,6 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getSCEV(U->getOperand(0)); break; - case Instruction::PtrToInt: { - // It's tempting to handle inttoptr and ptrtoint as no-ops, - // however this can lead to pointer expressions which cannot safely be - // expanded to GEPs because ScalarEvolution doesn't respect - // the GEP aliasing rules when simplifying integer expressions. - // - // However, given - // %x = ??? - // %y = ptrtoint %x - // %z = ptrtoint %x - // it is safe to say that %y and %z are the same thing. - // - // So instead of modelling the cast itself as unknown, - // since the casts are transparent within SCEV, - // we can at least model the casts original value as unknow instead. - - // BUT, there's caveat. If we simply model %x as unknown, unrelated uses - // of %x will also see it as unknown, which is obviously bad. - // So we can only do this iff %x would be modelled as unknown anyways. - auto *OpSCEV = getSCEV(U->getOperand(0)); - if (isa(OpSCEV)) - return getTruncateOrZeroExtend(OpSCEV, U->getType()); - // If we can model the operand, however, we must fallback to modelling - // the whole cast as unknown instead. 
- LLVM_FALLTHROUGH; - } - case Instruction::IntToPtr: - // We can't do this for inttoptr at all, however. - return getUnknown(V); - case Instruction::SDiv: // If both operands are non-negative, this is just an udiv. if (isKnownNonNegative(getSCEV(U->getOperand(0))) && @@ -6345,6 +6315,11 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return getURemExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); break; + // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can + // lead to pointer expressions which cannot safely be expanded to GEPs, + // because ScalarEvolution doesn't respect the GEP aliasing rules when + // simplifying integer expressions. + case Instruction::GetElementPtr: return createNodeForGEP(cast(U)); diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 3e280a66175c88..2d71b0fff88940 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -427,7 +427,7 @@ static bool willNotOverflow(ScalarEvolution *SE, Instruction::BinaryOps BinOp, : &ScalarEvolution::getZeroExtendExpr; // Check ext(LHS op RHS) == ext(LHS) op ext(RHS) - auto *NarrowTy = cast(SE->getEffectiveSCEVType(LHS->getType())); + auto *NarrowTy = cast(LHS->getType()); auto *WideTy = IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2); diff --git a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll index e798e2715ba1da..93a3bf4d4c3786 100644 --- a/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll +++ b/llvm/test/Analysis/ScalarEvolution/add-expr-pointer-operand-sorting.ll @@ -33,9 +33,9 @@ define i32 @d(i32 %base) { ; CHECK-NEXT: %1 = load i32*, i32** @c, align 8 ; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.lhs.cast = ptrtoint i32* %1 to i64 -; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: --> %sub.ptr.lhs.cast U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, ptrtoint ([1 x i32]* @b to i64) -; CHECK-NEXT: --> ((-1 * @b) + %1) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } +; CHECK-NEXT: --> ((-1 * ptrtoint ([1 x i32]* @b to i64)) + %sub.ptr.lhs.cast) U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 4 ; CHECK-NEXT: --> %sub.ptr.div U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <> LoopDispositions: { %for.cond: Variant } ; CHECK-NEXT: %arrayidx1 = getelementptr inbounds [1 x i8], [1 x i8]* %arrayidx, i64 0, i64 %sub.ptr.div diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll index eb669cab0c790f..5a7bb3c9e5cd54 100644 --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -170,14 +170,14 @@ define void @f3(i8* %x_addr, i8* %y_addr, i32* %tmp_addr) { %int5 = add i32 %int0, 5 %int.zext = zext i32 %int5 to i64 ; CHECK: %int.zext = zext i32 %int5 to i64 -; CHECK-NEXT: --> (1 + (zext i32 (4 + (trunc [16 x i8]* @z_addr to i32)) to i64)) U: [1,4294967294) S: [1,4294967297) +; CHECK-NEXT: --> (1 + (zext i32 (4 + %int0) to i64)) U: [1,4294967294) S: [1,4294967297) 
   %ptr_noalign = bitcast [16 x i8]* @z_addr_noalign to i8*
   %int0_na = ptrtoint i8* %ptr_noalign to i32
   %int5_na = add i32 %int0_na, 5
   %int.zext_na = zext i32 %int5_na to i64
 ; CHECK: %int.zext_na = zext i32 %int5_na to i64
-; CHECK-NEXT: --> (zext i32 (5 + (trunc [16 x i8]* @z_addr_noalign to i32)) to i64) U: [0,4294967296) S: [0,4294967296)
+; CHECK-NEXT: --> (zext i32 (5 + %int0_na) to i64) U: [0,4294967296) S: [0,4294967296)
   %tmp = load i32, i32* %tmp_addr
   %mul = and i32 %tmp, -4
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
index ac08fb24775e51..e3e9330e241f83 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
@@ -16,25 +16,25 @@ define void @ptrtoint(i8* %in, i64* %out0, i32* %out1, i16* %out2, i128* %out3)
 ; X64-LABEL: 'ptrtoint'
 ; X64-NEXT: Classifying expressions for: @ptrtoint
 ; X64-NEXT: %p0 = ptrtoint i8* %in to i64
-; X64-NEXT: --> %in U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: %p1 = ptrtoint i8* %in to i32
-; X64-NEXT: --> (trunc i8* %in to i32) U: full-set S: full-set
+; X64-NEXT: --> %p1 U: full-set S: full-set
 ; X64-NEXT: %p2 = ptrtoint i8* %in to i16
-; X64-NEXT: --> (trunc i8* %in to i16) U: full-set S: full-set
+; X64-NEXT: --> %p2 U: full-set S: full-set
 ; X64-NEXT: %p3 = ptrtoint i8* %in to i128
-; X64-NEXT: --> (zext i8* %in to i128) U: [0,18446744073709551616) S: [0,18446744073709551616)
+; X64-NEXT: --> %p3 U: [0,18446744073709551616) S: [-18446744073709551616,18446744073709551616)
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint
 ;
 ; X32-LABEL: 'ptrtoint'
 ; X32-NEXT: Classifying expressions for: @ptrtoint
 ; X32-NEXT: %p0 = ptrtoint i8* %in to i64
-; X32-NEXT: --> (zext i8* %in to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: %p1 = ptrtoint i8* %in to i32
-; X32-NEXT: --> %in U: full-set S: full-set
+; X32-NEXT: --> %p1 U: full-set S: full-set
 ; X32-NEXT: %p2 = ptrtoint i8* %in to i16
-; X32-NEXT: --> (trunc i8* %in to i16) U: full-set S: full-set
+; X32-NEXT: --> %p2 U: full-set S: full-set
 ; X32-NEXT: %p3 = ptrtoint i8* %in to i128
-; X32-NEXT: --> (zext i8* %in to i128) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p3 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint
 ;
   %p0 = ptrtoint i8* %in to i64
@@ -53,25 +53,25 @@ define void @ptrtoint_as1(i8 addrspace(1)* %in, i64* %out0, i32* %out1, i16* %ou
 ; X64-LABEL: 'ptrtoint_as1'
 ; X64-NEXT: Classifying expressions for: @ptrtoint_as1
 ; X64-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in to i64
-; X64-NEXT: --> %in U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: %p1 = ptrtoint i8 addrspace(1)* %in to i32
-; X64-NEXT: --> (trunc i8 addrspace(1)* %in to i32) U: full-set S: full-set
+; X64-NEXT: --> %p1 U: full-set S: full-set
 ; X64-NEXT: %p2 = ptrtoint i8 addrspace(1)* %in to i16
-; X64-NEXT: --> (trunc i8 addrspace(1)* %in to i16) U: full-set S: full-set
+; X64-NEXT: --> %p2 U: full-set S: full-set
 ; X64-NEXT: %p3 = ptrtoint i8 addrspace(1)* %in to i128
-; X64-NEXT: --> (zext i8 addrspace(1)* %in to i128) U: [0,18446744073709551616) S: [0,18446744073709551616)
+; X64-NEXT: --> %p3 U: [0,18446744073709551616) S: [-18446744073709551616,18446744073709551616)
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_as1
 ;
 ; X32-LABEL: 'ptrtoint_as1'
 ; X32-NEXT: Classifying expressions for: @ptrtoint_as1
 ; X32-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in to i64
-; X32-NEXT: --> (zext i8 addrspace(1)* %in to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: %p1 = ptrtoint i8 addrspace(1)* %in to i32
-; X32-NEXT: --> %in U: full-set S: full-set
+; X32-NEXT: --> %p1 U: full-set S: full-set
 ; X32-NEXT: %p2 = ptrtoint i8 addrspace(1)* %in to i16
-; X32-NEXT: --> (trunc i8 addrspace(1)* %in to i16) U: full-set S: full-set
+; X32-NEXT: --> %p2 U: full-set S: full-set
 ; X32-NEXT: %p3 = ptrtoint i8 addrspace(1)* %in to i128
-; X32-NEXT: --> (zext i8 addrspace(1)* %in to i128) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p3 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_as1
 ;
   %p0 = ptrtoint i8 addrspace(1)* %in to i64
@@ -92,7 +92,7 @@ define void @ptrtoint_of_bitcast(i8* %in, i64* %out0) {
 ; X64-NEXT: %in_casted = bitcast i8* %in to float*
 ; X64-NEXT: --> %in U: full-set S: full-set
 ; X64-NEXT: %p0 = ptrtoint float* %in_casted to i64
-; X64-NEXT: --> %in U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_bitcast
 ;
 ; X32-LABEL: 'ptrtoint_of_bitcast'
@@ -100,7 +100,7 @@ define void @ptrtoint_of_bitcast(i8* %in, i64* %out0) {
 ; X32-NEXT: %in_casted = bitcast i8* %in to float*
 ; X32-NEXT: --> %in U: full-set S: full-set
 ; X32-NEXT: %p0 = ptrtoint float* %in_casted to i64
-; X32-NEXT: --> (zext i8* %in to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_bitcast
 ;
   %in_casted = bitcast i8* %in to float*
@@ -116,7 +116,7 @@ define void @ptrtoint_of_addrspacecast(i8* %in, i64* %out0) {
 ; X64-NEXT: %in_casted = addrspacecast i8* %in to i8 addrspace(1)*
 ; X64-NEXT: --> %in_casted U: full-set S: full-set
 ; X64-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in_casted to i64
-; X64-NEXT: --> %in_casted U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_addrspacecast
 ;
 ; X32-LABEL: 'ptrtoint_of_addrspacecast'
@@ -124,7 +124,7 @@ define void @ptrtoint_of_addrspacecast(i8* %in, i64* %out0) {
 ; X32-NEXT: %in_casted = addrspacecast i8* %in to i8 addrspace(1)*
 ; X32-NEXT: --> %in_casted U: full-set S: full-set
 ; X32-NEXT: %p0 = ptrtoint i8 addrspace(1)* %in_casted to i64
-; X32-NEXT: --> (zext i8 addrspace(1)* %in_casted to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_addrspacecast
 ;
   %in_casted = addrspacecast i8* %in to i8 addrspace(1)*
@@ -140,7 +140,7 @@ define void @ptrtoint_of_inttoptr(i64 %in, i64* %out0) {
 ; X64-NEXT: %in_casted = inttoptr i64 %in to i8*
 ; X64-NEXT: --> %in_casted U: full-set S: full-set
 ; X64-NEXT: %p0 = ptrtoint i8* %in_casted to i64
-; X64-NEXT: --> %in_casted U: full-set S: full-set
+; X64-NEXT: --> %p0 U: full-set S: full-set
 ; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_inttoptr
 ;
 ; X32-LABEL: 'ptrtoint_of_inttoptr'
@@ -148,7 +148,7 @@ define void @ptrtoint_of_inttoptr(i64 %in, i64* %out0) {
 ; X32-NEXT: %in_casted = inttoptr i64 %in to i8*
 ; X32-NEXT: --> %in_casted U: full-set S: full-set
 ; X32-NEXT: %p0 = ptrtoint i8* %in_casted to i64
-; X32-NEXT: --> (zext i8* %in_casted to i64) U: [0,4294967296) S: [0,4294967296)
+; X32-NEXT: --> %p0 U: [0,4294967296) S: [-4294967296,4294967296)
 ; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_inttoptr
 ;
   %in_casted = inttoptr i64 %in to i8*
@@ -197,17 +197,11 @@ define void @ptrtoint_of_nullptr(i64* %out0) {
 
 ; A constant inttoptr argument of an ptrtoint is still bad.
 define void @ptrtoint_of_constantexpr_inttoptr(i64* %out0) {
-; X64-LABEL: 'ptrtoint_of_constantexpr_inttoptr'
-; X64-NEXT: Classifying expressions for: @ptrtoint_of_constantexpr_inttoptr
-; X64-NEXT: %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
-; X64-NEXT: --> inttoptr (i64 42 to i8*) U: [42,43) S: [-64,64)
-; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_constantexpr_inttoptr
-;
-; X32-LABEL: 'ptrtoint_of_constantexpr_inttoptr'
-; X32-NEXT: Classifying expressions for: @ptrtoint_of_constantexpr_inttoptr
-; X32-NEXT: %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
-; X32-NEXT: --> (zext i8* inttoptr (i64 42 to i8*) to i64) U: [42,43) S: [0,4294967296)
-; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_constantexpr_inttoptr
+; ALL-LABEL: 'ptrtoint_of_constantexpr_inttoptr'
+; ALL-NEXT: Classifying expressions for: @ptrtoint_of_constantexpr_inttoptr
+; ALL-NEXT: %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
+; ALL-NEXT: --> %p0 U: [42,43) S: [-64,64)
+; ALL-NEXT: Determining loop execution counts for: @ptrtoint_of_constantexpr_inttoptr
 ;
   %p0 = ptrtoint i8* inttoptr (i64 42 to i8*) to i64
   store i64 %p0, i64* %out0
diff --git a/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll b/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll
index e7339721447580..564328d999982c 100644
--- a/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll
+++ b/llvm/test/CodeGen/ARM/lsr-undef-in-binop.ll
@@ -186,9 +186,7 @@ define linkonce_odr i32 @vector_insert(%"class.std::__1::vector.182"*, [1 x i32]
 
   br i1 %114, label %124, label %115
 ; CHECK-LABEL: .preheader:
-; CHECK-NEXT: [[NEG_NEW:%[0-9]+]] = sub i32 0, [[NEW_CAST]]
-; CHECK-NEXT: getelementptr i8, i8* %97, i32 [[NEG_NEW]]
-
+; CHECK-NEXT: sub i32 [[OLD_CAST]], [[NEW_CAST]]
 ;