Skip to content

Commit

Permalink
merge internal development externally
Browse files Browse the repository at this point in the history
  • Loading branch information
searlmc1 committed May 27, 2024
2 parents 0f4f31c + 8b9d4d3 commit c3f2117
Show file tree
Hide file tree
Showing 21 changed files with 673 additions and 131 deletions.
7 changes: 2 additions & 5 deletions llvm/include/llvm/Support/Discriminator.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,9 @@ static inline unsigned getBaseFSBitEnd() {
}

// Set bits in range of [0 .. n] to 1. Used in FS Discriminators.
static inline unsigned getN1Bits(int N) {
// Work around the g++ bug that folding "(1U << (N + 1)) - 1" to 0.
if (N == 31)
return 0xFFFFFFFF;
static inline unsigned getN1Bits(unsigned N) {
assert((N < 32) && "N is invalid");
return (1U << (N + 1)) - 1;
return 0xFFFFFFFF >> (31 - N);
}

} // namespace llvm
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14751,7 +14751,9 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) {
return;
}

Preds.push_back(N);
// Only add predicate if it is not already implied by this union predicate.
if (!implies(N))
Preds.push_back(N);
}

PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3972,7 +3972,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
// target can override this with custom lowering and calling the
// implementation functions.
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
if (LI.isLegalOrCustom({G_UMIN, Ty}))
if (LI.isLegalOrCustom({G_UMIN, Ty}) && LI.isLegalOrCustom({G_UMAX, Ty}))
return lowerAddSubSatToMinMax(MI);
return lowerAddSubSatToAddoSubo(MI);
}
Expand Down
32 changes: 16 additions & 16 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
Expand Down Expand Up @@ -988,7 +989,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
}
}

static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
static void getLiveRegsForEntryMBB(LiveRegUnits &LiveRegs,
const MachineBasicBlock &MBB) {
const MachineFunction *MF = MBB.getParent();
LiveRegs.addLiveIns(MBB);
Expand Down Expand Up @@ -1018,16 +1019,18 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {

const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
LiveRegUnits LiveRegs(TRI);
getLiveRegsForEntryMBB(LiveRegs, *MBB);

// Prefer X9 since it was historically used for the prologue scratch reg.
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (LiveRegs.available(MRI, AArch64::X9))
if (LiveRegs.available(AArch64::X9))
return AArch64::X9;

for (unsigned Reg : AArch64::GPR64RegClass) {
if (LiveRegs.available(MRI, Reg))
BitVector Allocatable =
TRI.getAllocatableSet(*MF, TRI.getRegClass(AArch64::GPR64RegClassID));

for (unsigned Reg : Allocatable.set_bits()) {
if (LiveRegs.available(Reg))
return Reg;
}
return AArch64::NoRegister;
Expand All @@ -1043,14 +1046,11 @@ bool AArch64FrameLowering::canUseAsPrologue(
const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();

if (AFI->hasSwiftAsyncContext()) {
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
const MachineRegisterInfo &MRI = MF->getRegInfo();
LivePhysRegs LiveRegs(TRI);
LiveRegUnits LiveRegs(*RegInfo);
getLiveRegsForEntryMBB(LiveRegs, MBB);
// The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are
// available.
if (!LiveRegs.available(MRI, AArch64::X16) ||
!LiveRegs.available(MRI, AArch64::X17))
if (!LiveRegs.available(AArch64::X16) || !LiveRegs.available(AArch64::X17))
return false;
}

Expand Down Expand Up @@ -1606,7 +1606,7 @@ static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
/// Collect live registers from the end of \p MI's parent up to (including) \p
/// MI in \p LiveRegs.
static void getLivePhysRegsUpTo(MachineInstr &MI, const TargetRegisterInfo &TRI,
LivePhysRegs &LiveRegs) {
LiveRegUnits &LiveRegs) {

MachineBasicBlock &MBB = *MI.getParent();
LiveRegs.addLiveOuts(MBB);
Expand Down Expand Up @@ -1644,7 +1644,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
NonFrameStart->getFlag(MachineInstr::FrameSetup))
++NonFrameStart;

LivePhysRegs LiveRegs(*TRI);
LiveRegUnits LiveRegs(*TRI);
if (NonFrameStart != MBB.end()) {
getLivePhysRegsUpTo(*NonFrameStart, *TRI, LiveRegs);
// Ignore registers used for stack management for now.
Expand All @@ -1662,7 +1662,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
make_range(MBB.instr_begin(), NonFrameStart->getIterator())) {
for (auto &Op : MI.operands())
if (Op.isReg() && Op.isDef())
assert(!LiveRegs.contains(Op.getReg()) &&
assert(LiveRegs.available(Op.getReg()) &&
"live register clobbered by inserted prologue instructions");
}
});
Expand Down Expand Up @@ -4132,7 +4132,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
// FIXME: Bailing out of the merge like this is conservative in some ways:
// for example, this liveness check is performed even when no STG loops are
// present after merging the insert list, in which case it is not needed.
LivePhysRegs LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
LiveRegUnits LiveRegs(*(MBB->getParent()->getSubtarget().getRegisterInfo()));
LiveRegs.addLiveOuts(*MBB);
for (auto I = MBB->rbegin();; ++I) {
MachineInstr &MI = *I;
Expand All @@ -4141,7 +4141,7 @@ MachineBasicBlock::iterator tryMergeAdjacentSTG(MachineBasicBlock::iterator II,
LiveRegs.stepBackward(*I);
}
InsertI++;
if (LiveRegs.contains(AArch64::NZCV))
if (!LiveRegs.available(AArch64::NZCV))
return InsertI;

llvm::stable_sort(Instrs,
Expand Down
5 changes: 1 addition & 4 deletions llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1025,10 +1025,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
OrigBB->addSuccessor(NewBB);

// Update live-in information in the new block.
MachineRegisterInfo &MRI = MF->getRegInfo();
for (MCPhysReg L : LRs)
if (!MRI.isReserved(L))
NewBB->addLiveIn(L);
addLiveIns(*NewBB, LRs);

// Update internal data structures to account for the newly inserted MBB.
// This is almost the same as updateForInsertedWaterBlock, except that
Expand Down
14 changes: 11 additions & 3 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14381,9 +14381,17 @@ static SDValue CombineANDShift(SDNode *N,
}
}

// FIXME: Transform "(and (shl x, c2) c1)" ->
// "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
// c1.
// Transform "(and (shl x, c2) c1)" into "(shl (and x, c1>>c2), c2)"
// if "c1 >> c2" is a cheaper immediate than "c1"
if (LeftShift &&
HasLowerConstantMaterializationCost(C1 >> C2, C1, Subtarget)) {

SDValue And = DAG.getNode(ISD::AND, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C1 >> C2, DL, MVT::i32));
return DAG.getNode(ISD::SHL, DL, MVT::i32, And,
DAG.getConstant(C2, DL, MVT::i32));
}

return SDValue();
}

Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,18 @@ SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
}

// ISD::READSTEADYCOUNTER needs custom lowering: the target-independent SDNode
// is flagged as having side effects, whereas the Hexagon register read has
// none, so TableGen rejects a direct pattern from that node to A4_tfrcpp.
// The node is rewritten here into HexagonISD::READTIMER instead.
SDValue HexagonTargetLowering::LowerREADSTEADYCOUNTER(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc Loc(Op);
  // Result types of the replacement node: an i64 value and MVT::Other.
  SDVTList ResultTys = DAG.getVTList(MVT::i64, MVT::Other);
  // Operand 0 of the incoming node is forwarded as the only operand.
  SDValue InChain = Op.getOperand(0);
  return DAG.getNode(HexagonISD::READTIMER, Loc, ResultTys, InChain);
}

SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
Expand Down Expand Up @@ -1507,6 +1519,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
Expand Down Expand Up @@ -1932,6 +1945,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::READTIMER: return "HexagonISD::READTIMER";
case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
case HexagonISD::D2P: return "HexagonISD::D2P";
Expand Down Expand Up @@ -3389,6 +3403,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
case ISD::READSTEADYCOUNTER: return LowerREADSTEADYCOUNTER(Op, DAG);
break;
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ enum NodeType : unsigned {
EH_RETURN,
DCFETCH,
READCYCLE,
READTIMER,
PTRUE,
PFALSE,
D2P, // Convert 8-byte value to 8-bit predicate register. [*]
Expand Down Expand Up @@ -207,6 +208,7 @@ class HexagonTargetLowering : public TargetLowering {
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADSTEADYCOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -3392,6 +3392,12 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,

def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;

// Read time counter.
// HexagonREADTIMER is the SDNode for HexagonISD::READTIMER; it has the
// SDTInt64Leaf type profile and carries a chain (SDNPHasChain). The pattern
// below selects it to A4_tfrcpp with the UTIMER operand.
def HexagonREADTIMER: SDNode<"HexagonISD::READTIMER", SDTInt64Leaf,
[SDNPHasChain]>;

def: Pat<(HexagonREADTIMER), (A4_tfrcpp UTIMER)>;

// The declared return value of the store-locked intrinsics is i32, but
// the instructions actually define i1. To avoid register copies from
// IntRegs to PredRegs and back, fold the entire pattern checking the
Expand Down
16 changes: 10 additions & 6 deletions llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
Expand Down Expand Up @@ -161,7 +161,8 @@ bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB,
assert(DestReg == MI.getOperand(1).getReg() &&
"Expected destination and first source operand to be the same.");

LivePhysRegs LiveRegs(TII->getRegisterInfo());
const TargetRegisterInfo &TRI = TII->getRegisterInfo();
LiveRegUnits LiveRegs(TRI);
LiveRegs.addLiveOuts(MBB);
for (auto I = std::prev(MBB.end()); I != MBBI; --I)
LiveRegs.stepBackward(*I);
Expand All @@ -171,15 +172,18 @@ bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB,
MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
RestMBB->transferSuccessors(&MBB);
for (MCPhysReg R : LiveRegs)
RestMBB->addLiveIn(R);
const BitVector &BV = TRI.getAllocatableSet(MF);
for (Register Reg : BV.set_bits())
if (!LiveRegs.available(Reg))
RestMBB->addLiveIn(Reg);

// Create a new block MoveMBB to hold the move instruction.
MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
MoveMBB->addLiveIn(SrcReg);
for (MCPhysReg R : LiveRegs)
MoveMBB->addLiveIn(R);
for (Register Reg : BV.set_bits())
if (!LiveRegs.available(Reg))
MoveMBB->addLiveIn(Reg);

// At the end of MBB, create a conditional branch to RestMBB if the
// condition is false, otherwise fall through to MoveMBB.
Expand Down
60 changes: 31 additions & 29 deletions llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,26 +48,25 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
return SDValue();

#ifndef NDEBUG
// If the base register might conflict with our physical registers, bail out.
const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
X86::ECX, X86::EAX, X86::EDI};
assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
if (isBaseRegConflictPossible(DAG, ClobberSet))
return SDValue();

ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<X86Subtarget>();

// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
if (Alignment < Align(4) || !ConstantSize ||
ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
return SDValue();

uint64_t SizeVal = ConstantSize->getZExtValue();
Expand Down Expand Up @@ -128,26 +127,29 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
InGlue = Chain.getValue(1);

SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
EVT AddrVT = Dst.getValueType();
EVT SizeVT = Size.getValueType();

Chain =
DAG.getMemset(Chain, dl,
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
DAG.getConstant(Offset, dl, AddrVT)),
Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
isVolatile, AlwaysInline,
/* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
}
SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
SDValue RepStos = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
return Chain;
/// RepStos can process the whole length.
if (BytesLeft == 0)
return RepStos;

// Handle the last 1 - 7 bytes.
SmallVector<SDValue, 4> Results;
Results.push_back(RepStos);
unsigned Offset = SizeVal - BytesLeft;
EVT AddrVT = Dst.getValueType();
EVT SizeVT = Size.getValueType();

Results.push_back(
DAG.getMemset(Chain, dl,
DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
DAG.getConstant(Offset, dl, AddrVT)),
Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
isVolatile, AlwaysInline,
/* isTailCall */ false, DstPtrInfo.getWithOffset(Offset)));

return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}

/// Emit a single REP MOVS{B,W,D,Q} instruction.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86Subtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
CPU = "generic";

if (TuneCPU.empty())
TuneCPU = "i586"; // FIXME: "generic" is more modern than llc tests expect.
TuneCPU = HasX86_64 ? "generic" : "i586";

std::string FullFS = X86_MC::ParseX86Triple(TargetTriple);
assert(!FullFS.empty() && "Failed to parse X86 triple");
Expand Down
Loading

0 comments on commit c3f2117

Please sign in to comment.