Skip to content

Commit

Permalink
[PowerPC] Tune AIX shared library TLS model at function level (#84132)
Browse files Browse the repository at this point in the history
Under some circumstances (the library is loaded with the main program), the
TLS initial-exec model can be applied to local-dynamic access(es). We
can use a simple heuristic to decide on the update at the function level:
* If the number of TLS local-dynamic access(es) in the function is less than
or equal to a threshold, use the TLS initial-exec model. (The threshold, which
defaults to 1, is controlled by a hidden option.)
  • Loading branch information
orcguru authored May 9, 2024
1 parent 51f178d commit ea126ae
Show file tree
Hide file tree
Showing 13 changed files with 859 additions and 3 deletions.
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5077,6 +5077,10 @@ def maix_small_local_dynamic_tls : Flag<["-"], "maix-small-local-dynamic-tls">,
"where the offset from the TLS base is encoded as an "
"immediate operand (AIX 64-bit only). "
"This access sequence is not used for variables larger than 32KB.">;
// Driver flag -maix-shared-lib-tls-model-opt, placed in the PowerPC features
// group. NOTE(review): presumably forwarded as the
// "aix-shared-lib-tls-model-opt" target feature like the other flags in this
// group - confirm against the driver's feature handling.
def maix_shared_lib_tls_model_opt : Flag<["-"], "maix-shared-lib-tls-model-opt">,
Group<m_ppc_Features_Group>,
HelpText<"For shared library loaded with the main program, change local-dynamic access(es) "
"to initial-exec access(es) at the function level (AIX 64-bit only).">;
def maix_struct_return : Flag<["-"], "maix-struct-return">,
Group<m_Group>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Return all structs in memory (PPC32 only)">,
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Basic/Targets/PPC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
IsISA3_1 = true;
} else if (Feature == "+quadword-atomics") {
HasQuadwordAtomics = true;
} else if (Feature == "+aix-shared-lib-tls-model-opt") {
HasAIXShLibTLSModelOpt = true;
}
// TODO: Finish this list and add an assert that we've handled them
// all.
Expand Down Expand Up @@ -580,6 +582,9 @@ bool PPCTargetInfo::initFeatureMap(
Features["aix-small-local-exec-tls"] = false;
Features["aix-small-local-dynamic-tls"] = false;

// Turn off TLS model opt by default.
Features["aix-shared-lib-tls-model-opt"] = false;

Features["spe"] = llvm::StringSwitch<bool>(CPU)
.Case("8548", true)
.Case("e500", true)
Expand Down Expand Up @@ -722,6 +727,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("isa-v30-instructions", IsISA3_0)
.Case("isa-v31-instructions", IsISA3_1)
.Case("quadword-atomics", HasQuadwordAtomics)
.Case("aix-shared-lib-tls-model-opt", HasAIXShLibTLSModelOpt)
.Default(false);
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/PPC.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool IsISA3_0 = false;
bool IsISA3_1 = false;
bool HasQuadwordAtomics = false;
bool HasAIXShLibTLSModelOpt = false;

protected:
std::string ABI;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/PowerPC/PPC.td
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,12 @@ def FeatureAIXLocalDynamicTLS :
"true", "Produce a faster local-dynamic TLS sequence for this "
"function for 64-bit AIX">;

// Subtarget feature "aix-shared-lib-tls-model-opt": when enabled, sets the
// subtarget field HasAIXShLibTLSModelOpt to true. PPCSubtarget reports a fatal
// error if the feature is enabled on anything other than 64-bit AIX.
def FeatureAIXSharedLibTLSModelOpt :
SubtargetFeature<"aix-shared-lib-tls-model-opt",
"HasAIXShLibTLSModelOpt", "true",
"Tune TLS model at function level in shared library loaded "
"with the main program (for 64-bit AIX only)">;

def FeaturePredictableSelectIsExpensive :
SubtargetFeature<"predictable-select-expensive",
"PredictableSelectIsExpensive",
Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,15 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE;
if (Model == TLSModel::InitialExec)
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE;
// On AIX, TLS model opt may have turned local-dynamic accesses into
// initial-exec accesses.
PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
if (Model == TLSModel::LocalDynamic &&
FuncInfo->isAIXFuncUseTLSIEForLD()) {
LLVM_DEBUG(
dbgs() << "Current function uses IE access for default LD vars.\n");
return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSIE;
}
llvm_unreachable("Only expecting local-exec or initial-exec accesses!");
}
// For GD TLS access on AIX, we have two TOC entries for the symbol (one for
Expand Down Expand Up @@ -2950,7 +2959,11 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
// Setup the csect for the current TC entry. If the variant kind is
// VK_PPC_AIX_TLSGDM the entry represents the region handle, we create a
// new symbol to prefix the name with a dot.
if (I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM) {
// If TLS model opt is turned on, create a new symbol to prefix the name
// with a dot.
if (I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
(Subtarget->hasAIXShLibTLSModelOpt() &&
I.first.second == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLD)) {
SmallString<128> Name;
StringRef Prefix = ".";
Name += Prefix;
Expand Down
58 changes: 58 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ static cl::opt<unsigned> PPCGatherAllAliasesMaxDepth(
"ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
cl::desc("max depth when checking alias info in GatherAllAliases()"));

// Hidden tuning knob for aix-shared-lib-tls-model-opt (default 1). A function
// switches its local-dynamic TLS accesses to initial-exec when the number of
// distinct local-dynamic TLS globals it references is less than or equal to
// this value.
static cl::opt<unsigned> PPCAIXTLSModelOptUseIEForLDLimit(
"ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
"function to use initial-exec"));

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
STATISTIC(ShufflesHandledWithVPERM,
Expand Down Expand Up @@ -3362,6 +3367,54 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
return LowerGlobalTLSAddressLinux(Op, DAG);
}

/// updateForAIXShLibTLSModelOpt - Helper to lazily initialize the per-function
/// TLS model opt settings, and then apply them to \p Model.
///
/// On the first call for a function, every call to
/// Intrinsic::threadlocal_address in the function is inspected and the
/// distinct local-dynamic TLS global variables are counted. If that count is
/// less than or equal to -ppc-aix-shared-lib-tls-model-opt-limit, the function
/// is marked as using initial-exec for its local-dynamic accesses. The result
/// is cached in PPCFunctionInfo so the scan runs once per function.
///
/// \param Model TLS model of the variable currently being lowered. It is
///              rewritten to initial-exec only when it is local-dynamic and
///              the function qualifies; every other model is left untouched,
///              matching the emission code which only translates
///              local-dynamic accesses to initial-exec.
/// \param DAG   Selection DAG of the function being lowered.
/// \param TM    Target machine used to query the TLS model of each variable.
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model,
                                         SelectionDAG &DAG,
                                         const TargetMachine &TM) {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
    // Collect the distinct local-dynamic TLS global variables, i.e. global
    // values passed as the first argument to Intrinsic::threadlocal_address.
    // A set is used so repeated accesses to one variable count once.
    SmallPtrSet<const GlobalValue *, 8> TLSGV;
    for (const BasicBlock &BB : MF.getFunction()) {
      for (const Instruction &I : BB) {
        const auto *CI = dyn_cast<CallInst>(&I);
        if (!CI)
          continue;
        const Function *CF = CI->getCalledFunction();
        if (!CF || !CF->isDeclaration() ||
            CF->getIntrinsicID() != Intrinsic::threadlocal_address)
          continue;
        if (const auto *GV = dyn_cast<GlobalValue>(CI->getOperand(0)))
          if (TM.getTLSModel(GV) == TLSModel::LocalDynamic)
            TLSGV.insert(GV);
      }
    }

    unsigned TLSGVCnt = TLSGV.size();
    LLVM_DEBUG(dbgs() << "LocalDynamic TLSGV count:" << TLSGVCnt << "\n");
    if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
      FuncInfo->setAIXFuncUseTLSIEForLD();
    FuncInfo->setAIXFuncTLSModelOptInitDone();
  }

  // Only local-dynamic accesses are eligible. This helper is invoked for
  // every TLS variable lowered in the function, so other models
  // (general-dynamic, local-exec, initial-exec) must pass through unchanged.
  if (Model == TLSModel::LocalDynamic && FuncInfo->isAIXFuncUseTLSIEForLD()) {
    LLVM_DEBUG(
        dbgs() << MF.getName()
               << " function is using the TLS-IE model for TLS-LD access.\n");
    Model = TLSModel::InitialExec;
  }
}

SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
Expand All @@ -3374,6 +3427,11 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
bool Is64Bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

// Apply update to the TLS model.
if (Subtarget.hasAIXShLibTLSModelOpt())
updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());

bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;

if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "MCTargetDesc/PPCMCExpr.h"
#include "PPC.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
Expand Down Expand Up @@ -81,6 +82,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
}

const TargetMachine &TM = Printer.TM;
const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();

if (MO.getTargetFlags() == PPCII::MO_PLT)
RefKind = MCSymbolRefExpr::VK_PLT;
Expand All @@ -100,18 +103,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
MO.getTargetFlags() == PPCII::MO_TLSLD_FLAG) {
assert(MO.isGlobal() && "Only expecting a global MachineOperand here!");
TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
// For the local-[exec|dynamic] TLS model, we may generate the offset from
// the TLS base as an immediate operand (instead of using a TOC entry). Set
// the relocation type in case the result is used for purposes other than a
// TOC reference. In TOC reference cases, this result is discarded.
if (Model == TLSModel::LocalExec)
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLE;
else if (Model == TLSModel::LocalDynamic &&
FuncInfo->isAIXFuncUseTLSIEForLD())
// On AIX, TLS model opt may have turned local-dynamic accesses into
// initial-exec accesses.
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSIE;
else if (Model == TLSModel::LocalDynamic)
RefKind = MCSymbolRefExpr::VK_PPC_AIX_TLSLD;
}

const MachineInstr *MI = MO.getParent();
const MachineFunction *MF = MI->getMF();
const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());

Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// to use SExt/ZExt flags in later optimization.
std::vector<std::pair<Register, ISD::ArgFlagsTy>> LiveInAttrs;

/// Flags for aix-shared-lib-tls-model-opt, will be lazily initialized for
/// each function.
/// AIXFuncUseTLSIEForLD: set when the function's local-dynamic TLS accesses
/// are to be lowered and emitted as initial-exec.
/// AIXFuncTLSModelOptInitDone: set once the per-function decision has been
/// computed, so it is not recomputed on later TLS lowerings.
bool AIXFuncUseTLSIEForLD = false;
bool AIXFuncTLSModelOptInitDone = false;

public:
explicit PPCFunctionInfo(const Function &F, const TargetSubtargetInfo *STI);

Expand Down Expand Up @@ -221,6 +226,13 @@ class PPCFunctionInfo : public MachineFunctionInfo {
void setHasFastCall() { HasFastCall = true; }
bool hasFastCall() const { return HasFastCall;}

/// Mark the aix-shared-lib-tls-model-opt decision for this function as
/// computed. The flag can only be set, never cleared.
void setAIXFuncTLSModelOptInitDone() { AIXFuncTLSModelOptInitDone = true; }
/// Return true once the TLS model opt decision has been computed.
bool isAIXFuncTLSModelOptInitDone() const {
return AIXFuncTLSModelOptInitDone;
}
/// Record that this function uses initial-exec for its local-dynamic TLS
/// accesses. The flag can only be set, never cleared.
void setAIXFuncUseTLSIEForLD() { AIXFuncUseTLSIEForLD = true; }
/// Return true if local-dynamic TLS accesses in this function are treated as
/// initial-exec.
bool isAIXFuncUseTLSIEForLD() const { return AIXFuncUseTLSIEForLD; }

int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/PowerPC/PPCSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
"-data-sections.\n",
false);
}

if (HasAIXShLibTLSModelOpt && (!TargetTriple.isOSAIX() || !IsPPC64))
report_fatal_error("The aix-shared-lib-tls-model-opt attribute "
"is only supported on AIX in 64-bit mode.\n",
false);
}

bool PPCSubtarget::enableMachineScheduler() const { return true; }
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
; RUN: -mattr=+aix-shared-lib-tls-model-opt --code-model=large < %s | FileCheck %s --check-prefixes=OPT
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
; RUN: -mattr=+aix-small-local-dynamic-tls --code-model=large < %s | FileCheck %s --check-prefixes=SMALL
; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
; RUN: -mattr=+aix-shared-lib-tls-model-opt -mattr=+aix-small-local-dynamic-tls \
; RUN: --code-model=large < %s | FileCheck %s --check-prefixes=BOTH

@VarTLSLD1 = internal thread_local(localdynamic) global i32 42, align 4

; Single_LD reads the local-dynamic TLS variable @VarTLSLD1 twice (once in the
; entry block and once in bb1), both through llvm.threadlocal.address, so the
; function references exactly one distinct local-dynamic TLS variable.
; Expected output when +aix-shared-lib-tls-model-opt is enabled (alone or
; together with +aix-small-local-dynamic-tls): a TOC load followed by an
; indexed load off register 13, with no call. Expected output with only
; +aix-small-local-dynamic-tls: a call to .__tls_get_mod.
define i32 @Single_LD(i32 %P, i32 %Q) {
; OPT-LABEL: Single_LD:
; OPT: # %bb.0: # %entry
; OPT-NEXT: and 4, 3, 4
; OPT-NEXT: addis 3, L..C0@u(2)
; OPT-NEXT: ld 3, L..C0@l(3)
; OPT-NEXT: cmpwi 4, -1
; OPT-NEXT: lwzx 3, 13, 3
; OPT-NEXT: blr
;
; SMALL-LABEL: Single_LD:
; SMALL: # %bb.0: # %entry
; SMALL-NEXT: mflr 0
; SMALL-NEXT: stdu 1, -48(1)
; SMALL-NEXT: and 6, 3, 4
; SMALL-NEXT: addis 3, L..C0@u(2)
; SMALL-NEXT: std 0, 64(1)
; SMALL-NEXT: ld 3, L..C0@l(3)
; SMALL-NEXT: bla .__tls_get_mod[PR]
; SMALL-NEXT: cmpwi 6, -1
; SMALL-NEXT: lwz 3, VarTLSLD1[TL]@ld(3)
; SMALL-NEXT: addi 1, 1, 48
; SMALL-NEXT: ld 0, 16(1)
; SMALL-NEXT: mtlr 0
; SMALL-NEXT: blr
;
; BOTH-LABEL: Single_LD:
; BOTH: # %bb.0: # %entry
; BOTH-NEXT: and 4, 3, 4
; BOTH-NEXT: addis 3, L..C0@u(2)
; BOTH-NEXT: ld 3, L..C0@l(3)
; BOTH-NEXT: cmpwi 4, -1
; BOTH-NEXT: lwzx 3, 13, 3
; BOTH-NEXT: blr
entry:
%a = icmp slt i32 %P, 0
%b = icmp slt i32 %Q, 0
%c = and i1 %a, %b
%tls1 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @VarTLSLD1)
%load1 = load i32, ptr %tls1, align 4
br i1 %c, label %bb1, label %return

bb1:
%tls2 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @VarTLSLD1)
%load2 = load i32, ptr %tls2, align 4
ret i32 %load2

return:
ret i32 %load1
}

; OPT-LABEL: .toc
; OPT-LABEL: L..C0:
; OPT-NEXT: .tc VarTLSLD1[TE],VarTLSLD1[TL]@ie

; SMALL-LABEL: .toc
; SMALL-LABEL: L..C0:
; SMALL-NEXT: .tc _Renamed..5f24__TLSML[TC],_Renamed..5f24__TLSML[TC]@ml
; SMALL-NEXT: .rename _Renamed..5f24__TLSML[TC],"_$TLSML"

; BOTH-LABEL: .toc
; BOTH-LABEL: L..C0:
; BOTH-NEXT: .tc VarTLSLD1[TE],VarTLSLD1[TL]@ie
Loading

0 comments on commit ea126ae

Please sign in to comment.