Skip to content

Commit

Permalink
Merged master:6f0ce46873b into amd-gfx:5615b1ef183
Browse files Browse the repository at this point in the history
Local branch amd-gfx 5615b1e Merged master:c966ed8dc7c into amd-gfx:7e2b6a2d3b9
Remote branch master 6f0ce46 Revert "[mlir][spirv] Enhance structure type member decoration handling"
  • Loading branch information
Sw authored and Sw committed Jun 12, 2020
2 parents 5615b1e + 6f0ce46 commit f982686
Show file tree
Hide file tree
Showing 13 changed files with 217 additions and 212 deletions.
14 changes: 11 additions & 3 deletions llvm/lib/Analysis/StackSafetyAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,13 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
return false;
}

unsigned ArgNo = CB.getArgOperandNo(&UI);
if (CB.isByValArgument(ArgNo)) {
US.updateRange(getAccessRange(
UI, Ptr, DL.getTypeStoreSize(CB.getParamByValType(ArgNo))));
break;
}

// FIXME: consult devirt?
// Do not follow aliases, otherwise we could inadvertently follow
// dso_preemptable aliases or aliases with interposable linkage.
Expand All @@ -352,8 +359,7 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
}

assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee));
US.Calls.emplace_back(Callee, CB.getArgOperandNo(&UI),
offsetFrom(UI, Ptr));
US.Calls.emplace_back(Callee, ArgNo, offsetFrom(UI, Ptr));
break;
}

Expand Down Expand Up @@ -382,7 +388,9 @@ FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() {
}

for (Argument &A : make_range(F.arg_begin(), F.arg_end())) {
if (A.getType()->isPointerTy()) {
// Non-pointer and byval arguments are not going to be used in any global
// processing.
if (A.getType()->isPointerTy() && !A.hasByValAttr()) {
auto &UI = Info.Params.emplace(A.getArgNo(), PointerSize).first->second;
analyzeAllUses(&A, UI);
}
Expand Down
76 changes: 46 additions & 30 deletions llvm/lib/Support/Host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,16 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
unsigned Brand_id, unsigned Features,
unsigned Features2, unsigned Features3,
unsigned *Type, unsigned *Subtype) {
// Returns true if feature bit F is set. The bit index selects the word it is
// tested against: 0-31 -> Features, 32-63 -> Features2, 64-95 -> Features3.
// Any index of 96 or above reaches llvm_unreachable.
auto testFeature = [&](unsigned F) {
if (F < 32)
return (Features & (1U << (F & 0x1f))) != 0;
if (F < 64)
return (Features2 & (1U << ((F - 32) & 0x1f))) != 0;
if (F < 96)
return (Features3 & (1U << ((F - 64) & 0x1f))) != 0;
llvm_unreachable("Unexpected FeatureBit");
};

if (Brand_id != 0)
return;
switch (Family) {
Expand All @@ -595,7 +605,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Type = X86::INTEL_i486;
break;
case 5:
if (Features & (1 << X86::FEATURE_MMX)) {
if (testFeature(X86::FEATURE_MMX)) {
*Type = X86::INTEL_PENTIUM_MMX;
break;
}
Expand Down Expand Up @@ -711,9 +721,9 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
// Skylake Xeon:
case 0x55:
*Type = X86::INTEL_COREI7;
if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32)))
if (testFeature(X86::FEATURE_AVX512BF16))
*Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake"
else if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32)))
else if (testFeature(X86::FEATURE_AVX512VNNI))
*Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake"
else
*Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512"
Expand Down Expand Up @@ -777,113 +787,113 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;

default: // Unknown family 6 CPU, try to guess.
// TODO detect tigerlake host
if (Features2 & (1 << (X86::FEATURE_AVX512VP2INTERSECT - 32))) {
// TODO detect tigerlake host from model
if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_TIGERLAKE;
break;
}

if (Features & (1 << X86::FEATURE_AVX512VBMI2)) {
if (testFeature(X86::FEATURE_AVX512VBMI2)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
break;
}

if (Features & (1 << X86::FEATURE_AVX512VBMI)) {
if (testFeature(X86::FEATURE_AVX512VBMI)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_CANNONLAKE;
break;
}

if (Features2 & (1 << (X86::FEATURE_AVX512BF16 - 32))) {
if (testFeature(X86::FEATURE_AVX512BF16)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_COOPERLAKE;
break;
}

if (Features2 & (1 << (X86::FEATURE_AVX512VNNI - 32))) {
if (testFeature(X86::FEATURE_AVX512VNNI)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_CASCADELAKE;
break;
}

if (Features & (1 << X86::FEATURE_AVX512VL)) {
if (testFeature(X86::FEATURE_AVX512VL)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512;
break;
}

if (Features & (1 << X86::FEATURE_AVX512ER)) {
if (testFeature(X86::FEATURE_AVX512ER)) {
*Type = X86::INTEL_KNL; // knl
break;
}

if (Features3 & (1 << (X86::FEATURE_CLFLUSHOPT - 64))) {
if (Features3 & (1 << (X86::FEATURE_SHA - 64))) {
if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
if (testFeature(X86::FEATURE_SHA)) {
*Type = X86::INTEL_GOLDMONT;
} else {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SKYLAKE;
}
break;
}
if (Features3 & (1 << (X86::FEATURE_ADX - 64))) {
if (testFeature(X86::FEATURE_ADX)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_BROADWELL;
break;
}
if (Features & (1 << X86::FEATURE_AVX2)) {
if (testFeature(X86::FEATURE_AVX2)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_HASWELL;
break;
}
if (Features & (1 << X86::FEATURE_AVX)) {
if (testFeature(X86::FEATURE_AVX)) {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_SANDYBRIDGE;
break;
}
if (Features & (1 << X86::FEATURE_SSE4_2)) {
if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
if (testFeature(X86::FEATURE_SSE4_2)) {
if (testFeature(X86::FEATURE_MOVBE)) {
*Type = X86::INTEL_SILVERMONT;
} else {
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_NEHALEM;
}
break;
}
if (Features & (1 << X86::FEATURE_SSE4_1)) {
if (testFeature(X86::FEATURE_SSE4_1)) {
*Type = X86::INTEL_CORE2; // "penryn"
*Subtype = X86::INTEL_CORE2_45;
break;
}
if (Features & (1 << X86::FEATURE_SSSE3)) {
if (Features3 & (1 << (X86::FEATURE_MOVBE - 64))) {
if (testFeature(X86::FEATURE_SSSE3)) {
if (testFeature(X86::FEATURE_MOVBE)) {
*Type = X86::INTEL_BONNELL; // "bonnell"
} else {
*Type = X86::INTEL_CORE2; // "core2"
*Subtype = X86::INTEL_CORE2_65;
}
break;
}
if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
if (testFeature(X86::FEATURE_EM64T)) {
*Type = X86::INTEL_CORE2; // "core2"
*Subtype = X86::INTEL_CORE2_65;
break;
}
if (Features & (1 << X86::FEATURE_SSE3)) {
if (testFeature(X86::FEATURE_SSE3)) {
*Type = X86::INTEL_CORE_DUO;
break;
}
if (Features & (1 << X86::FEATURE_SSE2)) {
if (testFeature(X86::FEATURE_SSE2)) {
*Type = X86::INTEL_PENTIUM_M;
break;
}
if (Features & (1 << X86::FEATURE_SSE)) {
if (testFeature(X86::FEATURE_SSE)) {
*Type = X86::INTEL_PENTIUM_III;
break;
}
if (Features & (1 << X86::FEATURE_MMX)) {
if (testFeature(X86::FEATURE_MMX)) {
*Type = X86::INTEL_PENTIUM_II;
break;
}
Expand All @@ -892,11 +902,11 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
case 15: {
if (Features3 & (1 << (X86::FEATURE_EM64T - 64))) {
if (testFeature(X86::FEATURE_EM64T)) {
*Type = X86::INTEL_NOCONA;
break;
}
if (Features & (1 << X86::FEATURE_SSE3)) {
if (testFeature(X86::FEATURE_SSE3)) {
*Type = X86::INTEL_PRESCOTT;
break;
}
Expand All @@ -911,6 +921,12 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
unsigned Features, unsigned *Type,
unsigned *Subtype) {
// Returns true if feature bit F is set in Features. This function receives
// only a single 32-bit feature word, so any index of 32 or above reaches
// llvm_unreachable.
auto testFeature = [&](unsigned F) {
if (F < 32)
return (Features & (1U << (F & 0x1f))) != 0;
llvm_unreachable("Unexpected FeatureBit");
};

// FIXME: this poorly matches the generated SubtargetFeatureKV table. There
// appears to be no way to generate the wide variety of AMD-specific targets
// from the information returned from CPUID.
Expand Down Expand Up @@ -938,14 +954,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
}
break;
case 6:
if (Features & (1 << X86::FEATURE_SSE)) {
if (testFeature(X86::FEATURE_SSE)) {
*Type = X86::AMD_ATHLON_XP;
break; // "athlon-xp"
}
*Type = X86::AMD_ATHLON;
break; // "athlon"
case 15:
if (Features & (1 << X86::FEATURE_SSE3)) {
if (testFeature(X86::FEATURE_SSE3)) {
*Type = X86::AMD_K8SSE3;
break; // "k8-sse3"
}
Expand Down
16 changes: 11 additions & 5 deletions llvm/lib/Transforms/Scalar/JumpThreading.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,16 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
if (!CondBr)
return;

BranchProbability BP;
uint64_t TrueWeight, FalseWeight;
if (!CondBr->extractProfMetadata(TrueWeight, FalseWeight))
return;

if (TrueWeight + FalseWeight == 0)
// Zero branch_weights do not give a hint for getting branch probabilities.
// Technically it would result in division by zero denominator, which is
// TrueWeight + FalseWeight.
return;

// Returns the outgoing edge of the dominating predecessor block
// that leads to the PhiNode's incoming block:
auto GetPredOutEdge =
Expand Down Expand Up @@ -253,10 +258,11 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
if (!CI || !CI->getType()->isIntegerTy(1))
continue;

BP = (CI->isOne() ? BranchProbability::getBranchProbability(
TrueWeight, TrueWeight + FalseWeight)
: BranchProbability::getBranchProbability(
FalseWeight, TrueWeight + FalseWeight));
BranchProbability BP =
(CI->isOne() ? BranchProbability::getBranchProbability(
TrueWeight, TrueWeight + FalseWeight)
: BranchProbability::getBranchProbability(
FalseWeight, TrueWeight + FalseWeight));

auto PredOutEdge = GetPredOutEdge(PN->getIncomingBlock(i), BB);
if (!PredOutEdge.first)
Expand Down
44 changes: 44 additions & 0 deletions llvm/test/Analysis/StackSafetyAnalysis/local.ll
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,47 @@ entry:
call void @LeakAddress() ["unknown"(i32* %a)]
ret void
}

; Callee taking a byval i16 pointer and not using it. The expected output
; lists no entries under either "args uses:" or "allocas uses:".
define void @ByVal(i16* byval %p) {
; CHECK-LABEL: @ByVal dso_preemptable{{$}}
; CHECK-NEXT: args uses:
; CHECK-NEXT: allocas uses:
; CHECK-NOT: ]:
entry:
ret void
}

; Passes allocas to @ByVal as byval i16 arguments. Both the 2-byte alloca %x
; and the 8-byte alloca %y (bitcast to i16*) are expected to be reported with
; the range [0,2), i.e. the size of the byval i16 rather than of the alloca.
define void @TestByVal() {
; CHECK-LABEL: @TestByVal dso_preemptable{{$}}
; CHECK-NEXT: args uses:
; CHECK-NEXT: allocas uses:
; CHECK-NEXT: x[2]: [0,2)
; CHECK-NEXT: y[8]: [0,2)
; CHECK-NOT: ]:
entry:
%x = alloca i16, align 4
call void @ByVal(i16* byval %x)

%y = alloca i64, align 4
%y1 = bitcast i64* %y to i16*
call void @ByVal(i16* byval %y1)

ret void
}

declare void @ByValArray([100000 x i64]* byval %p)

; Passes a pointer 500000 bytes into the 800000-byte alloca %z as a byval
; [100000 x i64] argument. The expected range [500000,1300000) starts at that
; offset and spans the full 800000-byte byval type, extending past the end of
; the alloca.
define void @TestByValArray() {
; CHECK-LABEL: @TestByValArray dso_preemptable{{$}}
; CHECK-NEXT: args uses:
; CHECK-NEXT: allocas uses:
; CHECK-NEXT: z[800000]: [500000,1300000)
; CHECK-NOT: ]:
entry:
%z = alloca [100000 x i64], align 4
%z1 = bitcast [100000 x i64]* %z to i8*
%z2 = getelementptr i8, i8* %z1, i64 500000
%z3 = bitcast i8* %z2 to [100000 x i64]*
call void @ByValArray([100000 x i64]* byval %z3)
ret void
}
Loading

0 comments on commit f982686

Please sign in to comment.