diff --git a/clang/lib/Tooling/CompilationDatabase.cpp b/clang/lib/Tooling/CompilationDatabase.cpp index 2b4c26dab96f7f..79bb8c0ce09ab1 100644 --- a/clang/lib/Tooling/CompilationDatabase.cpp +++ b/clang/lib/Tooling/CompilationDatabase.cpp @@ -199,22 +199,6 @@ class UnusedInputDiagConsumer : public DiagnosticConsumer { SmallVector UnusedInputs; }; -// Unary functor for asking "Given a StringRef S1, does there exist a string -// S2 in Arr where S1 == S2?" -struct MatchesAny { - MatchesAny(ArrayRef Arr) : Arr(Arr) {} - - bool operator() (StringRef S) { - for (const std::string *I = Arr.begin(), *E = Arr.end(); I != E; ++I) - if (*I == S) - return true; - return false; - } - -private: - ArrayRef Arr; -}; - // Filter of tools unused flags such as -no-integrated-as and -Wa,*. // They are not used for syntax checking, and could confuse targets // which don't support these options. @@ -292,8 +276,7 @@ static bool stripPositionalArgs(std::vector Args, // up with no jobs but then this is the user's fault. Args.push_back("placeholder.cpp"); - Args.erase(std::remove_if(Args.begin(), Args.end(), FilterUnusedFlags()), - Args.end()); + llvm::erase_if(Args, FilterUnusedFlags()); const std::unique_ptr Compilation( NewDriver->BuildCompilation(Args)); @@ -320,15 +303,14 @@ static bool stripPositionalArgs(std::vector Args, return false; } - // Remove all compilation input files from the command line. This is - // necessary so that getCompileCommands() can construct a command line for - // each file. - std::vector::iterator End = std::remove_if( - Args.begin(), Args.end(), MatchesAny(CompileAnalyzer.Inputs)); - - // Remove all inputs deemed unused for compilation. - End = std::remove_if(Args.begin(), End, MatchesAny(DiagClient.UnusedInputs)); - + // Remove all compilation input files from the command line and inputs deemed + // unused for compilation. This is necessary so that getCompileCommands() can + // construct a command line for each file. 
+ std::vector::iterator End = + llvm::remove_if(Args, [&](StringRef S) { + return llvm::is_contained(CompileAnalyzer.Inputs, S) || + llvm::is_contained(DiagClient.UnusedInputs, S); + }); // Remove the -c add above as well. It will be at the end right now. assert(strcmp(*(End - 1), "-c") == 0); --End; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 663c9460cfbad7..cd9b85279c1901 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1307,8 +1307,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { VecOpTy = dyn_cast(Tys[VecTyIndex]); } + // Library call cost - make it expensive unless costing for code size. + unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10; SmallVector ISDs; - unsigned SingleCallCost = 10; // Library call cost. Make it expensive. switch (IID) { default: { // Assume that we need to scalarize this intrinsic. diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 27064d2da5dab8..dec3f072262d51 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -791,8 +791,11 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, case coro::ABI::Async: { Shape.AsyncLowering.FrameOffset = alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign); + // Also make the final context size a multiple of the context alignment to + // make allocation easier for allocators. 
Shape.AsyncLowering.ContextSize = - Shape.AsyncLowering.FrameOffset + Shape.FrameSize; + alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize, + Shape.AsyncLowering.getContextAlignment()); if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) { report_fatal_error( "The alignment requirment of frame variables cannot be higher than " diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index c7b9339f1aa77e..66bb10e8474f88 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -16,6 +16,9 @@ declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) declare float @llvm.fmuladd.f32(float, float, float) declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) +declare float @llvm.log2.f32(float) +declare <16 x float> @llvm.log2.v16f32(<16 x float>) + declare i32 @llvm.cttz.i32(i32, i1) declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) @@ -83,6 +86,32 @@ define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float ret void } +define void @log2(float %a, <16 x float> %va) { +; THRU-LABEL: 'log2' +; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; THRU-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'log2' +; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %s = call float @llvm.log2.f32(float %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'log2' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %s = call float @llvm.log2.f32(float %a) +; SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'log2' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call float @llvm.log2.f32(float %a) + %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) + ret void +} + define void @cttz(i32 %a, <16 x i32> %va) { ; THRU-LABEL: 'cttz' ; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll index f26329caa7d260..9cd8614764dcce 100644 --- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll @@ -19,6 +19,9 @@ declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) declare float @llvm.fmuladd.f32(float, float, float) declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) +declare float @llvm.log2.f32(float) +declare <16 x float> @llvm.log2.v16f32(<16 x float>) + declare i32 @llvm.cttz.i32(i32, i1) declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1) @@ -114,6 +117,32 @@ define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float ret void } +define void @log2(float %a, <16 x float> %va) { +; THRU-LABEL: 'log2' +; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; THRU-NEXT: Cost Model: Found an estimated cost of 184 
for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; LATE-LABEL: 'log2' +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call float @llvm.log2.f32(float %a) +; LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE-LABEL: 'log2' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a) +; SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SIZE_LATE-LABEL: 'log2' +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %s = call float @llvm.log2.f32(float %a) + %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va) + ret void +} + define void @cttz(i32 %a, <16 x i32> %va) { ; THRU-LABEL: 'cttz' ; THRU-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false) diff --git a/llvm/test/Transforms/Coroutines/coro-async.ll b/llvm/test/Transforms/Coroutines/coro-async.ll index 35b23bb33a6e90..0faff4aed2e0ff 100644 --- a/llvm/test/Transforms/Coroutines/coro-async.ll +++ b/llvm/test/Transforms/Coroutines/coro-async.ll @@ -99,8 +99,8 @@ entry: } ; Make sure we update the async function pointer -; CHECK: @my_async_function_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 168 } -; CHECK: @my_async_function2_fp = constant <{ i32, 
i32 }> <{ {{.*}}, i32 168 } +; CHECK: @my_async_function_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 } +; CHECK: @my_async_function2_fp = constant <{ i32, i32 }> <{ {{.*}}, i32 176 } ; CHECK-LABEL: define swiftcc void @my_async_function(i8* %async.ctxt, %async.task* %task, %async.actor* %actor) { ; CHECK: entry: diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll b/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll index 33ca2771d2865c..668bc69e5a0c52 100644 --- a/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/thresholdO3-cost-model.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-unroll -unroll-threshold=300 -S %s | FileCheck %s -; This test was full unrolled and simplified at -O3 with clang 11. +; This test was fully unrolled and simplified at -O3 with clang 11. ; Changes to the cost model may cause that decision to differ. 
; We would not necessarily view the difference as a regression, ; but we should be aware that cost model changes can affect an @@ -20,45 +20,30 @@ define i32 @tripcount_11() { ; CHECK-NEXT: do.body6.preheader: ; CHECK-NEXT: br label [[DO_BODY6:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[CMP5_NOT:%.*]] = icmp eq i32 [[DIV20:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP5_NOT]], label [[FOR_END:%.*]], label [[DO_BODY6]] +; CHECK-NEXT: br i1 true, label [[FOR_COND_1:%.*]], label [[IF_THEN11:%.*]] ; CHECK: do.body6: -; CHECK-NEXT: [[I_021:%.*]] = phi i32 [ [[DIV20]], [[FOR_COND:%.*]] ], [ 1024, [[DO_BODY6_PREHEADER:%.*]] ] -; CHECK-NEXT: [[OR_I:%.*]] = or i32 [[I_021]], 1 -; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[OR_I]], i1 true) -; CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[I_021]], [[T0]] -; CHECK-NEXT: [[AND_I:%.*]] = lshr i32 [[SHL_I]], 26 -; CHECK-NEXT: [[T1:%.*]] = trunc i32 [[AND_I]] to i8 -; CHECK-NEXT: [[CONV3_I:%.*]] = and i8 [[T1]], 31 -; CHECK-NEXT: [[AND4_I:%.*]] = lshr i32 [[SHL_I]], 11 -; CHECK-NEXT: [[CONV6_I:%.*]] = and i32 [[AND4_I]], 32767 -; CHECK-NEXT: [[IDXPROM_I:%.*]] = zext i8 [[CONV3_I]] to i64 -; CHECK-NEXT: [[ARRAYIDX_I7:%.*]] = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 [[IDXPROM_I]] -; CHECK-NEXT: [[T2:%.*]] = load i16, i16* [[ARRAYIDX_I7]], align 2 -; CHECK-NEXT: [[CONV7_I:%.*]] = zext i16 [[T2]] to i32 -; CHECK-NEXT: [[NARROW_I:%.*]] = add nuw nsw i8 [[CONV3_I]], 1 -; CHECK-NEXT: [[T3:%.*]] = zext i8 [[NARROW_I]] to i64 -; CHECK-NEXT: [[ARRAYIDX11_I:%.*]] = getelementptr inbounds [33 x i16], [33 x i16]* @tab_log2, i64 0, i64 [[T3]] -; CHECK-NEXT: [[T4:%.*]] = load i16, i16* [[ARRAYIDX11_I]], align 2 -; CHECK-NEXT: [[CONV12_I:%.*]] = zext i16 [[T4]] to i32 -; CHECK-NEXT: [[SUB16_I:%.*]] = sub nsw i32 [[CONV12_I]], [[CONV7_I]] -; CHECK-NEXT: [[MUL_I8:%.*]] = mul nsw i32 [[CONV6_I]], [[SUB16_I]] -; CHECK-NEXT: [[SHR17_I:%.*]] = ashr i32 [[MUL_I8]], 15 -; CHECK-NEXT: [[CONV_I:%.*]] = shl nuw nsw i32 [[T0]], 15 -; 
CHECK-NEXT: [[SHL20_I:%.*]] = xor i32 [[CONV_I]], 1015808 -; CHECK-NEXT: [[ADD18_I:%.*]] = add nuw nsw i32 [[SHL20_I]], [[CONV7_I]] -; CHECK-NEXT: [[ADD21_I:%.*]] = add nsw i32 [[ADD18_I]], [[SHR17_I]] -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[I_021]] to double -; CHECK-NEXT: [[T5:%.*]] = tail call double @llvm.log2.f64(double [[CONV]]) -; CHECK-NEXT: [[CONV8:%.*]] = fptosi double [[T5]] to i32 -; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[CONV8]], 15 -; CHECK-NEXT: [[ADD:%.*]] = or i32 [[MUL]], 4 -; CHECK-NEXT: [[CMP9:%.*]] = icmp eq i32 [[ADD21_I]], [[ADD]] -; CHECK-NEXT: [[DIV20]] = lshr i32 [[I_021]], 1 -; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_COND]], label [[IF_THEN11:%.*]] +; CHECK-NEXT: br i1 true, label [[FOR_COND:%.*]], label [[IF_THEN11]] ; CHECK: if.then11: ; CHECK-NEXT: unreachable -; CHECK: for.end: +; CHECK: for.cond.1: +; CHECK-NEXT: br i1 true, label [[FOR_COND_2:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.2: +; CHECK-NEXT: br i1 true, label [[FOR_COND_3:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.3: +; CHECK-NEXT: br i1 true, label [[FOR_COND_4:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.4: +; CHECK-NEXT: br i1 true, label [[FOR_COND_5:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.5: +; CHECK-NEXT: br i1 true, label [[FOR_COND_6:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.6: +; CHECK-NEXT: br i1 true, label [[FOR_COND_7:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.7: +; CHECK-NEXT: br i1 true, label [[FOR_COND_8:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.8: +; CHECK-NEXT: br i1 true, label [[FOR_COND_9:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.9: +; CHECK-NEXT: br i1 true, label [[FOR_COND_10:%.*]], label [[IF_THEN11]] +; CHECK: for.cond.10: ; CHECK-NEXT: ret i32 0 ; do.body6.preheader: