From 7e57dc7bcf60eab24d89497d5ac4077c2603d120 Mon Sep 17 00:00:00 2001
From: William Moses
Date: Fri, 10 Feb 2023 15:02:21 -0500
Subject: [PATCH] Fix final gc lowering on dynamically sized allocation (#48620)

---
Recovery note (not part of the commit): this copy was rebuilt from a rendering
that had collapsed every line break and dropped the `<ConstantInt>` template
argument of `cast`/`dyn_cast`; both are restored below. The author address on
the From: header was lost as well and is left blank. Leading whitespace inside
the hunks was reconstructed, so use `git apply --ignore-whitespace` if context
lines fail to match.

Summary of the change: a `julia.gc_alloc_bytes` call whose size operand is not
a `ConstantInt` is now lowered to
`jl_gc_alloc_typed(ptls, size + sizeof(void*), null)`; constant sizes keep the
existing pool / big-object lowering.

 src/llvm-final-gc-lowering.cpp    | 49 +++++++++++++++++++------------
 src/llvm-pass-helpers.cpp         | 19 ++++++++++++
 src/llvm-pass-helpers.h           |  3 ++
 test/llvmpasses/final-lower-gc.ll | 15 ++++++++++
 4 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 3b8533c6d0115..e4e8ff69ee2da 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -48,6 +48,7 @@ struct FinalLowerGC: private JuliaPassContext {
     Function *queueRootFunc;
     Function *poolAllocFunc;
     Function *bigAllocFunc;
+    Function *allocTypedFunc;
     Instruction *pgcstack;
 
     // Lowers a `julia.new_gc_frame` intrinsic.
@@ -208,26 +209,35 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
 {
     ++GCAllocBytesCount;
     assert(target->arg_size() == 2);
-    auto sz = (size_t)cast<ConstantInt>(target->getArgOperand(1))->getZExtValue();
-    // This is strongly architecture and OS dependent
-    int osize;
-    int offset = jl_gc_classify_pools(sz, &osize);
+    CallInst *newI;
+
     IRBuilder<> builder(target);
     builder.SetCurrentDebugLocation(target->getDebugLoc());
     auto ptls = target->getArgOperand(0);
-    CallInst *newI;
     Attribute derefAttr;
-    if (offset < 0) {
-        newI = builder.CreateCall(
-            bigAllocFunc,
-            { ptls, ConstantInt::get(getSizeTy(F.getContext()), sz + sizeof(void*)) });
-        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*));
-    }
-    else {
-        auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
-        auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
-        newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
-        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
+
+    if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
+        size_t sz = (size_t)CI->getZExtValue();
+        // This is strongly architecture and OS dependent
+        int osize;
+        int offset = jl_gc_classify_pools(sz, &osize);
+        if (offset < 0) {
+            newI = builder.CreateCall(
+                bigAllocFunc,
+                { ptls, ConstantInt::get(getSizeTy(F.getContext()), sz + sizeof(void*)) });
+            derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sz + sizeof(void*));
+        }
+        else {
+            auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
+            auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
+            newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
+            derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), osize);
+        }
+    } else {
+        auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), getSizeTy(F.getContext()));
+        size = builder.CreateAdd(size, ConstantInt::get(getSizeTy(F.getContext()), sizeof(void*)));
+        newI = builder.CreateCall(allocTypedFunc, { ptls, size, ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext())) });
+        derefAttr = Attribute::getWithDereferenceableBytes(F.getContext(), sizeof(void*));
     }
     newI->setAttributes(newI->getCalledFunction()->getAttributes());
     newI->addRetAttr(derefAttr);
@@ -243,8 +253,9 @@ bool FinalLowerGC::doInitialization(Module &M) {
     queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
     poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
     bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
+    allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
 
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
+    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
     unsigned j = 0;
     for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
         if (!functionList[i])
@@ -260,8 +271,8 @@ bool FinalLowerGC::doInitialization(Module &M) {
 
 bool FinalLowerGC::doFinalization(Module &M)
 {
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
-    queueRootFunc = poolAllocFunc = bigAllocFunc = nullptr;
+    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc, allocTypedFunc};
+    queueRootFunc = poolAllocFunc = bigAllocFunc = allocTypedFunc = nullptr;
     auto used = M.getGlobalVariable("llvm.compiler.used");
     if (!used)
         return false;
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index ea390f01010fd..e69a7d32bda9b 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -229,6 +229,7 @@ namespace jl_well_known {
     static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
     static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
     static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
+    static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed);
 
     using jl_intrinsics::addGCAllocAttributes;
 
@@ -276,4 +277,22 @@ namespace jl_well_known {
             func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
             return func;
         });
+
+    const WellKnownFunctionDescription GCAllocTyped(
+        GC_ALLOC_TYPED_NAME,
+        [](const JuliaPassContext &context) {
+            auto allocTypedFunc = Function::Create(
+                FunctionType::get(
+                    context.T_prjlvalue,
+                    { Type::getInt8PtrTy(context.getLLVMContext()),
+                        sizeof(size_t) == sizeof(uint32_t) ?
+                        Type::getInt32Ty(context.getLLVMContext()) :
+                        Type::getInt64Ty(context.getLLVMContext()),
+                        Type::getInt8PtrTy(context.getLLVMContext()) },
+                    false),
+                Function::ExternalLinkage,
+                GC_ALLOC_TYPED_NAME);
+            allocTypedFunc->addFnAttr(Attribute::getWithAllocSizeArgs(context.getLLVMContext(), 1, None));
+            return addGCAllocAttributes(allocTypedFunc, context.getLLVMContext());
+        });
 }
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 2b2bd50cd0e4d..3388e6d485181 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -149,6 +149,9 @@ namespace jl_well_known {
 
     // `jl_gc_queue_root`: queues a GC root.
     extern const WellKnownFunctionDescription GCQueueRoot;
+
+    // `jl_gc_alloc_typed`: allocates bytes.
+    extern const WellKnownFunctionDescription GCAllocTyped;
 }
 
 #endif
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index 95e88f9feac9e..7fc2d1b04ca2d 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -67,6 +67,21 @@ top:
   ret {} addrspace(10)* %v
 }
 
+define {} addrspace(10)* @gc_alloc_lowering_var(i64 %size) {
+top:
+; CHECK-LABEL: @gc_alloc_lowering_var
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %ptls = call {}*** @julia.ptls_states()
+  %ptls_i8 = bitcast {}*** %ptls to i8*
+; CHECK: %0 = add i64 %size, 8
+; CHECK: %v = call noalias nonnull dereferenceable(8) {} addrspace(10)* @ijl_gc_alloc_typed(i8* %ptls_i8, i64 %0, i8* null)
+  %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 %size)
+  %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
+  %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
+  store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* %1, align 8, !tbaa !0
+  ret {} addrspace(10)* %v
+}
+
 !0 = !{!1, !1, i64 0}
 !1 = !{!"jtbaa_gcframe", !2, i64 0}
 !2 = !{!"jtbaa"}