diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 6aabc459d2c91..295dd93e869ba 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -3321,7 +3321,7 @@ static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_data
     LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*)));
     LI->setOrdering(AtomicOrdering::NotAtomic);
     LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None));
-    jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+    jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
     aliasinfo.decorateInst(LI);
     Value *ptr = LI;
     if (AS) {
@@ -3347,7 +3347,7 @@ static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t)
     return emit_guarded_test(ctx, foreign, t, [&] {
             addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1);
             LoadInst *owner = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*)));
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
             ai.decorateInst(owner);
             return ctx.builder.CreateSelect(ctx.builder.CreateIsNull(owner), t, owner);
         });
@@ -4432,6 +4432,105 @@ static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b)
 }
 #endif
 
+static auto *emit_genericmemory_unchecked(jl_codectx_t &ctx, Value *cg_nbytes, Value *cg_typ)
+{
+    auto ptls = get_current_ptls(ctx);
+    auto call = prepare_call(jl_alloc_genericmemory_unchecked_func);
+    auto *alloc = ctx.builder.CreateCall(call, { ptls, cg_nbytes, cg_typ});
+    alloc->setAttributes(call->getAttributes());
+    alloc->addRetAttr(Attribute::getWithAlignment(alloc->getContext(), Align(JL_HEAP_ALIGNMENT)));
+    call->addRetAttr(Attribute::getWithDereferenceableBytes(call->getContext(), sizeof(jl_genericmemory_t)));
+    return alloc;
+}
+
+static void emit_memory_zeroinit_and_stores(jl_codectx_t &ctx, jl_datatype_t *typ, Value* alloc, Value* nbytes, Value* nel, int zi)
+{
+    auto arg_typename = [&] JL_NOTSAFEPOINT {
+        std::string type_str;
+        auto eltype = jl_tparam1(typ);
+        if (jl_is_datatype(eltype))
+            type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name);
+        else if (jl_is_uniontype(eltype))
+            type_str = "Union";
+        else
+            type_str = "<unknown type>";
+        return "Memory{" + type_str + "}[]";
+    };
+    setName(ctx.emission_context, alloc, arg_typename);
+    // set length (jl_alloc_genericmemory_unchecked_func doesn't have it)
+    Value *decay_alloc = decay_derived(ctx, alloc);
+    Value *len_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 0);
+    auto len_store = ctx.builder.CreateAlignedStore(nel, len_field, Align(sizeof(void*)));
+    auto aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen);
+    aliasinfo.decorateInst(len_store);
+    //This avoids the length store from being deleted which is illegal
+    ctx.builder.CreateFence(AtomicOrdering::Release, SyncScope::SingleThread);
+    // zeroinit pointers and unions
+    if (zi) {
+        Value *memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1);
+        auto *load = ctx.builder.CreateAlignedLoad(ctx.types().T_ptr, memory_ptr, Align(sizeof(void*)));
+        aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
+        aliasinfo.decorateInst(load);
+        auto int8t = getInt8Ty(ctx.builder.getContext());
+        ctx.builder.CreateMemSet(load, ConstantInt::get(int8t, 0), nbytes, Align(sizeof(void*)));
+    }
+    return;
+}
+
+
+static jl_cgval_t emit_const_len_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, size_t nel, jl_genericmemory_t *inst)
+{
+    if (nel == 0) {
+        Value *empty_alloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst));
+        return mark_julia_type(ctx, empty_alloc, true, typ);
+    }
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)typ)->layout;
+    assert(((jl_datatype_t*)typ)->has_concrete_subtype && layout != NULL);
+    size_t elsz = layout->size;
+    int isboxed = layout->flags.arrayelem_isboxed;
+    int isunion = layout->flags.arrayelem_isunion;
+    int zi = ((jl_datatype_t*)typ)->zeroinit;
+    if (isboxed)
+        elsz = sizeof(void*);
+    size_t nbytes;
+    bool overflow = __builtin_mul_overflow(nel, elsz, &nbytes);
+    if (isunion) {
+        // an extra byte for each isbits union memory element, stored at m->ptr + m->length
+        overflow |= __builtin_add_overflow(nbytes, nel, &nbytes);
+    }
+    // overflow if signed size is too big or nel is too big (the latter matters iff elsz==0)
+    ssize_t tmp=1;
+    overflow |= __builtin_add_overflow(nel, 1, &tmp) || __builtin_add_overflow(nbytes, 1, &tmp);
+    if (overflow)
+        emit_error(ctx, prepare_call(jlargumenterror_func), "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
+
+    auto T_size = ctx.types().T_size;
+    auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ);
+    auto cg_nbytes = ConstantInt::get(T_size, nbytes);
+    auto cg_nel = ConstantInt::get(T_size, nel);
+    size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT);
+    // if allocation fits within GC pools
+    int pooled = tot <= GC_MAX_SZCLASS;
+    Value *alloc, *decay_alloc, *memory_ptr;
+    jl_aliasinfo_t aliasinfo;
+    if (pooled) {
+        alloc = emit_allocobj(ctx, tot, cg_typ, false, JL_SMALL_BYTE_ALIGNMENT);
+        decay_alloc = decay_derived(ctx, alloc);
+        memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1);
+        setName(ctx.emission_context, memory_ptr, "memory_ptr");
+        auto objref = emit_pointer_from_objref(ctx, alloc);
+        Value *memory_data = emit_ptrgep(ctx, objref, JL_SMALL_BYTE_ALIGNMENT);
+        auto *store = ctx.builder.CreateAlignedStore(memory_data, memory_ptr, Align(sizeof(void*)));
+        aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
+        aliasinfo.decorateInst(store);
+        setName(ctx.emission_context, memory_data, "memory_data");
+    } else { // just use the dynamic length version since the malloc will be slow anyway
+        alloc = emit_genericmemory_unchecked(ctx, cg_nbytes, cg_typ);
+    }
+    emit_memory_zeroinit_and_stores(ctx, typ, alloc, cg_nbytes, cg_nel, zi);
+    return mark_julia_type(ctx, alloc, true, typ);
+}
+
 static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval_t nel, jl_genericmemory_t *inst)
 {
     emit_typecheck(ctx, nel, (jl_value_t*)jl_long_type, "memorynew");
@@ -4448,9 +4547,7 @@ static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval
     if (isboxed)
         elsz = sizeof(void*);
 
-    auto ptls = get_current_ptls(ctx);
     auto T_size = ctx.types().T_size;
-    auto int8t = getInt8Ty(ctx.builder.getContext());
     BasicBlock *emptymemBB, *nonemptymemBB, *retvalBB;
     emptymemBB = BasicBlock::Create(ctx.builder.getContext(), "emptymem");
     nonemptymemBB = BasicBlock::Create(ctx.builder.getContext(), "nonemptymem");
@@ -4466,18 +4563,7 @@ static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval
     ctx.builder.CreateBr(retvalBB);
     nonemptymemBB->insertInto(ctx.f);
     ctx.builder.SetInsertPoint(nonemptymemBB);
-    // else actually allocate mem
-    auto arg_typename = [&] JL_NOTSAFEPOINT {
-        std::string type_str;
-        auto eltype = jl_tparam1(typ);
-        if (jl_is_datatype(eltype))
-            type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name);
-        else if (jl_is_uniontype(eltype))
-            type_str = "Union";
-        else
-            type_str = "<unknown type>";
-        return "Memory{" + type_str + "}[]";
-    };
+
     auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ);
 
     auto cg_elsz = ConstantInt::get(T_size, elsz);
@@ -4501,27 +4587,10 @@ static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval
     overflow = ctx.builder.CreateOr(overflow, tobignel);
     Value *notoverflow = ctx.builder.CreateNot(overflow);
     error_unless(ctx, prepare_call(jlargumenterror_func), notoverflow, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
-    // actually allocate
-    auto call = prepare_call(jl_alloc_genericmemory_unchecked_func);
-    Value *alloc = ctx.builder.CreateCall(call, { ptls, nbytes, cg_typ});
-    // set length (jl_alloc_genericmemory_unchecked_func doesn't have it)
-    Value *decay_alloc = decay_derived(ctx, alloc);
-    Value *len_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 0);
-    auto len_store = ctx.builder.CreateAlignedStore(nel_unboxed, len_field, Align(sizeof(void*)));
-    auto aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen);
-    aliasinfo.decorateInst(len_store);
-    //This avoids the length store from being deleted which is illegal
-    ctx.builder.CreateFence(AtomicOrdering::Release, SyncScope::SingleThread);
-    // zeroinit pointers and unions
-    if (zi) {
-        Value *memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1);
-        auto *load = ctx.builder.CreateAlignedLoad(ctx.types().T_ptr, memory_ptr, Align(sizeof(void*)));
-        aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr);
-        aliasinfo.decorateInst(load);
-        ctx.builder.CreateMemSet(load, ConstantInt::get(int8t, 0), nbytes, Align(sizeof(void*)));
-    }
+    // actually allocate the memory
 
-    setName(ctx.emission_context, alloc, arg_typename);
+    Value *alloc = emit_genericmemory_unchecked(ctx, nbytes, cg_typ);
+    emit_memory_zeroinit_and_stores(ctx, typ, alloc, nbytes, nel_unboxed, zi);
     ctx.builder.CreateBr(retvalBB);
     nonemptymemBB = ctx.builder.GetInsertBlock();
     // phi node to choose which side of branch
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 1ca45a98b0620..4b5676cc8216e 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -408,9 +408,9 @@ struct jl_tbaacache_t {
         tbaa_arrayptr = tbaa_make_child(mbuilder, "jtbaa_arrayptr", tbaa_array_scalar).first;
         tbaa_arraysize = tbaa_make_child(mbuilder, "jtbaa_arraysize", tbaa_array_scalar).first;
         tbaa_arrayselbyte = tbaa_make_child(mbuilder, "jtbaa_arrayselbyte", tbaa_array_scalar).first;
-        tbaa_memoryptr = tbaa_make_child(mbuilder, "jtbaa_memoryptr", tbaa_array_scalar, true).first;
-        tbaa_memorylen = tbaa_make_child(mbuilder, "jtbaa_memorylen", tbaa_array_scalar, true).first;
-        tbaa_memoryown = tbaa_make_child(mbuilder, "jtbaa_memoryown", tbaa_array_scalar, true).first;
+        tbaa_memoryptr = tbaa_make_child(mbuilder, "jtbaa_memoryptr", tbaa_array_scalar).first;
+        tbaa_memorylen = tbaa_make_child(mbuilder, "jtbaa_memorylen", tbaa_array_scalar).first;
+        tbaa_memoryown = tbaa_make_child(mbuilder, "jtbaa_memoryown", tbaa_array_scalar).first;
         tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first;
     }
 };
@@ -1179,6 +1179,7 @@ static const auto jl_alloc_genericmemory_unchecked_func = new JuliaFunction{
         AttrBuilder FnAttrs(C);
         FnAttrs.addMemoryAttr(MemoryEffects::none());
         FnAttrs.addAttribute(Attribute::NoUnwind);
+        FnAttrs.addAttribute(Attribute::Speculatable);
+        FnAttrs.addAttribute(Attribute::WillReturn);
+        FnAttrs.addAttribute(Attribute::NoRecurse);
+        FnAttrs.addAttribute(Attribute::NoSync);
         return AttributeList::get(C,
             AttributeSet::get(C, FnAttrs),
             Attributes(C, {Attribute::NonNull}),
@@ -4470,7 +4475,17 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         jl_genericmemory_t *inst = (jl_genericmemory_t*)((jl_datatype_t*)typ)->instance;
         if (inst == NULL)
             return false;
-        *ret = emit_memorynew(ctx, typ, argv[2], inst);
+        if (argv[2].constant) {
+            if (!jl_is_long(argv[2].constant))
+                return false;
+            size_t nel = jl_unbox_long(argv[2].constant);
+            if (nel < 0)
+                return false;
+            *ret = emit_const_len_memorynew(ctx, typ, nel, inst);
+        }
+        else {
+            *ret = emit_memorynew(ctx, typ, argv[2], inst);
+        }
         return true;
     }
 
diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
index 59fce1235e14e..194c6837860ca 100644
--- a/src/llvm-alloc-helpers.cpp
+++ b/src/llvm-alloc-helpers.cpp
@@ -250,8 +250,8 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
                 return true;
             }
             if (required.pass.gc_loaded_func == callee) {
-                required.use_info.haspreserve = true;
-                required.use_info.hasload = true;
+                // TODO add manual load->store forwarding
+                push_inst(inst);
                 return true;
             }
             if (required.pass.typeof_func == callee) {
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index a9e1b1e02da42..7dd794a4d8847 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -752,11 +752,11 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
             call->eraseFromParent();
             return;
         }
-        //if (pass.gc_loaded_func == callee) {
-        //    call->replaceAllUsesWith(new_i);
-        //    call->eraseFromParent();
-        //    return;
-        //}
+        if (pass.gc_loaded_func == callee) {
+            // TODO: handle data pointer forwarding, length forwarding, and fence removal
+            user->replaceUsesOfWith(orig_i, Constant::getNullValue(orig_i->getType()));
+            return;
+        }
         if (pass.typeof_func == callee) {
             ++RemovedTypeofs;
             call->replaceAllUsesWith(tag);
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index 8c9054c0d65ff..39f896ba656d2 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -527,6 +527,7 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder *
             JULIA_PASS(FPM.addPass(LateLowerGCPass()));
             JULIA_PASS(FPM.addPass(FinalLowerGCPass()));
             if (O.getSpeedupLevel() >= 2) {
+                FPM.addPass(DSEPass());
                 FPM.addPass(GVNPass());
                 FPM.addPass(SCCPPass());
                 FPM.addPass(DCEPass());
diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl
index 10f46eb4a8031..85df1d64017b4 100644
--- a/test/boundscheck_exec.jl
+++ b/test/boundscheck_exec.jl
@@ -297,4 +297,54 @@ end
     typeintersect(Int, Integer)
 end |> only === Type{Int}
 
+if bc_opt == bc_default
+@testset "Array/Memory escape analysis" begin
+    function no_allocate(T::Type{<:Union{Memory, Vector}})
+        v = T(undef, 2)
+        v[1] = 2
+        v[2] = 3
+        return v[1] + v[2]
+    end
+    function test_alloc(::Type{T}; broken=false) where T
+        @test (@allocated no_allocate(T)) == 0 broken=broken
+    end
+    @testset "$T" for T in [Memory, Vector]
+        @testset "$ET" for ET in [Int, Float32, Union{Int, Float64}]
+            no_allocate(T{ET}) #compile
+            # allocations aren't removed for Union eltypes which they theoretically could be eventually
+            test_alloc(T{ET}, broken=(ET==Union{Int, Float64}))
+        end
+    end
+    function f() # this was causing a bug on an in progress version of #55913.
+        m = Memory{Float64}(undef, 4)
+        m .= 1.0
+        s = 0.0
+        for x ∈ m
+            s += x
+        end
+        s
+    end
+    @test f() === 4.0
+    function confuse_alias_analysis()
+        mem0 = Memory{Int}(undef, 1)
+        mem1 = Memory{Int}(undef, 1)
+        @inbounds mem0[1] = 3
+        for width in 1:2
+            @inbounds mem1[1] = mem0[1]
+            mem0 = mem1
+        end
+        mem0[1]
+    end
+    @test confuse_alias_analysis() == 3
+    @test (@allocated confuse_alias_analysis()) == 0
+    function no_alias_prove(n)
+        m1 = Memory{Int}(undef,n)
+        m2 = Memory{Int}(undef,n)
+        m1 === m2
+    end
+    no_alias_prove(1)
+    @test_broken (@allocated no_alias_prove(5)) == 0
+end
+end
+
 end
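
For reference, the user-visible effect of the constant-length memorynew fast path together with the TBAA and escape-analysis changes above can be checked interactively along the lines of the new tests. This is a minimal sketch, not part of the patch; the function name sum_fixed is illustrative, and the final assertion is only expected to hold on a build that includes these changes.

    using Test

    # A small fixed-size Memory that never escapes: with the constant-length
    # fast path plus alloc-opt, the allocation is expected to be elided.
    function sum_fixed()
        m = Memory{Int}(undef, 4)   # length is a compile-time constant
        for i in 1:4
            @inbounds m[i] = i
        end
        s = 0
        for x in m
            s += x
        end
        return s
    end

    sum_fixed()                     # force compilation before measuring
    @test sum_fixed() == 10
    @test (@allocated sum_fixed()) == 0   # expected once this patch is applied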