From e58fb0a07b5b62e5d8991c6e5b41f90acf69d0d2 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 1 Sep 2023 15:18:03 -0300 Subject: [PATCH 1/7] Add src alignment arg to emit_memcpy --- src/ccall.cpp | 8 +++++--- src/cgutils.cpp | 34 +++++++++++++++++----------------- src/codegen.cpp | 10 +++++----- src/intrinsics.cpp | 6 +++--- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/ccall.cpp b/src/ccall.cpp index 1add621edde28..118803cef1b10 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -550,6 +550,8 @@ static Value *julia_to_native( // pass the address of an alloca'd thing, not a box // since those are immutable. Value *slot = emit_static_alloca(ctx, to); + unsigned align = julia_alignment(jlto); + cast(slot)->setAlignment(Align(align)); setName(ctx.emission_context, slot, "native_convert_buffer"); if (!jvinfo.ispointer()) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); @@ -557,7 +559,7 @@ static Value *julia_to_native( } else { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); - emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), julia_alignment(jlto)); + emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), align, align); } return slot; } @@ -1826,7 +1828,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) emit_inttoptr(ctx, emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type), getInt8PtrTy(ctx.builder.getContext())), - MaybeAlign(0), + MaybeAlign(1), emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type), false); JL_GC_POP(); @@ -2171,7 +2173,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( slot->setAlignment(Align(boxalign)); ctx.builder.CreateAlignedStore(result, slot, Align(boxalign)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign); + emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign); } else { init_bits_value(ctx, strct, result, tbaa, boxalign); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index c6ba2d7551986..7722f27e02224 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -934,11 +934,11 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x) } static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, - jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align, bool is_volatile) + jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align_dst, unsigned align_src, bool is_volatile) { if (sz == 0) return; - assert(align && "align must be specified"); + assert(align_dst && "align must be specified"); // If the types are small and simple, use load and store directly. // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int // that interferes with other optimizations. @@ -979,8 +979,8 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const setName(ctx.emission_context, src, "memcpy_refined_src"); if (isa(dst) && !dst->hasName()) setName(ctx.emission_context, dst, "memcpy_refined_dst"); - auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile)); - dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile)); + auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align_src), is_volatile)); + dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align_dst), is_volatile)); ++SkippedMemcpys; return; } @@ -998,37 +998,37 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const // above problem won't be as serious. auto merged_ai = dst_ai.merge(src_ai); - ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, + ctx.builder.CreateMemCpy(dst, MaybeAlign(align_dst), src, MaybeAlign(align_src), sz, is_volatile, merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); } static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, - jl_aliasinfo_t const &src_ai, Value *sz, unsigned align, bool is_volatile) + jl_aliasinfo_t const &src_ai, Value *sz, unsigned align_dst, unsigned align_src, bool is_volatile) { if (auto const_sz = dyn_cast(sz)) { - emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align_dst, align_src, is_volatile); return; } ++EmittedMemcpys; auto merged_ai = dst_ai.merge(src_ai); - ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, + ctx.builder.CreateMemCpy(dst, MaybeAlign(align_dst), src, MaybeAlign(align_src), sz, is_volatile, merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); } template static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, - jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align, bool is_volatile=false) + jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align_dst, unsigned aligned_src, bool is_volatile=false) { - emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, aligned_src, is_volatile); } template static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src, - T1 &&sz, unsigned align, bool is_volatile=false) + T1 &&sz, unsigned align_dst, bool is_volatile=false) { auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa); - emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, julia_alignment(src.typ), is_volatile); } static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type) @@ -1884,7 +1884,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j else if (!alignment) alignment = julia_alignment(jltype); if (intcast && Order == AtomicOrdering::NotAtomic) { - emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment); + emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment, intcast->getAlign().value()); } else { LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false); @@ -2481,7 +2481,7 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, if (al > 1) lv->setAlignment(Align(al)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); - emit_memcpy(ctx, lv, ai, addr, ai, fsz, al); + emit_memcpy(ctx, lv, ai, addr, ai, fsz, al, al); addr = lv; } return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa); @@ -3110,7 +3110,7 @@ static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, { // newv should already be tagged if (v.ispointer()) { - emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*)); + emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*), sizeof(void*)); } else { init_bits_value(ctx, newv, v.V, tbaa); @@ -3576,7 +3576,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr); auto f = [&] { (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr, - jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile); + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile); return nullptr; }; if (skip) @@ -3613,7 +3613,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con return; } else { emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr, - jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile); + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile); } } ctx.builder.CreateBr(postBB); diff --git a/src/codegen.cpp b/src/codegen.cpp index 52fd8c3443c9a..96fd3fca86665 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4935,7 +4935,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va else { const DataLayout &DL = jl_Module->getDataLayout(); uint64_t sz = DL.getTypeStoreSize(T); - emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value()); + emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value(), varslot->getAlign().value()); } Value *tindex = NULL; if (vi.pTIndex) @@ -5092,7 +5092,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) Value *isboxed = ctx.builder.CreateICmpNE( ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)); - ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, MaybeAlign(0), nbytes, false); + ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, dest->getAlign(), nbytes, false); ctx.builder.CreateLifetimeEnd(dest); Value *ptr = ctx.builder.CreateSelect(isboxed, maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), getInt8PtrTy(ctx.builder.getContext())), @@ -5133,7 +5133,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) dest = emit_static_alloca(ctx, vtype); Value *phi = emit_static_alloca(ctx, vtype); ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)), - dest, MaybeAlign(0), + dest, dest->getAlign(), jl_datatype_size(phiType), false); ctx.builder.CreateLifetimeEnd(dest); slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack); @@ -6069,7 +6069,7 @@ static void emit_cfunc_invalidate( ctx.builder.CreateStore(gf_ret, root1); } emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret, - jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype)); + jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype), 0); ctx.builder.CreateRetVoid(); break; } @@ -8425,7 +8425,7 @@ static jl_llvm_functions_t if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_concrete_type(jlrettype)); emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo, - jl_datatype_size(jlrettype), julia_alignment(jlrettype)); + jl_datatype_size(jlrettype), julia_alignment(jlrettype), 0); } else { // must be jl_returninfo_t::Union emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index c7f1263af030a..232aa88731345 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -469,7 +469,7 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest } Value *src = data_pointer(ctx, x); - emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, isVolatile); + emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, 0, isVolatile); } static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ) @@ -707,7 +707,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1); setName(ctx.emission_context, thePtr, "pointerref_src"); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); - emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1); + emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1, 1); return mark_julia_type(ctx, strct, true, ety); } else { @@ -783,7 +783,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) setName(ctx.emission_context, im1, "pointerset_offset"); auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); setName(ctx.emission_context, gep, "pointerset_ptr"); - emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb); + emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb, 0); } else { bool isboxed; From ae16f8f20b73f6fb2dd2936eb4e9100c548bac75 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 1 Sep 2023 15:27:35 -0300 Subject: [PATCH 2/7] Use MaybeAlign --- src/cgutils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7722f27e02224..0522752054047 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -979,7 +979,7 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const setName(ctx.emission_context, src, "memcpy_refined_src"); if (isa(dst) && !dst->hasName()) setName(ctx.emission_context, dst, "memcpy_refined_dst"); - auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align_src), is_volatile)); + auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, MaybeAlign(align_src), is_volatile)); dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align_dst), is_volatile)); ++SkippedMemcpys; return; From eed34ab6c55f42b7a61a70399ddf22229b028ffb Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 1 Sep 2023 16:25:47 -0300 Subject: [PATCH 3/7] Apply suggestions from code review Co-authored-by: Jameson Nash --- src/cgutils.cpp | 2 +- src/codegen.cpp | 6 +++--- src/intrinsics.cpp | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 0522752054047..b24c6210fbeba 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -998,7 +998,7 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const // above problem won't be as serious. auto merged_ai = dst_ai.merge(src_ai); - ctx.builder.CreateMemCpy(dst, MaybeAlign(align_dst), src, MaybeAlign(align_src), sz, is_volatile, + ctx.builder.CreateMemCpy(dst, Align(align_dst), src, Align(align_src), sz, is_volatile, merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias); } diff --git a/src/codegen.cpp b/src/codegen.cpp index 96fd3fca86665..d26980831764e 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5132,7 +5132,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) // here it's moved into phi in the successor (from dest) dest = emit_static_alloca(ctx, vtype); Value *phi = emit_static_alloca(ctx, vtype); - ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)), + ctx.builder.CreateMemCpy(phi, Align(julia_alignment(phiType)), dest, dest->getAlign(), jl_datatype_size(phiType), false); ctx.builder.CreateLifetimeEnd(dest); @@ -6069,7 +6069,7 @@ static void emit_cfunc_invalidate( ctx.builder.CreateStore(gf_ret, root1); } emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret, - jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype), 0); + jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype), julia_alignment(rettype)); ctx.builder.CreateRetVoid(); break; } @@ -8425,7 +8425,7 @@ static jl_llvm_functions_t if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_concrete_type(jlrettype)); emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo, - jl_datatype_size(jlrettype), julia_alignment(jlrettype), 0); + jl_datatype_size(jlrettype), julia_alignment(jlrettype), julia_alignment(jlrettype)); } else { // must be jl_returninfo_t::Union emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 232aa88731345..3e7ace18a1749 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -469,7 +469,7 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest } Value *src = data_pointer(ctx, x); - emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, 0, isVolatile); + emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, alignment, isVolatile); } static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ) @@ -707,7 +707,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv) thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1); setName(ctx.emission_context, thePtr, "pointerref_src"); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); - emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1, 1); + emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, sizeof(jl_value_t*), align_nb); return mark_julia_type(ctx, strct, true, ety); } else { @@ -783,7 +783,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv) setName(ctx.emission_context, im1, "pointerset_offset"); auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); setName(ctx.emission_context, gep, "pointerset_ptr"); - emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb, 0); + emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb, julia_alignment(ety)); } else { bool isboxed; From dc9edccf3cf9f7667518980b44abd63fda37be4f Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 1 Sep 2023 16:28:11 -0300 Subject: [PATCH 4/7] Correct alignment for cgval --- src/cgutils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index b24c6210fbeba..da092fe5dd402 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -3110,7 +3110,7 @@ static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, { // newv should already be tagged if (v.ispointer()) { - emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*), sizeof(void*)); + emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*), julia_alignment(v.typ)); } else { init_bits_value(ctx, newv, v.V, tbaa); From 428bb9eafdfab1959b695a9cfafa776b0d95e128 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 1 Sep 2023 17:08:29 -0300 Subject: [PATCH 5/7] Derive min_align check for unions and use it in union_move --- src/cgutils.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index da092fe5dd402..54e571f46f4e5 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -448,6 +448,7 @@ static size_t dereferenceable_size(jl_value_t *jt) return 0; } +static unsigned union_minalign(jl_uniontype_t *ut); // Return the min required / expected alignment of jltype (on the stack or heap) static unsigned julia_alignment(jl_value_t *jt) { @@ -460,6 +461,9 @@ static unsigned julia_alignment(jl_value_t *jt) // and this is the guarantee we have for the GC bits return 16; } + if (jl_is_uniontype(jt)) + return union_minalign((jl_uniontype_t*)jt); + assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)); unsigned alignment = jl_datatype_align(jt); if (alignment > JL_HEAP_ALIGNMENT) @@ -3315,6 +3319,24 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j return compute_box_tindex(ctx, typof, val.typ, typ); } +static unsigned union_minalign(jl_uniontype_t *ut) +{ + // Compute the min alignment for this union + unsigned min_align = MAX_ALIGN; + unsigned counter = 0; + for_each_uniontype_small( + [&](unsigned idx, jl_datatype_t *jt) { + if (!jl_is_datatype_singleton(jt)) { + size_t align1 = jl_datatype_align(jt); + if (align1 < min_align) + min_align = align1; + } + }, + (jl_value_t*)ut, + counter); + return min_align; +} + static void union_alloca_type(jl_uniontype_t *ut, bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align) { @@ -3638,7 +3660,9 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con auto f = [&] { Value *datatype = emit_typeof(ctx, src, false, false); Value *copy_bytes = emit_datatype_size(ctx, datatype); - emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src, copy_bytes, /*TODO: min-align*/1, isVolatile); + unsigned alignment = julia_alignment(src.typ); + (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src), + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, alignment, alignment, isVolatile); return nullptr; }; if (skip) From 031ac26e829dea038c164d3808c30a820a61fb56 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Fri, 1 Sep 2023 21:53:20 -0300 Subject: [PATCH 6/7] Remove union_minalign and just default to byte alignment --- src/cgutils.cpp | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 54e571f46f4e5..713dc41b409bc 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -448,7 +448,6 @@ static size_t dereferenceable_size(jl_value_t *jt) return 0; } -static unsigned union_minalign(jl_uniontype_t *ut); // Return the min required / expected alignment of jltype (on the stack or heap) static unsigned julia_alignment(jl_value_t *jt) { @@ -461,8 +460,6 @@ static unsigned julia_alignment(jl_value_t *jt) // and this is the guarantee we have for the GC bits return 16; } - if (jl_is_uniontype(jt)) - return union_minalign((jl_uniontype_t*)jt); assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)); unsigned alignment = jl_datatype_align(jt); @@ -3319,23 +3316,6 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j return compute_box_tindex(ctx, typof, val.typ, typ); } -static unsigned union_minalign(jl_uniontype_t *ut) -{ - // Compute the min alignment for this union - unsigned min_align = MAX_ALIGN; - unsigned counter = 0; - for_each_uniontype_small( - [&](unsigned idx, jl_datatype_t *jt) { - if (!jl_is_datatype_singleton(jt)) { - size_t align1 = jl_datatype_align(jt); - if (align1 < min_align) - min_align = align1; - } - }, - (jl_value_t*)ut, - counter); - return min_align; -} static void union_alloca_type(jl_uniontype_t *ut, bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align) @@ -3660,9 +3640,8 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con auto f = [&] { Value *datatype = emit_typeof(ctx, src, false, false); Value *copy_bytes = emit_datatype_size(ctx, datatype); - unsigned alignment = julia_alignment(src.typ); (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src), - jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, alignment, alignment, isVolatile); + jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, 1, 1, isVolatile); return nullptr; }; if (skip) From 02cb269e20cb2eee4d690c0b46f171801b0ca027 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 4 Sep 2023 12:55:41 -0300 Subject: [PATCH 7/7] Fixup! --- src/cgutils.cpp | 8 ++++---- src/codegen.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 713dc41b409bc..bbdbdde036a33 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1019,17 +1019,17 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const template static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src, - jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align_dst, unsigned aligned_src, bool is_volatile=false) + jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false) { - emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, aligned_src, is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, align_src, is_volatile); } template static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src, - T1 &&sz, unsigned align_dst, bool is_volatile=false) + T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false) { auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa); - emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, julia_alignment(src.typ), is_volatile); + emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile); } static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type) diff --git a/src/codegen.cpp b/src/codegen.cpp index d26980831764e..4230270cd8a6e 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -5044,7 +5044,7 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu if (vi.value.V != rval_info.V) { Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ)); emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes, - julia_alignment(rval_info.typ), vi.isVolatile); + julia_alignment(rval_info.typ), julia_alignment(rval_info.typ), vi.isVolatile); } } else {