Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add src alignment arg to emit_memcpy #51152

Merged
merged 9 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,14 +550,16 @@ static Value *julia_to_native(
// pass the address of an alloca'd thing, not a box
// since those are immutable.
Value *slot = emit_static_alloca(ctx, to);
unsigned align = julia_alignment(jlto);
cast<AllocaInst>(slot)->setAlignment(Align(align));
Comment on lines 552 to +554
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably make the alignment an argument here, since AllocaInst pretty much requires it now (if you don't provide one, it will attempt a bad guess; see https://llvm.org/doxygen/Instructions_8cpp.html#a35edfaf69cb59d25d92b1e77f0c27530).

setName(ctx.emission_context, slot, "native_convert_buffer");
if (!jvinfo.ispointer()) {
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot));
}
else {
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), julia_alignment(jlto));
emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), align, align);
}
return slot;
}
Expand Down Expand Up @@ -1826,7 +1828,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
emit_inttoptr(ctx,
emit_unbox(ctx, ctx.types().T_size, src, (jl_value_t*)jl_voidpointer_type),
getInt8PtrTy(ctx.builder.getContext())),
MaybeAlign(0),
MaybeAlign(1),
emit_unbox(ctx, ctx.types().T_size, n, (jl_value_t*)jl_ulong_type),
false);
JL_GC_POP();
Expand Down Expand Up @@ -2171,7 +2173,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
slot->setAlignment(Align(boxalign));
ctx.builder.CreateAlignedStore(result, slot, Align(boxalign));
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign);
emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign);
}
else {
init_bits_value(ctx, strct, result, tbaa, boxalign);
Expand Down
39 changes: 21 additions & 18 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ static unsigned julia_alignment(jl_value_t *jt)
// and this is the guarantee we have for the GC bits
return 16;
}

assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt));
unsigned alignment = jl_datatype_align(jt);
if (alignment > JL_HEAP_ALIGNMENT)
Expand Down Expand Up @@ -934,11 +935,11 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x)
}

static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align, bool is_volatile)
jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align_dst, unsigned align_src, bool is_volatile)
{
if (sz == 0)
return;
assert(align && "align must be specified");
assert(align_dst && "align must be specified");
// If the types are small and simple, use load and store directly.
// Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int
// that interferes with other optimizations.
Expand Down Expand Up @@ -979,8 +980,8 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
setName(ctx.emission_context, src, "memcpy_refined_src");
if (isa<Instruction>(dst) && !dst->hasName())
setName(ctx.emission_context, dst, "memcpy_refined_dst");
auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile));
dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile));
auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, MaybeAlign(align_src), is_volatile));
dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align_dst), is_volatile));
++SkippedMemcpys;
return;
}
Expand All @@ -998,37 +999,37 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
// above problem won't be as serious.

auto merged_ai = dst_ai.merge(src_ai);
ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile,
ctx.builder.CreateMemCpy(dst, Align(align_dst), src, Align(align_src), sz, is_volatile,
merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
}

static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
jl_aliasinfo_t const &src_ai, Value *sz, unsigned align, bool is_volatile)
jl_aliasinfo_t const &src_ai, Value *sz, unsigned align_dst, unsigned align_src, bool is_volatile)
{
if (auto const_sz = dyn_cast<ConstantInt>(sz)) {
emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align, is_volatile);
emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align_dst, align_src, is_volatile);
return;
}
++EmittedMemcpys;

auto merged_ai = dst_ai.merge(src_ai);
ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile,
ctx.builder.CreateMemCpy(dst, MaybeAlign(align_dst), src, MaybeAlign(align_src), sz, is_volatile,
merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
}

template<typename T1>
static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align, bool is_volatile=false)
jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false)
{
emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align, is_volatile);
emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, align_src, is_volatile);
}

template<typename T1>
static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src,
T1 &&sz, unsigned align, bool is_volatile=false)
T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false)
{
auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa);
emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align, is_volatile);
emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile);
}

static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type)
Expand Down Expand Up @@ -1884,7 +1885,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
else if (!alignment)
alignment = julia_alignment(jltype);
if (intcast && Order == AtomicOrdering::NotAtomic) {
emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment);
emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment, intcast->getAlign().value());
}
else {
LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false);
Expand Down Expand Up @@ -2481,7 +2482,7 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex,
if (al > 1)
lv->setAlignment(Align(al));
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
emit_memcpy(ctx, lv, ai, addr, ai, fsz, al);
emit_memcpy(ctx, lv, ai, addr, ai, fsz, al, al);
addr = lv;
}
return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa);
Expand Down Expand Up @@ -3110,7 +3111,7 @@ static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v,
{
// newv should already be tagged
if (v.ispointer()) {
emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*));
emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), sizeof(void*), julia_alignment(v.typ));
}
else {
init_bits_value(ctx, newv, v.V, tbaa);
Expand Down Expand Up @@ -3315,6 +3316,7 @@ static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, j
return compute_box_tindex(ctx, typof, val.typ, typ);
}


static void union_alloca_type(jl_uniontype_t *ut,
bool &allunbox, size_t &nbytes, size_t &align, size_t &min_align)
{
Expand Down Expand Up @@ -3579,7 +3581,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
// if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr);
auto f = [&] {
(void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile);
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
return nullptr;
};
if (skip)
Expand Down Expand Up @@ -3616,7 +3618,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
return;
} else {
emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, isVolatile);
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
}
}
ctx.builder.CreateBr(postBB);
Expand All @@ -3641,7 +3643,8 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
auto f = [&] {
Value *datatype = emit_typeof(ctx, src, false, false);
Value *copy_bytes = emit_datatype_size(ctx, datatype);
emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src, copy_bytes, /*TODO: min-align*/1, isVolatile);
(void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, 1, 1, isVolatile);
return nullptr;
};
if (skip)
Expand Down
14 changes: 7 additions & 7 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4930,7 +4930,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
else {
const DataLayout &DL = jl_Module->getDataLayout();
uint64_t sz = DL.getTypeStoreSize(T);
emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value());
emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value(), varslot->getAlign().value());
}
Value *tindex = NULL;
if (vi.pTIndex)
Expand Down Expand Up @@ -5039,7 +5039,7 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
if (vi.value.V != rval_info.V) {
Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ));
emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes,
julia_alignment(rval_info.typ), vi.isVolatile);
julia_alignment(rval_info.typ), julia_alignment(rval_info.typ), vi.isVolatile);
}
}
else {
Expand Down Expand Up @@ -5087,7 +5087,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
Value *isboxed = ctx.builder.CreateICmpNE(
ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, MaybeAlign(0), nbytes, false);
ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, dest->getAlign(), nbytes, false);
ctx.builder.CreateLifetimeEnd(dest);
Value *ptr = ctx.builder.CreateSelect(isboxed,
maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), getInt8PtrTy(ctx.builder.getContext())),
Expand Down Expand Up @@ -5127,8 +5127,8 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
// here it's moved into phi in the successor (from dest)
dest = emit_static_alloca(ctx, vtype);
Value *phi = emit_static_alloca(ctx, vtype);
ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)),
dest, MaybeAlign(0),
ctx.builder.CreateMemCpy(phi, Align(julia_alignment(phiType)),
dest, dest->getAlign(),
jl_datatype_size(phiType), false);
ctx.builder.CreateLifetimeEnd(dest);
slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack);
Expand Down Expand Up @@ -6064,7 +6064,7 @@ static void emit_cfunc_invalidate(
ctx.builder.CreateStore(gf_ret, root1);
}
emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret,
jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype));
jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype), julia_alignment(rettype));
ctx.builder.CreateRetVoid();
break;
}
Expand Down Expand Up @@ -8420,7 +8420,7 @@ static jl_llvm_functions_t
if (returninfo.cc == jl_returninfo_t::SRet) {
assert(jl_is_concrete_type(jlrettype));
emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo,
jl_datatype_size(jlrettype), julia_alignment(jlrettype));
jl_datatype_size(jlrettype), julia_alignment(jlrettype), julia_alignment(jlrettype));
}
else { // must be jl_returninfo_t::Union
emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union);
Expand Down
6 changes: 3 additions & 3 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest
}

Value *src = data_pointer(ctx, x);
emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, isVolatile);
emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, alignment, isVolatile);
}

static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ)
Expand Down Expand Up @@ -707,7 +707,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1);
setName(ctx.emission_context, thePtr, "pointerref_src");
MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, 1);
emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, sizeof(jl_value_t*), align_nb);
return mark_julia_type(ctx, strct, true, ety);
}
else {
Expand Down Expand Up @@ -783,7 +783,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
setName(ctx.emission_context, im1, "pointerset_offset");
auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1);
setName(ctx.emission_context, gep, "pointerset_ptr");
emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb);
emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb, julia_alignment(ety));
}
else {
bool isboxed;
Expand Down