Annotate (most) Julia reference accesses as atomic unordered #36507

Merged · 2 commits · Jul 8, 2020

82 changes: 57 additions & 25 deletions src/array.c
@@ -20,6 +20,34 @@ extern "C" {

#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)

// this is a version of memcpy that preserves atomic memory ordering
// which makes it safe to use for objects that can contain memory references
// without risk of creating pointers out of thin air
// TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32
// aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit)
void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
{
size_t i;
if (dstp < srcp || dstp > srcp + n) {
for (i = 0; i < n; i++) {
jl_atomic_store_relaxed(dstp + i, jl_atomic_load_relaxed(srcp + i));
}
}
else {
for (i = 0; i < n; i++) {
jl_atomic_store_relaxed(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1));
}
}
}

void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
{
if (hasptr)
memmove_refs((void**)dst, (void**)src, nb / sizeof(void*));
else
memmove(dst, src, nb);
}

// array constructors ---------------------------------------------------------
char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT
{
@@ -542,10 +570,9 @@ JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t
{
assert(i < jl_array_len(a));
assert(a->flags.ptrarray);
jl_value_t *elt = ((jl_value_t**)a->data)[i];
if (elt == NULL) {
jl_value_t *elt = jl_atomic_load_relaxed(((jl_value_t**)a->data) + i);
if (elt == NULL)
jl_throw(jl_undefref_exception);
}
return elt;
}

@@ -569,7 +596,7 @@ JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i)
{
if (a->flags.ptrarray) {
return ((jl_value_t**)jl_array_data(a))[i] != NULL;
return jl_atomic_load_relaxed(((jl_value_t**)jl_array_data(a)) + i) != NULL;
}
else if (a->flags.hasptr) {
jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a));
@@ -600,12 +627,17 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
return;
}
jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
if (a->flags.hasptr) {
memmove_refs((void**)&((char*)a->data)[i * a->elsize], (void**)rhs, a->elsize / sizeof(void*));
}
else {
jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
}
if (a->flags.hasptr)
jl_gc_multi_wb(jl_array_owner(a), rhs);
}
else {
((jl_value_t**)a->data)[i] = rhs;
jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, rhs);
jl_gc_wb(jl_array_owner(a), rhs);
}
}
@@ -615,7 +647,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
if (i >= jl_array_len(a))
jl_bounds_error_int((jl_value_t*)a, i + 1);
if (a->flags.ptrarray)
((jl_value_t**)a->data)[i] = NULL;
jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, NULL);
else if (a->flags.hasptr) {
size_t elsize = a->elsize;
jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
@@ -762,7 +794,7 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
if (isbitsunion) newtypetagdata = typetagdata - inc;
if (idx > 0) {
// inserting new elements after 1st element
memmove(newdata, data, idx * elsz);
memmove_safe(a->flags.hasptr, newdata, data, idx * elsz);
if (isbitsunion) {
memmove(newtypetagdata, typetagdata, idx);
memset(newtypetagdata + idx, 0, inc);
@@ -796,11 +828,11 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
// We could use memcpy if resizing allocates a new buffer,
// hopefully it's not a particularly important optimization.
if (idx > 0 && newdata < data) {
memmove(newdata, data, nb1);
memmove_safe(a->flags.hasptr, newdata, data, nb1);
}
memmove(newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
if (idx > 0 && newdata > data) {
memmove(newdata, data, nb1);
memmove_safe(a->flags.hasptr, newdata, data, nb1);
}
a->offset = newoffset;
}
@@ -810,16 +842,16 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
newdata = data - oldoffsnb + a->offset * elsz;
if (isbitsunion) newtypetagdata = newdata + (a->maxsize - a->offset) * elsz + a->offset;
if (idx > 0 && newdata < data) {
memmove(newdata, data, nb1);
memmove_safe(a->flags.hasptr, newdata, data, nb1);
if (isbitsunion) {
memmove(newtypetagdata, typetagdata, idx);
memset(newtypetagdata + idx, 0, inc);
}
}
memmove(newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
memmove_safe(a->flags.hasptr, newdata + nbinc + nb1, data + nb1, n * elsz - nb1);
if (isbitsunion) memmove(newtypetagdata + idx + inc, typetagdata + idx, n - idx);
if (idx > 0 && newdata > data) {
memmove(newdata, data, nb1);
memmove_safe(a->flags.hasptr, newdata, data, nb1);
if (isbitsunion) {
memmove(newtypetagdata, typetagdata, idx);
memset(newtypetagdata + idx, 0, inc);
@@ -891,7 +923,7 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
memmove(newtypetagdata, typetagdata, idx);
memset(newtypetagdata + idx, 0, inc);
}
if (has_gap) memmove(newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1);
if (has_gap) memmove_safe(a->flags.hasptr, newdata + nb1 + nbinc, newdata + nb1, n * elsz - nb1);
}
a->data = data = newdata;
}
@@ -901,7 +933,7 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
memset(typetagdata + idx, 0, inc);
}
size_t nb1 = idx * elsz;
memmove(data + nb1 + inc * elsz, data + nb1, n * elsz - nb1);
memmove_safe(a->flags.hasptr, data + nb1 + inc * elsz, data + nb1, n * elsz - nb1);
}
else {
// there was enough room for requested growth already in a->maxsize
@@ -1036,12 +1068,12 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
if (elsz == 1 && !isbitsunion)
nbtotal++;
if (idx > 0) {
memmove(newdata, olddata, nb1);
memmove_safe(a->flags.hasptr, newdata, olddata, nb1);
if (isbitsunion) memmove(newtypetagdata, typetagdata, idx);
}
// Move the rest of the data if the offset changed
if (newoffs != offset) {
memmove(newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, n - idx);
}
a->data = newdata;
@@ -1063,7 +1095,7 @@ STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec,
int isbitsunion = jl_array_isbitsunion(a);
size_t last = idx + dec;
if (n > last) {
memmove(data + idx * elsz, data + last * elsz, (n - last) * elsz);
memmove_safe(a->flags.hasptr, data + idx * elsz, data + last * elsz, (n - last) * elsz);
if (isbitsunion) {
char *typetagdata = jl_array_typetagdata(a);
memmove(typetagdata + idx, typetagdata + last, n - last);
@@ -1161,14 +1193,14 @@ JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
}

// Copy element by element until we hit a young object, at which point
// we can continue using `memmove`.
// we can finish by using `memmove`.
static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner,
void **src_p, void **dest_p,
ssize_t n)
{
for (ssize_t i = 0; i < n; i++) {
void *val = src_p[i];
dest_p[i] = val;
void *val = jl_atomic_load_relaxed(src_p + i);
jl_atomic_store_relaxed(dest_p + i, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
jl_gc_queue_root(owner);
@@ -1183,8 +1215,8 @@ static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner,
ssize_t n)
{
for (ssize_t i = 0; i < n; i++) {
void *val = src_p[n - i - 1];
dest_p[n - i - 1] = val;
void *val = jl_atomic_load_relaxed(src_p + n - i - 1);
jl_atomic_store_relaxed(dest_p + n - i - 1, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
jl_gc_queue_root(owner);
@@ -1218,7 +1250,7 @@ JL_DLLEXPORT void jl_array_ptr_copy(jl_array_t *dest, void **dest_p,
n -= done;
}
}
memmove(dest_p, src_p, n * sizeof(void*));
memmove_refs(dest_p, src_p, n);
}

JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item)
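Note on the array.c changes: the new memmove_refs/memmove_safe helpers copy Julia object references one pointer-sized word at a time with relaxed atomic loads and stores, so a concurrent reader can never observe a torn pointer or a reference "out of thin air", which a plain memmove (free to use wider or narrower accesses) cannot guarantee. A minimal C11 sketch of the same pattern follows; it is illustrative only and uses stdatomic.h rather than the jl_atomic_* macros the in-tree code relies on.

```c
#include <stdatomic.h>
#include <stddef.h>

// Element-wise "atomic unordered" copy of object references, mirroring
// memmove_refs above: copy forward unless the destination overlaps the
// source from behind, in which case copy backward; every element moves
// through a relaxed load/store pair so readers never see torn pointers.
static void copy_refs(_Atomic(void *) *dst, _Atomic(void *) *src, size_t n)
{
    if (dst < src || dst > src + n) {
        for (size_t i = 0; i < n; i++) {
            void *v = atomic_load_explicit(&src[i], memory_order_relaxed);
            atomic_store_explicit(&dst[i], v, memory_order_relaxed);
        }
    }
    else {
        for (size_t i = n; i-- > 0; ) {
            void *v = atomic_load_explicit(&src[i], memory_order_relaxed);
            atomic_store_explicit(&dst[i], v, memory_order_relaxed);
        }
    }
}
```

This is why the grow/delete/copy paths above now route pointer-carrying buffers through memmove_safe or memmove_refs instead of raw memmove.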
8 changes: 7 additions & 1 deletion src/atomics.h
@@ -44,6 +44,8 @@
* specified.
*/
#if defined(__GNUC__)
# define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
# define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
# define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
# define jl_atomic_fetch_add_relaxed(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
@@ -73,7 +75,7 @@
// TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock
# define jl_atomic_store(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
# define jl_atomic_store_relaxed(obj, val) \
# define jl_atomic_store_relaxed(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELAXED)
# if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
!(defined(_CPU_X86_) || defined(_CPU_X86_64_))
@@ -96,6 +98,9 @@
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_RELAXED)
#elif defined(_COMPILER_MICROSOFT_)
// TODO: these only define compiler barriers, and aren't correct outside of x86
# define jl_fence() _ReadWriteBarrier()
# define jl_fence_release() _WriteBarrier()
# define jl_signal_fence() _ReadWriteBarrier()

// add
@@ -266,6 +271,7 @@ static inline void jl_atomic_store_release(volatile T *obj, T2 val)
jl_signal_fence();
*obj = (T)val;
}
template<typename T, typename T2>
static inline void jl_atomic_store_relaxed(volatile T *obj, T2 val)
{
*obj = (T)val;
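Note on the atomics.h changes: jl_fence() and jl_fence_release() map to __atomic_thread_fence on GCC/Clang, while the MSVC fallbacks are only compiler barriers and, as the new TODO says, are not correct outside x86. A hedged C11 sketch of the publication pattern a release fence enables; the names here are hypothetical, not Julia's.

```c
#include <stdatomic.h>

static int payload;            // plain, non-atomic data
static atomic_int ready;       // flag other threads poll

void producer(void)
{
    payload = 42;
    atomic_thread_fence(memory_order_release);            // ~ jl_fence_release()
    atomic_store_explicit(&ready, 1, memory_order_relaxed);
}

int consumer(void)
{
    if (atomic_load_explicit(&ready, memory_order_relaxed)) {
        atomic_thread_fence(memory_order_acquire);         // pairs with the release fence
        return payload;                                    // guaranteed to read 42
    }
    return -1;                                             // not yet published
}
```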
37 changes: 20 additions & 17 deletions src/ccall.cpp
@@ -87,15 +87,15 @@ static Value *runtime_sym_lookup(
BasicBlock *dlsym_lookup = BasicBlock::Create(jl_LLVMContext, "dlsym");
BasicBlock *ccall_bb = BasicBlock::Create(jl_LLVMContext, "ccall");
Constant *initnul = ConstantPointerNull::get((PointerType*)T_pvoidfunc);
LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(llvmgv, sizeof(void*));
LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, sizeof(void*));
// This in principle needs a consume ordering so that load from
// this pointer sees a valid value. However, this is not supported by
// LLVM (or agreed on in the C/C++ standard FWIW) and should be
// almost impossible to happen on every platform we support since this
// ordering is enforced by the hardware and LLVM has to speculate an
// invalid load from the `cglobal` but doesn't depend on the `cglobal`
// value for this to happen.
// llvmf_orig->setAtomic(AtomicOrdering::Consume);
llvmf_orig->setAtomic(AtomicOrdering::Unordered);
irbuilder.CreateCondBr(
irbuilder.CreateICmpNE(llvmf_orig, initnul),
ccall_bb,
@@ -114,7 +114,7 @@
}
Value *llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
{ libname, stringConstPtr(emission_context, irbuilder, f_name), libptrgv });
auto store = irbuilder.CreateAlignedStore(llvmf, llvmgv, sizeof(void*));
StoreInst *store = irbuilder.CreateAlignedStore(llvmf, llvmgv, sizeof(void*));
store->setAtomic(AtomicOrdering::Release);
irbuilder.CreateBr(ccall_bb);

@@ -169,7 +169,7 @@ static GlobalVariable *emit_plt_thunk(
IRBuilder<> irbuilder(b0);
Value *ptr = runtime_sym_lookup(emission_context, irbuilder, funcptype, f_lib, f_name, plt, libptrgv,
llvmgv, runtime_lib);
auto store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, sizeof(void*));
StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, sizeof(void*));
store->setAtomic(AtomicOrdering::Release);
SmallVector<Value*, 16> args;
for (Function::arg_iterator arg = plt->arg_begin(), arg_e = plt->arg_end(); arg != arg_e; ++arg)
@@ -234,7 +234,7 @@ static Value *emit_plt(
// consume ordering too. This is even less likely to cause issues though
// since the only thing we do to this loaded pointer is to call it
// immediately.
// got_val->setAtomic(AtomicOrdering::Consume);
got_val->setAtomic(AtomicOrdering::Unordered);
return ctx.builder.CreateBitCast(got_val, funcptype);
}

@@ -349,17 +349,19 @@ static Value *llvm_type_rewrite(
Value *from;
Value *to;
const DataLayout &DL = jl_data_layout;
unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type));
if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
to = emit_static_alloca(ctx, target_type);
cast<AllocaInst>(to)->setAlignment(Align(align));
from = emit_bitcast(ctx, to, from_type->getPointerTo());
}
else {
from = emit_static_alloca(ctx, from_type);
cast<AllocaInst>(from)->setAlignment(Align(align));
to = emit_bitcast(ctx, from, target_type->getPointerTo());
}
// XXX: deal with possible alignment issues
ctx.builder.CreateStore(v, from);
return ctx.builder.CreateLoad(to);
ctx.builder.CreateAlignedStore(v, from, align);
return ctx.builder.CreateAlignedLoad(to, align);
}

// --- argument passing and scratch space utilities ---
@@ -1576,9 +1578,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
Value *ptls_i16 = emit_bitcast(ctx, ctx.ptlsStates, T_pint16);
const int tid_offset = offsetof(jl_tls_states_t, tid);
Value *ptid = ctx.builder.CreateGEP(ptls_i16, ConstantInt::get(T_size, tid_offset / 2));
return mark_or_box_ccall_result(ctx,
tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(ptid)),
retboxed, rt, unionall, static_rt);
LoadInst *tid = ctx.builder.CreateAlignedLoad(ptid, sizeof(int16_t));
tbaa_decorate(tbaa_const, tid);
return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
}
else if (is_libjulia_func(jl_get_current_task)) {
assert(lrt == T_prjlvalue);
@@ -1587,9 +1589,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_pprjlvalue);
const int ct_offset = offsetof(jl_tls_states_t, current_task);
Value *pct = ctx.builder.CreateGEP(ptls_pv, ConstantInt::get(T_size, ct_offset / sizeof(void*)));
return mark_or_box_ccall_result(ctx,
tbaa_decorate(tbaa_const, ctx.builder.CreateLoad(pct)),
retboxed, rt, unionall, static_rt);
LoadInst *ct = ctx.builder.CreateAlignedLoad(pct, sizeof(void*));
tbaa_decorate(tbaa_const, ct);
return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt);
}
else if (is_libjulia_func(jl_set_next_task)) {
assert(lrt == T_void);
@@ -1608,8 +1610,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
Value *pdefer_sig = emit_defer_signal(ctx);
Value *defer_sig = ctx.builder.CreateLoad(pdefer_sig);
defer_sig = ctx.builder.CreateAdd(defer_sig,
ConstantInt::get(T_sigatomic, 1));
defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(T_sigatomic, 1));
ctx.builder.CreateStore(defer_sig, pdefer_sig);
emit_signal_fence(ctx);
return ghostValue(jl_nothing_type);
@@ -1671,7 +1672,9 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ((jl_datatype_t*)ety)->layout->first_ptr));
}
Value *slot_addr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, arrayptr, idx);
Value *load = tbaa_decorate(tbaa_ptrarraybuf, ctx.builder.CreateLoad(T_prjlvalue, slot_addr));
LoadInst *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, slot_addr, sizeof(void*));
load->setAtomic(AtomicOrdering::Unordered);
tbaa_decorate(tbaa_ptrarraybuf, load);
Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(T_prjlvalue)), T_int32);
JL_GC_POP();
return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt);
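Note on the ccall.cpp changes: the cached dlsym result (and the PLT GOT slot) is now loaded with AtomicOrdering::Unordered instead of a plain load, while the slow path still publishes the resolved pointer with a Release store; the comment in runtime_sym_lookup explains why the consume ordering this load formally wants is unnecessary in practice on supported hardware. A rough C analogue of the pattern the generated IR implements, with an illustrative cache variable and names that are not Julia's:

```c
#include <stdatomic.h>
#include <dlfcn.h>

static _Atomic(void *) cached_fn;   // lazily initialized symbol cache

void *lookup(void *libhandle, const char *name)
{
    // fast path: relaxed ("unordered") load of the cached function pointer
    void *f = atomic_load_explicit(&cached_fn, memory_order_relaxed);
    if (f == NULL) {
        // slow path: resolve the symbol, then publish it with release ordering
        f = dlsym(libhandle, name);
        atomic_store_explicit(&cached_fn, f, memory_order_release);
    }
    return f;
}
```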