From b60e7306d3a50a001110b5d0740a87720bd74fdd Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Wed, 3 Feb 2016 11:55:52 -0500 Subject: [PATCH] GC debugging tweaks * Rename JL_* envs to JULIA_* * Enable GC status printing for non-gc-debug build when there's a crash that is likely related to GC. (SegFault, type tag corruption) * Add an env to put the process to sleep before aborting so that one can attach a debugger later --- src/codegen.cpp | 3 +++ src/gc-debug.c | 40 ++++++++++++++++++++++++++++++++++------ src/gc.c | 4 ++++ src/gf.c | 1 + src/intrinsics.h | 1 + src/julia_internal.h | 11 +++++++---- src/signal-handling.c | 1 + src/signals-win.c | 1 + src/task.c | 3 +++ src/threading.c | 6 ------ 10 files changed, 55 insertions(+), 16 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 6107f1add7397..dda59ed8b42e7 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -144,6 +144,7 @@ JL_DLLEXPORT void __stack_chk_fail() { /* put your panic function or similar in here */ fprintf(stderr, "fatal error: stack corruption detected\n"); + gc_debug_critical_error(); abort(); // end with abort, since the compiler destroyed the stack upon entry to this function, there's no going back now } #endif @@ -895,6 +896,7 @@ static Function *to_function(jl_lambda_info_t *li, jl_cyclectx_t *cyclectx) (specf && verifyFunction(*specf, PrintMessageAction))) { f->dump(); if (specf) specf->dump(); + gc_debug_critical_error(); abort(); } #endif @@ -992,6 +994,7 @@ static void writeRecoveryFile(llvm::Module *mod) raw_fd_ostream OS(fname_ref, err, sys::fs::F_None); WriteBitcodeToFile(mod,OS); OS.flush(); + gc_debug_critical_error(); abort(); } #endif diff --git a/src/gc-debug.c b/src/gc-debug.c index 8db7f56f97255..274dd37dd6494 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -264,6 +264,7 @@ static void gc_verify(void) restore(); gc_verify_track(); gc_debug_print_status(); + gc_debug_critical_error(); abort(); } @@ -286,6 +287,7 @@ typedef struct { typedef struct { int sweep_mask; + int wait_for_debugger; jl_alloc_num_t pool; jl_alloc_num_t other; jl_alloc_num_t print; @@ -293,6 +295,7 @@ typedef struct { JL_DLLEXPORT jl_gc_debug_env_t jl_gc_debug_env = { GC_MARKED_NOESC, + 0, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} @@ -302,7 +305,7 @@ static void gc_debug_alloc_init(jl_alloc_num_t *num, const char *name) { // Not very generic and robust but good enough for a debug option char buff[128]; - sprintf(buff, "JL_GC_ALLOC_%s", name); + sprintf(buff, "JULIA_GC_ALLOC_%s", name); char *env = getenv(buff); if (!env) return; @@ -324,10 +327,11 @@ static char *gc_stack_lo; static void gc_debug_init(void) { gc_stack_lo = (char*)gc_get_stack_ptr(); - char *env = getenv("JL_GC_NO_GENERATIONAL"); - if (env && strcmp(env, "0") != 0) { + char *env = getenv("JULIA_GC_NO_GENERATIONAL"); + if (env && strcmp(env, "0") != 0) jl_gc_debug_env.sweep_mask = GC_MARKED; - } + env = getenv("JULIA_GC_WAIT_FOR_DEBUGGER"); + jl_gc_debug_env.wait_for_debugger = env && strcmp(env, "0") != 0; gc_debug_alloc_init(&jl_gc_debug_env.pool, "POOL"); gc_debug_alloc_init(&jl_gc_debug_env.other, "OTHER"); gc_debug_alloc_init(&jl_gc_debug_env.print, "PRINT"); @@ -349,8 +353,18 @@ void gc_debug_print_status(void) uint64_t other_count = jl_gc_debug_env.other.num; jl_safe_printf("Allocations: %" PRIu64 " " "(Pool: %" PRIu64 "; Other: %" PRIu64 "); GC: %d\n", - pool_count + other_count, pool_count, other_count, - n_pause); + pool_count + other_count, pool_count, other_count, n_pause); +} + +void gc_debug_critical_error(void) +{ + gc_debug_print_status(); + if (!jl_gc_debug_env.wait_for_debugger) + return; + jl_safe_printf("Waiting for debugger to attach\n"); + while (1) { + sleep(1000); + } } static inline void gc_debug_print(void) @@ -403,6 +417,20 @@ static inline int gc_debug_check_pool(void) return 0; } +void gc_debug_print_status(void) +{ + // May not be accurate but should be helpful enough + uint64_t pool_count = gc_num.poolalloc; + uint64_t big_count = gc_num.bigalloc; + jl_safe_printf("Allocations: %" PRIu64 " " + "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n", + pool_count + big_count, pool_count, big_count, n_pause); +} + +void gc_debug_critical_error(void) +{ +} + static inline void gc_debug_print(void) { } diff --git a/src/gc.c b/src/gc.c index c84118d9415fe..0c183b9a9d6d2 100644 --- a/src/gc.c +++ b/src/gc.c @@ -389,6 +389,7 @@ void jl_gc_signal_init(void) #endif if (jl_gc_signal_page == NULL) { jl_printf(JL_STDERR, "could not allocate GC synchronization page\n"); + gc_debug_critical_error(); abort(); } } @@ -856,6 +857,7 @@ static NOINLINE void *malloc_page(void) #endif if (mem == NULL) { jl_printf(JL_STDERR, "could not allocate pools\n"); + gc_debug_critical_error(); abort(); } if (GC_PAGE_SZ > jl_page_size) { @@ -886,6 +888,7 @@ static NOINLINE void *malloc_page(void) } if (region_i >= REGION_COUNT) { jl_printf(JL_STDERR, "increase REGION_COUNT or allocate less memory\n"); + gc_debug_critical_error(); abort(); } if (regions_lb[region_i] < i) @@ -1951,6 +1954,7 @@ static int push_root(jl_value_t *v, int d, int bits) jl_printf(JL_STDOUT, "GC error (probable corruption) :\n"); gc_debug_print_status(); jl_(vt); + gc_debug_critical_error(); abort(); } diff --git a/src/gf.c b/src/gf.c index 44977994e88b6..2c33318e88994 100644 --- a/src/gf.c +++ b/src/gf.c @@ -1384,6 +1384,7 @@ void JL_NORETURN jl_no_method_error_bare(jl_function_t *f, jl_value_t *args) jl_printf((JL_STREAM*)STDERR_FILENO, "A method error occurred before the base module was defined. Aborting...\n"); jl_static_show((JL_STREAM*)STDERR_FILENO,(jl_value_t*)f); jl_printf((JL_STREAM*)STDERR_FILENO,"\n"); jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n"); + gc_debug_critical_error(); abort(); } // not reached diff --git a/src/intrinsics.h b/src/intrinsics.h index e2ff7c945928b..9fc75ee198ffe 100644 --- a/src/intrinsics.h +++ b/src/intrinsics.h @@ -171,6 +171,7 @@ JL_CALLABLE(jl_f_intrinsic_call) default: assert(0 && "unexpected number of arguments to an intrinsic function"); } + gc_debug_critical_error(); abort(); } diff --git a/src/julia_internal.h b/src/julia_internal.h index b8ba32de3450f..7c255a166d104 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -5,6 +5,12 @@ #include #include +#ifndef _MSC_VER +#include +#include +#else +#define sleep(x) Sleep(1000*x) +#endif #ifdef __cplusplus extern "C" { @@ -97,11 +103,8 @@ static inline void jl_gc_wb_buf(void *parent, void *bufptr) // parent isa jl_val gc_setmark_buf(bufptr, jl_astaggedvalue(parent)->gc_bits); } -#ifdef GC_DEBUG_ENV void gc_debug_print_status(void); -#else -#define gc_debug_print_status() -#endif +void gc_debug_critical_error(void); #if defined(GC_FINAL_STATS) void jl_print_gc_stats(JL_STREAM *s); #else diff --git a/src/signal-handling.c b/src/signal-handling.c index 2b79d84247b21..f8f7b3f019e92 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -59,6 +59,7 @@ static void jl_critical_error(int sig, bt_context_t context, intptr_t *bt_data, for(size_t i=0; i < n; i++) jl_gdblookup(bt_data[i]); gc_debug_print_status(); + gc_debug_critical_error(); } /////////////////////// diff --git a/src/signals-win.c b/src/signals-win.c index 859db0c5ddee9..6ef10e8697933 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -306,6 +306,7 @@ static DWORD WINAPI profile_bt( LPVOID lparam ) bt_size_cur++; if ((DWORD)-1 == ResumeThread(hMainThread)) { fputs("failed to resume main thread! aborting.",stderr); + gc_debug_critical_error(); abort(); } } diff --git a/src/task.c b/src/task.c index 22cc70ca323fa..6dccb83e0789a 100644 --- a/src/task.c +++ b/src/task.c @@ -209,6 +209,7 @@ static void JL_NORETURN finish_task(jl_task_t *t, jl_value_t *resultval) // For now, only thread 0 runs the task scheduler. // The others return to the thread loop jl_switchto(jl_root_task, jl_nothing); + gc_debug_critical_error(); abort(); } if (task_done_hook_func == NULL) { @@ -219,6 +220,7 @@ static void JL_NORETURN finish_task(jl_task_t *t, jl_value_t *resultval) jl_value_t *args[2] = {task_done_hook_func, (jl_value_t*)t}; jl_apply(args, 2); } + gc_debug_critical_error(); abort(); } @@ -253,6 +255,7 @@ static void NOINLINE JL_NORETURN start_task(void) } } finish_task(t, res); + gc_debug_critical_error(); abort(); } diff --git a/src/threading.c b/src/threading.c index 33f46c0d23b34..620b68132cef9 100644 --- a/src/threading.c +++ b/src/threading.c @@ -18,12 +18,6 @@ #include #include #include -#ifndef _MSC_VER -#include -#include -#else -#define sleep(x) Sleep(1000*x) -#endif #include "julia.h" #include "julia_internal.h"