From 40a0b138a26e9395f26b6f623ba008495d4a9b21 Mon Sep 17 00:00:00 2001 From: Dominic Cerquetti Date: Sat, 27 Aug 2016 02:31:02 -0400 Subject: [PATCH] enable system diagnostics package 1 for threading info - cleanup existing debugging stuff - gather linux-specific thread ID so we can correlate threads with their system process info in /proc, top, etc - create general purpose framework for registering more diagnostics functions - add very detailed snapshot printout of per-thread stack traces, run status, thread name, etc (these are heavy weapons for diagnosing hung threads / deadlocks) - add basic memory reporting, more coming later --- requirements.txt | 3 +- sideboard/debugger.py | 14 ----- sideboard/debugging.py | 50 +++++++++++++++++ sideboard/lib/_threads.py | 100 +++++++++++++++++++++++++++++++--- sideboard/run_debug_server.py | 4 +- 5 files changed, 145 insertions(+), 26 deletions(-) delete mode 100644 sideboard/debugger.py create mode 100644 sideboard/debugging.py diff --git a/requirements.txt b/requirements.txt index d3df306..13eadc4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ paver==1.2.2 wheel==0.24.0 pip==1.5.6 sh==1.09 -python-prctl==1.6.1 \ No newline at end of file +python-prctl==1.6.1 +psutil==4.3.0 \ No newline at end of file diff --git a/sideboard/debugger.py b/sideboard/debugger.py deleted file mode 100644 index 64dbebd..0000000 --- a/sideboard/debugger.py +++ /dev/null @@ -1,14 +0,0 @@ -import os - - -# when debugging, if you kill the server, occasionally there will be lockfiles leftover. -# we'll kill them here. DO NOT CALL THIS IN PRODUCTION -def debugger_helper_remove_any_lockfiles(): - path_of_this_python_script = os.path.dirname(os.path.realpath(__file__)) - session_path = path_of_this_python_script + "/../data/sessions/" - for lockfile in os.listdir(session_path): - if lockfile.endswith(".lock"): - os.remove(session_path + lockfile) - -def debugger_helpers_all_init(): - debugger_helper_remove_any_lockfiles() \ No newline at end of file diff --git a/sideboard/debugging.py b/sideboard/debugging.py new file mode 100644 index 0000000..2be4ead --- /dev/null +++ b/sideboard/debugging.py @@ -0,0 +1,50 @@ +import os + +# create a list of status functions which can inspect information of the running process +status_functions = [] + + +def gather_diagnostics_status_information(): + """ + Return textual information about current system state / diagnostics + Useful for debugging threading / db / cpu load / etc + """ + out = '' + for func in status_functions: + out += '--------- {} ---------\n{}\n\n\n'.format(func.__name__.replace('_', ' ').upper(), func()) + return out + + +def register_diagnostics_status_function(func): + status_functions.append(func) + return func + + +def _get_all_session_lock_filenames(): + path_of_this_python_script = os.path.dirname(os.path.realpath(__file__)) + session_path = path_of_this_python_script + "/../data/sessions/" + return [session_path + lockfile for lockfile in os.listdir(session_path) if lockfile.endswith(".lock")] + + +def _debugger_helper_remove_any_session_lockfiles(): + """ + When debugging, if you force kill the server, occasionally + there will be cherrypy session lockfiles leftover. + Calling this function will remove any stray lockfiles. + + DO NOT CALL THIS IN PRODUCTION. + """ + for lockfile in _get_all_session_lock_filenames(): + os.remove(lockfile) + + +def debugger_helpers_all_init(): + """ + Prepare sideboard to launch from a debugger. + This will do a few extra steps to make sure the environment is friendly. + + DO NOT CALL THIS IN PRODUCTION. + """ + _debugger_helper_remove_any_session_lockfiles() + + diff --git a/sideboard/lib/_threads.py b/sideboard/lib/_threads.py index 884f4f6..e500df5 100644 --- a/sideboard/lib/_threads.py +++ b/sideboard/lib/_threads.py @@ -1,9 +1,13 @@ from __future__ import unicode_literals import time +import platform +import psutil import heapq import prctl -import threading +import ctypes import sys +import traceback +from sideboard.debugging import register_diagnostics_status_function from warnings import warn from threading import Thread, Timer, Event, Lock @@ -12,15 +16,47 @@ from sideboard.lib import log, on_startup, on_shutdown -# inject our own code at the start of every thread's start() method which sets the thread name via prctl(). -# Python thread names will now be shown in external system tools like 'top', '/proc', etc. -def _thread_name_insert(self): - if self.name: +import threading + + +def _get_linux_thread_tid(): + """ + Get the current linux thread ID as it appears in /proc/[pid]/task/[tid] + :return: Linux thread ID if available, or -1 if any errors / not on linux + """ + try: + if not platform.system().startswith('Linux'): + raise ValueError + syscalls = { + 'i386': 224, # unistd_32.h: #define __NR_gettid 224 + 'x86_64': 186, # unistd_64.h: #define __NR_gettid 186 + } + syscall_num = syscalls[platform.machine()] + tid = ctypes.CDLL('libc.so.6').syscall(syscall_num) + except: + tid = -1 + return tid + + +def _set_current_thread_ids_from(thread): + # thread ID part 1: set externally visible thread name in /proc/[pid]/tasks/[tid]/comm to our internal name + if thread.name: # linux doesn't allow thread names > 15 chars, and we ideally want to see the end of the name. # attempt to shorten the name if we need to. - shorter_name = self.name if len(self.name) < 15 else self.name.replace('CP Server Thread', 'CPServ') + shorter_name = thread.name if len(thread.name) < 15 else thread.name.replace('CP Server Thread', 'CPServ') prctl.set_name(shorter_name) + + # thread ID part 2: capture linux-specific thread ID (TID) and store it with this thread object + # if TID can't be obtained or system call fails, tid will be -1 + thread.linux_tid = _get_linux_thread_tid() + + +# inject our own code at the start of every thread's start() method which sets the thread name via prctl(). +# Python thread names will now be shown in external system tools like 'top', '/proc', etc. +def _thread_name_insert(self): + _set_current_thread_ids_from(self) threading.Thread._bootstrap_inner_original(self) + if sys.version_info[0] == 3: threading.Thread._bootstrap_inner_original = threading.Thread._bootstrap_inner threading.Thread._bootstrap_inner = _thread_name_insert @@ -28,8 +64,9 @@ def _thread_name_insert(self): threading.Thread._bootstrap_inner_original = threading.Thread._Thread__bootstrap threading.Thread._Thread__bootstrap = _thread_name_insert -# set the name of the main thread -prctl.set_name('sideboard_main') +# set the ID's of the main thread +threading.current_thread().setName('sideboard_main') +_set_current_thread_ids_from(threading.current_thread()) class DaemonTask(object): @@ -170,3 +207,50 @@ def defer(self, func, *args, **kwargs): def delayed(self, delay, func, *args, **kwargs): self.q.put([func, args, kwargs], delay=delay) + + +def _get_thread_current_stacktrace(thread_stack, thread): + out = [] + linux_tid = getattr(thread, 'linux_tid', -1) + status = '[unknown]' + if linux_tid != -1: + status = psutil.Process(linux_tid).status() + out.append('\n--------------------------------------------------------------------------') + out.append('# Thread name: "%s"\n# Python thread.ident: %d\n# Linux Thread PID (TID): %d\n# Run Status: %s' + % (thread.name, thread.ident, linux_tid, status) ) + for filename, lineno, name, line in traceback.extract_stack(thread_stack): + out.append('File: "%s", line %d, in %s' % (filename, lineno, name)) + if line: + out.append(' %s' % (line.strip())) + return out + + +@register_diagnostics_status_function +def threading_information(): + out = [] + threads_by_id = dict([(thread.ident, thread) for thread in threading.enumerate()]) + for thread_id, thread_stack in sys._current_frames().items(): + thread = threads_by_id.get(thread_id, '') + out += _get_thread_current_stacktrace(thread_stack, thread) + return '\n'.join(out) + + +def _to_megabytes(bytes): + return str(int(bytes / 0x100000)) + 'MB' + +@register_diagnostics_status_function +def general_system_info(): + """ + Print general system info + TODO: + - print memory nicer, convert mem to megabytes + - disk partitions usage, + - # of open file handles + - # free inode count + - # of cherrypy session files + - # of cherrypy session locks (should be low) + """ + out = [] + out += ['Mem: ' + repr(psutil.virtual_memory())] + out += ['Swap: ' + repr(psutil.swap_memory())] + return '\n'.join(out) diff --git a/sideboard/run_debug_server.py b/sideboard/run_debug_server.py index 9712653..9f80019 100644 --- a/sideboard/run_debug_server.py +++ b/sideboard/run_debug_server.py @@ -1,10 +1,8 @@ from __future__ import unicode_literals -from debugger import debugger_helpers_all_init +from sideboard.debugging import debugger_helpers_all_init import cherrypy -import sideboard.server - if __name__ == '__main__': debugger_helpers_all_init()