diff --git a/Cargo.lock b/Cargo.lock index cc6114ca..2a426bdf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -451,14 +451,6 @@ dependencies = [ "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "memoffset" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "miniz-sys" version = "0.1.11" @@ -620,7 +612,6 @@ dependencies = [ "libc 0.2.50 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "memoffset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "proc-maps 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "remoteprocess 0.1.0", @@ -1106,7 +1097,6 @@ dependencies = [ "checksum mach_o_sys 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3e854583a83f20cf329bb9283366335387f7db59d640d1412167e05fedb98826" "checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39" "checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" -"checksum memoffset 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7efacc914ca612fc1022f27b7dc51585e1a9f94c08fd5d322cfd741399260ce0" "checksum miniz-sys 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "0300eafb20369952951699b68243ab4334f4b10a88f411c221d444b36c40e649" "checksum miniz_oxide 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c468f2369f07d651a5d0bb2c9079f8488a66d5466efe42d0c5c6466edcb7f71e" "checksum miniz_oxide_c_api 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b7fe927a42e3807ef71defb191dc87d4e24479b221e67015fe38ae2b7b447bab" diff --git a/Cargo.toml b/Cargo.toml index 1220b9e4..281d5abd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,6 @@ goblin = "0.0.21" lazy_static = "1.1.0" libc = "0.2.34" log = "0.4" -memoffset = "0.3" regex = "1" tempdir = "0.3" tempfile = "3.0.3" diff --git a/generate_bindings.py b/generate_bindings.py index af5debbe..f4980d3d 100644 --- a/generate_bindings.py +++ b/generate_bindings.py @@ -9,6 +9,7 @@ import argparse import os import sys +import tempfile def build_python(cpython_path, version): @@ -35,6 +36,59 @@ def build_python(cpython_path, version): return os.system(f"{pip} install setuptools_rust wheel") +def calculate_pyruntime_offsets(cpython_path, version, configure=False): + ret = os.system(f""" + cd {cpython_path} + git checkout {version} + + # need to run configure on the current branch to generate pyconfig.h sometimes + {("./configure prefix=" + os.path.join(cpython_path, version)) if configure else ""} + """) + if ret: + return ret + + # simple little c program to get the offsets we need from the pyruntime struct + # (using rust bindgen here is more complicated than necessary) + program = r""" + #include + #include + #define Py_BUILD_CORE 1 + #include "Include/Python.h" + #include "Include/internal/pystate.h" + + int main(int argc, const char * argv[]) { + size_t interp_head = offsetof(_PyRuntimeState, interpreters.head); + printf("pub static INTERP_HEAD_OFFSET: usize = %i;\n", interp_head); + + size_t tstate_current = offsetof(_PyRuntimeState, gilstate.tstate_current); + printf("pub static TSTATE_CURRENT: usize = %i;\n", tstate_current); + } + """ + + if not os.path.isfile(os.path.join(cpython_path, "Include", "internal", "pystate.h")): + if os.path.isfile(os.path.join(cpython_path, "Include", "internal", "pycore_pystate.h")): + program = program.replace("pystate.h", "pycore_pystate.h") + else: + print("failed to find Include/internal/pystate.h in cpython directory =(") + return + + with tempfile.TemporaryDirectory() as path: + source_filename = os.path.join(path, "pyruntime_offsets.c") + exe = os.path.join(path, "pyruntime_offsets") + + with open(source_filename, "w") as o: + o.write(program) + ret = os.system(f"""gcc {source_filename} -I {cpython_path} -I {cpython_path}/include -o {exe}""") + if ret: + print("Failed to compile""") + return ret + + ret = os.system(exe) + if ret: + print("Failed to run pyruntime file") + return ret + + def extract_bindings(cpython_path, version, configure=False): print("Generating bindings for python %s from repo at %s" % (version, cpython_path)) ret = os.system(f""" @@ -46,12 +100,10 @@ def extract_bindings(cpython_path, version, configure=False): cat Include/Python.h > bindgen_input.h cat Include/frameobject.h >> bindgen_input.h - cat Include/internal/pystate.h >> bindgen_input.h bindgen bindgen_input.h -o bindgen_output.rs \ --with-derive-default \ --no-layout-tests --no-doc-comments \ - --whitelist-type _PyRuntimeState \ --whitelist-type PyInterpreterState \ --whitelist-type PyFrameObject \ --whitelist-type PyThreadState \ @@ -62,7 +114,7 @@ def extract_bindings(cpython_path, version, configure=False): --whitelist-type PyUnicodeObject \ --whitelist-type PyCompactUnicodeObject \ --whitelist-type PyStringObject \ - -- -I . -I ./Include -DPy_BUILD_CORE + -- -I . -I ./Include """) if ret: return ret @@ -77,7 +129,7 @@ def extract_bindings(cpython_path, version, configure=False): o.write("#![allow(clippy::useless_transmute)]\n") o.write("#![allow(clippy::default_trait_access)]\n") o.write("#![allow(clippy::cast_lossless)]\n") - o.write("#![allow(clippy::trivially_copy_pass_by_ref)]\n") + o.write("#![allow(clippy::trivially_copy_pass_by_ref)]\n\n") o.write(open(os.path.join(cpython_path, "bindgen_output.rs")).read()) @@ -91,6 +143,9 @@ def extract_bindings(cpython_path, version, configure=False): parser.add_argument("--configure", help="Run configure script prior to generating bindings", action="store_true") + parser.add_argument("--pyruntime", + help="generate offsets for pyruntime", + action="store_true") parser.add_argument("--build", help="Build python for this version", action="store_true") @@ -120,6 +175,9 @@ def extract_bindings(cpython_path, version, configure=False): # todo: this probably shoudl be a separate script if build_python(args.cpython, version): print("Failed to build python") + elif args.pyruntime: + calculate_pyruntime_offsets(args.cpython, version, configure=args.configure) + else: if extract_bindings(args.cpython, version, configure=args.configure): print("Failed to generate bindings") diff --git a/src/main.rs b/src/main.rs index 363ba299..8e806dbc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,8 +17,6 @@ extern crate libc; #[macro_use] extern crate log; extern crate memmap; -#[macro_use] -extern crate memoffset; extern crate proc_maps; extern crate benfred_read_process_memory as read_process_memory; extern crate regex; @@ -42,6 +40,7 @@ mod stack_trace; mod console_viewer; mod flamegraph; mod utils; +mod version; use std::io::Read; use std::sync::atomic::{AtomicBool, Ordering}; diff --git a/src/python_bindings/mod.rs b/src/python_bindings/mod.rs index 30ba6baa..f0fc53c4 100644 --- a/src/python_bindings/mod.rs +++ b/src/python_bindings/mod.rs @@ -3,3 +3,76 @@ pub mod v3_3_7; pub mod v3_5_5; pub mod v3_6_6; pub mod v3_7_0; + +// currently the PyRuntime struct used from Python 3.7 on really can't be +// exposed in a cross platform way using bindgen. PyRuntime has several mutex's +// as member variables, and these have different sizes depending on the operating +// system and system architecture. +// Instead we will define some constants here that define valid offsets for the +// member variables we care about here +// (note 'generate_bindings.py' has code to figure out these offsets) +pub mod pyruntime { + use version::Version; + + // There aren't any OS specific members of PyRuntime before pyinterpreters.head, + // so these offsets should be valid for all OS'es + #[cfg(target_pointer_width = "32")] + pub static INTERP_HEAD_OFFSET: usize = 16; + + #[cfg(target_pointer_width = "64")] + pub static INTERP_HEAD_OFFSET: usize = 24; + + // getting gilstate.tstate_current is different for all OS + // and is also different for each python version, and even + // between v3.8.0a1 and v3.8.0a2 =( + #[cfg(target_os="macos")] + pub fn get_tstate_current_offset(version: &Version) -> Option { + match version { + Version{major: 3, minor: 7, ..} => Some(1440), + Version{major: 3, minor: 8, patch: 0, ..} => { + match version.release_flags.as_ref() { + "a1" => Some(1432), + "a2" => Some(888), + _ => None + } + }, + _ => None + } + } + + #[cfg(all(target_os="linux", target_pointer_width = "32"))] + pub fn get_tstate_current_offset(version: &Version) -> Option { + match version { + Version{major: 3, minor: 7, ..} => Some(796), + Version{major: 3, minor: 8, patch: 0, ..} => { + match version.release_flags.as_ref() { + "a1" => Some(792), + "a2" => Some(512), + _ => None + } + }, + _ => None + } + } + + #[cfg(all(target_os="linux", target_pointer_width = "64"))] + pub fn get_tstate_current_offset(version: &Version) -> Option { + match version { + Version{major: 3, minor: 7, ..} => Some(1392), + Version{major: 3, minor: 8, patch: 0, ..} => { + match version.release_flags.as_ref() { + "a1" => Some(1384), + "a2" => Some(840), + _ => None + } + }, + _ => None + } + } + + #[cfg(windows)] + pub fn get_tstate_current_offset(version: &Version) -> Option { + // TODO: compute offsets for windows + None + } +} diff --git a/src/python_bindings/v3_7_0.rs b/src/python_bindings/v3_7_0.rs index 3d18713c..fd48a0c0 100644 --- a/src/python_bindings/v3_7_0.rs +++ b/src/python_bindings/v3_7_0.rs @@ -7,6 +7,7 @@ #![allow(clippy::default_trait_access)] #![allow(clippy::cast_lossless)] #![allow(clippy::trivially_copy_pass_by_ref)] + /* automatically generated by rust-bindgen */ #[repr(C)] @@ -93,31 +94,6 @@ pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type __darwin_wchar_t = ::std::os::raw::c_int; pub type __darwin_ssize_t = ::std::os::raw::c_long; pub type __darwin_off_t = __int64_t; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct _opaque_pthread_cond_t { - pub __sig: ::std::os::raw::c_long, - pub __opaque: [::std::os::raw::c_char; 40usize], -} -impl Default for _opaque_pthread_cond_t { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct _opaque_pthread_mutex_t { - pub __sig: ::std::os::raw::c_long, - pub __opaque: [::std::os::raw::c_char; 56usize], -} -impl Default for _opaque_pthread_mutex_t { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type __darwin_pthread_cond_t = _opaque_pthread_cond_t; -pub type __darwin_pthread_key_t = ::std::os::raw::c_ulong; -pub type __darwin_pthread_mutex_t = _opaque_pthread_mutex_t; pub type fpos_t = __darwin_off_t; #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -189,16 +165,6 @@ pub type wchar_t = __darwin_wchar_t; pub type Py_ssize_t = isize; pub type Py_hash_t = Py_ssize_t; #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct _Py_atomic_address { - pub _value: usize, -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct _Py_atomic_int { - pub _value: ::std::os::raw::c_int, -} -#[repr(C)] #[derive(Debug, Copy, Clone)] pub struct _object { pub ob_refcnt: Py_ssize_t, @@ -492,31 +458,6 @@ impl Default for _typeobject { } } #[repr(C)] -#[derive(Copy, Clone)] -pub union _gc_head { - pub gc: _gc_head__bindgen_ty_1, - pub dummy: f64, - _bindgen_union_align: [u64; 3usize], -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct _gc_head__bindgen_ty_1 { - pub gc_next: *mut _gc_head, - pub gc_prev: *mut _gc_head, - pub gc_refs: Py_ssize_t, -} -impl Default for _gc_head__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -impl Default for _gc_head { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type PyGC_Head = _gc_head; -#[repr(C)] #[derive(Debug, Copy, Clone)] pub struct PyBytesObject { pub ob_base: PyVarObject, @@ -695,16 +636,6 @@ impl Default for PyMethodDef { } } pub type PyThread_type_lock = *mut ::std::os::raw::c_void; -pub type Py_tss_t = _Py_tss_t; -pub type pthread_cond_t = __darwin_pthread_cond_t; -pub type pthread_key_t = __darwin_pthread_key_t; -pub type pthread_mutex_t = __darwin_pthread_mutex_t; -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct _Py_tss_t { - pub _is_initialized: ::std::os::raw::c_int, - pub _key: pthread_key_t, -} pub type _PyFrameEvalFunction = ::std::option::Option< unsafe extern "C" fn(arg1: *mut _frame, arg2: ::std::os::raw::c_int) -> *mut PyObject, >; @@ -879,8 +810,6 @@ impl Default for _ts { } } pub type PyThreadState = _ts; -pub type PyThreadFrameGetter = - ::std::option::Option *mut _frame>; pub type getter = ::std::option::Option< unsafe extern "C" fn(arg1: *mut PyObject, arg2: *mut ::std::os::raw::c_void) -> *mut PyObject, >; @@ -974,165 +903,3 @@ impl Default for _frame { } } pub type PyFrameObject = _frame; -#[repr(C)] -#[derive(Copy, Clone)] -pub struct gc_generation { - pub head: PyGC_Head, - pub threshold: ::std::os::raw::c_int, - pub count: ::std::os::raw::c_int, -} -impl Default for gc_generation { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Default, Copy, Clone)] -pub struct gc_generation_stats { - pub collections: Py_ssize_t, - pub collected: Py_ssize_t, - pub uncollectable: Py_ssize_t, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct _gc_runtime_state { - pub trash_delete_later: *mut PyObject, - pub trash_delete_nesting: ::std::os::raw::c_int, - pub enabled: ::std::os::raw::c_int, - pub debug: ::std::os::raw::c_int, - pub generations: [gc_generation; 3usize], - pub generation0: *mut PyGC_Head, - pub permanent_generation: gc_generation, - pub generation_stats: [gc_generation_stats; 3usize], - pub collecting: ::std::os::raw::c_int, - pub garbage: *mut PyObject, - pub callbacks: *mut PyObject, - pub long_lived_total: Py_ssize_t, - pub long_lived_pending: Py_ssize_t, -} -impl Default for _gc_runtime_state { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct _pending_calls { - pub main_thread: ::std::os::raw::c_ulong, - pub lock: PyThread_type_lock, - pub calls_to_do: _Py_atomic_int, - pub async_exc: ::std::os::raw::c_int, - pub calls: [_pending_calls__bindgen_ty_1; 32usize], - pub first: ::std::os::raw::c_int, - pub last: ::std::os::raw::c_int, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct _pending_calls__bindgen_ty_1 { - pub func: ::std::option::Option< - unsafe extern "C" fn(arg1: *mut ::std::os::raw::c_void) -> ::std::os::raw::c_int, - >, - pub arg: *mut ::std::os::raw::c_void, -} -impl Default for _pending_calls__bindgen_ty_1 { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -impl Default for _pending_calls { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct _gil_runtime_state { - pub interval: ::std::os::raw::c_ulong, - pub last_holder: _Py_atomic_address, - pub locked: _Py_atomic_int, - pub switch_number: ::std::os::raw::c_ulong, - pub cond: pthread_cond_t, - pub mutex: pthread_mutex_t, - pub switch_cond: pthread_cond_t, - pub switch_mutex: pthread_mutex_t, -} -impl Default for _gil_runtime_state { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct _ceval_runtime_state { - pub recursion_limit: ::std::os::raw::c_int, - pub tracing_possible: ::std::os::raw::c_int, - pub eval_breaker: _Py_atomic_int, - pub gil_drop_request: _Py_atomic_int, - pub pending: _pending_calls, - pub gil: _gil_runtime_state, -} -impl Default for _ceval_runtime_state { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct _warnings_runtime_state { - pub filters: *mut PyObject, - pub once_registry: *mut PyObject, - pub default_action: *mut PyObject, - pub filters_version: ::std::os::raw::c_long, -} -impl Default for _warnings_runtime_state { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct _gilstate_runtime_state { - pub check_enabled: ::std::os::raw::c_int, - pub tstate_current: _Py_atomic_address, - pub getframe: PyThreadFrameGetter, - pub autoInterpreterState: *mut PyInterpreterState, - pub autoTSSkey: Py_tss_t, -} -impl Default for _gilstate_runtime_state { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct pyruntimestate { - pub initialized: ::std::os::raw::c_int, - pub core_initialized: ::std::os::raw::c_int, - pub finalizing: *mut PyThreadState, - pub interpreters: pyruntimestate_pyinterpreters, - pub exitfuncs: [::std::option::Option; 32usize], - pub nexitfuncs: ::std::os::raw::c_int, - pub gc: _gc_runtime_state, - pub warnings: _warnings_runtime_state, - pub ceval: _ceval_runtime_state, - pub gilstate: _gilstate_runtime_state, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct pyruntimestate_pyinterpreters { - pub mutex: PyThread_type_lock, - pub head: *mut PyInterpreterState, - pub main: *mut PyInterpreterState, - pub next_id: i64, -} -impl Default for pyruntimestate_pyinterpreters { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -impl Default for pyruntimestate { - fn default() -> Self { - unsafe { ::std::mem::zeroed() } - } -} -pub type _PyRuntimeState = pyruntimestate; diff --git a/src/python_spy.rs b/src/python_spy.rs index 8b76692d..88293029 100644 --- a/src/python_spy.rs +++ b/src/python_spy.rs @@ -8,15 +8,16 @@ use regex::Regex; use failure::{Error, ResultExt}; use read_process_memory::{Pid, copy_address, ProcessHandle}; use proc_maps::{get_process_maps, MapRange}; -use python_bindings::{v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0}; +use python_bindings::{pyruntime, v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0}; -use python_interpreters; -use stack_trace::{StackTrace, get_stack_traces}; -use native_stack_trace::NativeStack; use binary_parser::{parse_binary, BinaryInfo}; -use utils::{copy_struct, copy_pointer}; -use python_interpreters::{InterpreterState, ThreadState}; use config::Config; +use native_stack_trace::NativeStack; +use python_interpreters::{self, InterpreterState, ThreadState}; +use stack_trace::{StackTrace, get_stack_traces}; +use utils::{copy_struct, copy_pointer}; +use version::Version; + pub struct PythonSpy { pub pid: Pid, @@ -46,16 +47,38 @@ impl PythonSpy { info!("Found interpreter at 0x{:016x}", interpreter_address); // lets us figure out which thread has the GIL - let threadstate_address = match python_info.get_symbol("_PyThreadState_Current") { - Some(&addr) => { - info!("Found _PyThreadState_Current @ 0x{:016x}", addr); - addr as usize - }, - None => { - warn!("Failed to find _PyThreadState_Current symbol - won't be able to detect GIL usage"); - 0 - } - }; + let threadstate_address = match version { + Version{major: 3, minor: 7...8, ..} => { + match python_info.get_symbol("_PyRuntime") { + Some(&addr) => { + if let Some(offset) = pyruntime::get_tstate_current_offset(&version) { + info!("Found _PyRuntime @ 0x{:016x}, getting gilstate.tstate_current from offset 0x{:x}", + addr, offset); + addr as usize + offset + } else { + warn!("Unknown pyruntime.gilstate.tstate_current offset for version {:?}", version); + 0 + } + }, + None => { + warn!("Failed to find _PyRuntime symbol - won't be able to detect GIL usage"); + 0 + } + } + }, + _ => { + match python_info.get_symbol("_PyThreadState_Current") { + Some(&addr) => { + info!("Found _PyThreadState_Current @ 0x{:016x}", addr); + addr as usize + }, + None => { + warn!("Failed to find _PyThreadState_Current symbol - won't be able to detect GIL usage"); + 0 + } + } + } + }; let version_string = format!("python{}.{}", version.major, version.minor); @@ -134,8 +157,10 @@ impl PythonSpy { if self.threadstate_address > 0 { let addr: usize = copy_struct(self.threadstate_address, &self.process.handle())?; if addr != 0 { - let threadstate: I::ThreadState = copy_struct(addr, &self.process.handle())?; - gil_thread_id = threadstate.thread_id(); + match copy_struct::(addr, &self.process.handle()) { + Ok(threadstate) => { gil_thread_id = threadstate.thread_id(); }, + Err(e) => { warn!("failed to copy threadstate: addr {:016x}. Err {:?}", addr, e); } + } } } @@ -246,9 +271,7 @@ fn get_interpreter_address(python_info: &PythonProcessInfo, match version { Version{major: 3, minor: 7, ..} => { if let Some(&addr) = python_info.get_symbol("_PyRuntime") { - let interp_head_offset = offset_of!(v3_7_0::_PyRuntimeState, interpreters) + - offset_of!(v3_7_0::pyruntimestate_pyinterpreters, head); - let addr = copy_struct(addr as usize + interp_head_offset, &process)?; + let addr = copy_struct(addr as usize + pyruntime::INTERP_HEAD_OFFSET, &process)?; // Make sure the interpreter addr is valid before returning match check_interpreter_addresses(&[addr], &python_info.maps, process, version) { Ok(addr) => return Ok(addr), @@ -588,56 +611,6 @@ pub fn is_python_framework(pathname: &str) -> bool { !pathname.contains("Python.app") } -#[derive(Debug, PartialEq, Eq)] -pub struct Version { - pub major: u64, - pub minor: u64, - pub patch: u64, - pub release_flags: String -} - -impl Version { - pub fn scan_bytes(data: &[u8]) -> Result { - use regex::bytes::Regex; - lazy_static! { - static ref RE: Regex = Regex::new(r"((2|3)\.(3|4|5|6|7|8)\.(\d{1,2}))((a|b|c|rc)\d{1,2})? (.{1,64})").unwrap(); - } - - if let Some(cap) = RE.captures_iter(data).next() { - let release = match cap.get(5) { - Some(x) => { std::str::from_utf8(x.as_bytes())? }, - None => "" - }; - let major = std::str::from_utf8(&cap[2])?.parse::()?; - let minor = std::str::from_utf8(&cap[3])?.parse::()?; - let patch = std::str::from_utf8(&cap[4])?.parse::()?; - - let version = std::str::from_utf8(&cap[0])?; - info!("Found matching version string '{}'", version); - #[cfg(windows)] - { - if version.contains("32 bit") { - error!("32-bit python is not yet supported on windows! See https://github.com/benfred/py-spy/issues/31 for updates"); - // we're panic'ing rather than returning an error, since we can't recover from this - // and returning an error would just get the calling code to fall back to other - // methods of trying to find the version - panic!("32-bit python is unsupported on windows"); - } - } - - return Ok(Version{major, minor, patch, release_flags:release.to_owned()}); - } - Err(format_err!("failed to find version string")) - } -} - -impl std::fmt::Display for Version { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}.{}.{}{}", self.major, self.minor, self.patch, self.release_flags) - } -} - - #[cfg(test)] mod tests { use super::*; @@ -694,25 +667,4 @@ mod tests { assert!(is_python_framework("/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Python")); assert!(!is_python_framework("/Users/ben/.pyenv/versions/3.6.6/Python.framework/Versions/3.6/Resources/Python.app/Contents/MacOS/Python")); } - - #[test] - fn test_find_version() { - let version = Version::scan_bytes(b"2.7.10 (default, Oct 6 2017, 22:29:07)").unwrap(); - assert_eq!(version, Version{major: 2, minor: 7, patch: 10, release_flags: "".to_owned()}); - - let version = Version::scan_bytes(b"3.6.3 |Anaconda custom (64-bit)| (default, Oct 6 2017, 12:04:38)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 6, patch: 3, release_flags: "".to_owned()}); - - let version = Version::scan_bytes(b"Python 3.7.0rc1 (v3.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)").unwrap(); - assert_eq!(version, Version{major: 3, minor: 7, patch: 0, release_flags: "rc1".to_owned()}); - - let version = Version::scan_bytes(b"1.7.0rc1 (v1.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)"); - assert!(version.is_err(), "don't match unsupported "); - - let version = Version::scan_bytes(b"3.7 10 "); - assert!(version.is_err(), "needs dotted version"); - - let version = Version::scan_bytes(b"3.7.10fooboo "); - assert!(version.is_err(), "limit suffixes"); - } } diff --git a/src/version.rs b/src/version.rs new file mode 100644 index 00000000..8af60c72 --- /dev/null +++ b/src/version.rs @@ -0,0 +1,78 @@ +use regex::bytes::Regex; +use std; + +use failure::{Error}; + + +#[derive(Debug, PartialEq, Eq)] +pub struct Version { + pub major: u64, + pub minor: u64, + pub patch: u64, + pub release_flags: String +} + +impl Version { + pub fn scan_bytes(data: &[u8]) -> Result { + lazy_static! { + static ref RE: Regex = Regex::new(r"((2|3)\.(3|4|5|6|7|8)\.(\d{1,2}))((a|b|c|rc)\d{1,2})? (.{1,64})").unwrap(); + } + + if let Some(cap) = RE.captures_iter(data).next() { + let release = match cap.get(5) { + Some(x) => { std::str::from_utf8(x.as_bytes())? }, + None => "" + }; + let major = std::str::from_utf8(&cap[2])?.parse::()?; + let minor = std::str::from_utf8(&cap[3])?.parse::()?; + let patch = std::str::from_utf8(&cap[4])?.parse::()?; + + let version = std::str::from_utf8(&cap[0])?; + info!("Found matching version string '{}'", version); + #[cfg(windows)] + { + if version.contains("32 bit") { + error!("32-bit python is not yet supported on windows! See https://github.com/benfred/py-spy/issues/31 for updates"); + // we're panic'ing rather than returning an error, since we can't recover from this + // and returning an error would just get the calling code to fall back to other + // methods of trying to find the version + panic!("32-bit python is unsupported on windows"); + } + } + + return Ok(Version{major, minor, patch, release_flags:release.to_owned()}); + } + Err(format_err!("failed to find version string")) + } +} + +impl std::fmt::Display for Version { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}.{}.{}{}", self.major, self.minor, self.patch, self.release_flags) + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_find_version() { + let version = Version::scan_bytes(b"2.7.10 (default, Oct 6 2017, 22:29:07)").unwrap(); + assert_eq!(version, Version{major: 2, minor: 7, patch: 10, release_flags: "".to_owned()}); + + let version = Version::scan_bytes(b"3.6.3 |Anaconda custom (64-bit)| (default, Oct 6 2017, 12:04:38)").unwrap(); + assert_eq!(version, Version{major: 3, minor: 6, patch: 3, release_flags: "".to_owned()}); + + let version = Version::scan_bytes(b"Python 3.7.0rc1 (v3.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)").unwrap(); + assert_eq!(version, Version{major: 3, minor: 7, patch: 0, release_flags: "rc1".to_owned()}); + + let version = Version::scan_bytes(b"1.7.0rc1 (v1.7.0rc1:dfad352267, Jul 20 2018, 13:27:54)"); + assert!(version.is_err(), "don't match unsupported "); + + let version = Version::scan_bytes(b"3.7 10 "); + assert!(version.is_err(), "needs dotted version"); + + let version = Version::scan_bytes(b"3.7.10fooboo "); + assert!(version.is_err(), "limit suffixes"); + } +} diff --git a/tests/integration_test.py b/tests/integration_test.py new file mode 100644 index 00000000..9c953047 --- /dev/null +++ b/tests/integration_test.py @@ -0,0 +1,88 @@ +import os +import re +import subprocess +import sys +import time +import unittest +from collections import namedtuple + +Frame = namedtuple("Frame", ("function", "file", "line")) +Thread = namedtuple("Thread", ("id", "status")) + + +class IntegrationTest(unittest.TestCase): + def _profile_python_file(self, filename): + # Run the python command in a subprocess + python_process = subprocess.Popen( + [sys.executable, os.path.join("scripts", filename)] + ) + try: + # hack: give it some time to get running + time.sleep(0.2) + + # Run py-spy on the pid of the process we just created + # TODO: get built py-spy here (rather than globally installed) + output = subprocess.check_output( + ["py-spy", "--pid", str(python_process.pid), "--dump"] + ) + + if sys.version_info[0] >= 3: + output = output.decode("utf8") + + traces = [] + for thread in output.split("\nThread")[1:]: + lines = thread.split("\n") + traces.append( + (parse_thread(lines[0]), [parse_frame(l) for l in lines[1:] if l]) + ) + return traces + + finally: + python_process.kill() + python_process.wait() + + def test_basic(self): + traces = self._profile_python_file("longsleep.py") + self.assertEqual(len(traces), 1) + + thread, frames = traces[0] + self.assertEqual( + frames, + [ + Frame(function="longsleep", file="longsleep.py", line=5), + Frame(function="", file="longsleep.py", line=9), + ], + ) + + def test_gil(self): + traces = self._profile_python_file("busyloop.py") + self.assertEqual(len(traces), 1) + thread, frames = traces[0] + assert "gil" in thread.status + + traces = self._profile_python_file("longsleep.py") + self.assertEqual(len(traces), 1) + thread, frames = traces[0] + assert "gil" not in thread.status + + +def parse_frame(frame_line): + matches = re.match( + r"\s+(?P\S+) .(?P\S+):(?P\d+).", frame_line + ) + if not matches: + return None + frame = matches.groupdict() + frame["line"] = int(frame["line"]) + return Frame(**frame) + + +def parse_thread(thread_line): + matches = re.match(r"\s*(?P0[xX][0-9a-fA-f]+) \((?P\S+)\)", thread_line) + if not matches: + return None + return Thread(**matches.groupdict()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/scripts/busyloop.py b/tests/scripts/busyloop.py new file mode 100644 index 00000000..0d9284c1 --- /dev/null +++ b/tests/scripts/busyloop.py @@ -0,0 +1,7 @@ +def busy_loop(): + while True: + pass + + +if __name__ == "__main__": + busy_loop() diff --git a/tests/scripts/longsleep.py b/tests/scripts/longsleep.py new file mode 100644 index 00000000..61bfbb7d --- /dev/null +++ b/tests/scripts/longsleep.py @@ -0,0 +1,9 @@ +import time + + +def longsleep(): + time.sleep(100000) + + +if __name__ == "__main__": + longsleep()