From d186dc4467c972863460de8e3e550554ab66c204 Mon Sep 17 00:00:00 2001 From: Manoj Joseph Date: Fri, 17 Dec 2021 00:20:28 -0800 Subject: [PATCH] DOSE-484 Trace level debugging in-memory and on crash (#45) --- cmd/zfs_object_agent/Cargo.lock | 2 + cmd/zfs_object_agent/server/src/main.rs | 30 +--- cmd/zfs_object_agent/util/Cargo.toml | 2 + cmd/zfs_object_agent/util/src/logging.rs | 204 +++++++++++++++++++++-- cmd/zfs_object_agent/util/src/tunable.rs | 9 +- etc/zfs/zoa_log4rs.yml | 13 +- 6 files changed, 209 insertions(+), 51 deletions(-) diff --git a/cmd/zfs_object_agent/Cargo.lock b/cmd/zfs_object_agent/Cargo.lock index 5d0e6e6d8773..4d3ec43ab88c 100644 --- a/cmd/zfs_object_agent/Cargo.lock +++ b/cmd/zfs_object_agent/Cargo.lock @@ -2527,9 +2527,11 @@ dependencies = [ name = "util" version = "0.1.0" dependencies = [ + "anyhow", "async-trait", "backtrace", "bytes", + "chrono", "config", "lazy_static", "log", diff --git a/cmd/zfs_object_agent/server/src/main.rs b/cmd/zfs_object_agent/server/src/main.rs index b2a5f2afe0cc..c9c11cba65ae 100644 --- a/cmd/zfs_object_agent/server/src/main.rs +++ b/cmd/zfs_object_agent/server/src/main.rs @@ -148,6 +148,9 @@ fn main() { let cache_paths = matches .values_of("cache-device") .map_or(Vec::new(), |values| values.collect()); + if let Some(file_name) = matches.value_of("config-file") { + util::read_tunable_config(file_name); + } util::setup_logging( matches.occurrences_of("verbosity"), @@ -155,39 +158,12 @@ fn main() { matches.value_of("log-config"), ); - if let Some(file_name) = matches.value_of("config-file") { - util::read_tunable_config(file_name); - } - error!( "Starting ZFS Object Agent ({}). Local timezone is {}", GIT_VERSION, chrono::Local::now().format("%Z (%:z)") ); - // error!() should be used when an invalid state is encountered; the related - // operation will fail and the program may exit. E.g. an invalid request - // was received from the client (kernel). - error!("logging level ERROR enabled"); - - // warn!() should be used when something unexpected has happened, but it can - // be recovered from. - warn!("logging level WARN enabled"); - - // info!() should be used for very high level operations which are expected - // to happen infrequently (no more than once per minute in typical - // operation). e.g. opening/closing a pool, long-lived background tasks, - // things that might be in `zpool history -i`. - info!("logging level INFO enabled"); - - // debug!() can be used for all but the most frequent operations. - // e.g. not every single read/write/free operation, but perhaps for every - // call to S3. - debug!("logging level DEBUG enabled"); - - // trace!() can be used indiscriminately. - trace!("logging level TRACE enabled"); - zettaobject::init::start(socket_dir, cache_paths); } } diff --git a/cmd/zfs_object_agent/util/Cargo.toml b/cmd/zfs_object_agent/util/Cargo.toml index fb7404aa8489..149107b012c2 100644 --- a/cmd/zfs_object_agent/util/Cargo.toml +++ b/cmd/zfs_object_agent/util/Cargo.toml @@ -6,9 +6,11 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = "1.0" async-trait = "0.1.51" backtrace = "0.3" bytes = "1.0" +chrono = "0.4.19" config = "0.11" lazy_static = "1.4.0" log = "0.4" diff --git a/cmd/zfs_object_agent/util/src/logging.rs b/cmd/zfs_object_agent/util/src/logging.rs index df8b2039848f..3d93b06df298 100644 --- a/cmd/zfs_object_agent/util/src/logging.rs +++ b/cmd/zfs_object_agent/util/src/logging.rs @@ -1,13 +1,38 @@ +use crate::get_tunable; +use crate::tunable::log_tunable_config; +use backtrace::Backtrace; use lazy_static::lazy_static; use log::*; use log4rs::append::console::ConsoleAppender; use log4rs::append::file::FileAppender; -use log4rs::config::Logger; +use log4rs::append::Append; use log4rs::config::{Appender, Config, Root}; +use log4rs::config::{Deserialize, Deserializers, Logger}; use log4rs::encode::pattern::PatternEncoder; +use log4rs::filter::threshold::ThresholdFilter; +use std::collections::VecDeque; +use std::fs::OpenOptions; +use std::io::Write; +use std::panic::PanicInfo; +use std::sync::atomic::{AtomicPtr, Ordering}; +use std::{panic, process, ptr, thread}; + +static LOG_MESSAGES_PTR: AtomicPtr>> = + AtomicPtr::new(ptr::null_mut()); + +type PanicHook = Box; lazy_static! { static ref LOG_PATTERN: String = "[{d(%Y-%m-%d %H:%M:%S%.3f)}][{t}][{l}] {m}{n}".to_string(); + static ref LOG_MESSAGES: std::sync::Mutex> = { + let mut inner = Default::default(); + LOG_MESSAGES_PTR.store(&mut inner, Ordering::Relaxed); + inner + }; + static ref MAX_LOG_MESSAGES: usize = get_tunable("max_log_messages", 100_000); + static ref PANIC_LOG_FOLDER: String = + get_tunable("panic_log_folder", "/var/log/zoa".to_string()); + static ref DEFAULT_HOOK: std::sync::Mutex> = Default::default(); } pub fn get_logging_level(verbosity: u64) -> LevelFilter { @@ -19,22 +44,116 @@ pub fn get_logging_level(verbosity: u64) -> LevelFilter { } } +/// An appender which logs to a memory buffer. +#[derive(Debug)] +pub struct BufferAppender {} + +impl Append for BufferAppender { + fn append(&self, record: &Record) -> anyhow::Result<()> { + let str = format!( + "[{}][{}][{}] {}", + chrono::Local::now().format("%Y-%m-%d %H:%M:%S%.3f"), + record.target(), + record.level(), + record.args() + ); + + if let Ok(mut messages) = LOG_MESSAGES.lock() { + while messages.len() >= *MAX_LOG_MESSAGES { + messages.pop_front(); + } + messages.push_back(str); + } + + Ok(()) + } + + fn flush(&self) {} +} + +impl BufferAppender { + fn get_writer() -> Box { + match OpenOptions::new().append(true).create(true).open(format!( + "{}/panic_{}.log", + PANIC_LOG_FOLDER.as_str(), + process::id() + )) { + Ok(file) => Box::new(file) as Box, + Err(_) => Box::new(std::io::stderr()), + } + } + + /// Dump log messages in memory to a file or stderr. + pub fn dump(info: &PanicInfo) { + let mut output = Self::get_writer(); + if let Ok(messages) = LOG_MESSAGES.lock() { + for message in messages.iter() { + writeln!(output, "{}", message).unwrap(); + } + } + + let location = info.location().unwrap(); + let msg = match info.payload().downcast_ref::<&'static str>() { + Some(s) => *s, + None => match info.payload().downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + let current_thread = thread::current(); + let name = current_thread.name().unwrap_or(""); + writeln!( + output, + "thread '{}' panicked at '{}', {}", + name, msg, location + ) + .unwrap(); + writeln!(output, "stack backtrace:\n{:?}", Backtrace::new()).unwrap(); + } +} + +#[derive(Debug, serde::Deserialize)] +pub struct BufferAppenderConfig; + +pub struct BufferAppenderDeserializer; + +impl Deserialize for BufferAppenderDeserializer { + type Trait = dyn Append; + type Config = BufferAppenderConfig; + + fn deserialize( + &self, + _config: BufferAppenderConfig, + _deserializers: &Deserializers, + ) -> anyhow::Result> { + Ok(Box::new(BufferAppender {})) + } +} + fn setup_console_logging(verbosity: u64) { let config = Config::builder() .appender( - Appender::builder().build( - "stdout", - Box::new( - ConsoleAppender::builder() - .encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN))) - .build(), + Appender::builder() + .filter(Box::new(ThresholdFilter::new(get_logging_level(verbosity)))) + .build("memory", Box::new(BufferAppender {})), + ) + .appender( + Appender::builder() + .filter(Box::new(ThresholdFilter::new(get_logging_level(verbosity)))) + .build( + "stdout", + Box::new( + ConsoleAppender::builder() + .encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN))) + .build(), + ), ), - ), ) // rusoto_core::request is very chatty when set to debug. So, set it to info. .logger(Logger::builder().build("rusoto_core::request", LevelFilter::Info)) .build( Root::builder() + .appender("memory") .appender("stdout") .build(get_logging_level(verbosity)), ) @@ -46,22 +165,30 @@ fn setup_console_logging(verbosity: u64) { fn setup_logfile(verbosity: u64, logfile: &str) { let config = Config::builder() .appender( - Appender::builder().build( - "logfile", - Box::new( - FileAppender::builder() - .encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN))) - .build(logfile) - .unwrap(), + Appender::builder() + .filter(Box::new(ThresholdFilter::new(LevelFilter::Trace))) + .build("memory", Box::new(BufferAppender {})), + ) + .appender( + Appender::builder() + .filter(Box::new(ThresholdFilter::new(get_logging_level(verbosity)))) + .build( + "logfile", + Box::new( + FileAppender::builder() + .encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN))) + .build(logfile) + .unwrap(), + ), ), - ), ) // rusoto_core::request is very chatty when set to debug. So, set it to info. .logger(Logger::builder().build("rusoto_core::request", LevelFilter::Info)) .build( Root::builder() + .appender("memory") .appender("logfile") - .build(get_logging_level(verbosity)), + .build(LevelFilter::Trace), ) .unwrap(); @@ -69,9 +196,22 @@ fn setup_logfile(verbosity: u64, logfile: &str) { } pub fn setup_logging(verbosity: u64, file_name: Option<&str>, log_config: Option<&str>) { + /* + * Panic hook to dump trace logs to a file, in case of a panic. + */ + let default_hook = panic::take_hook(); + panic::set_hook(Box::new(move |info| { + // Call the default panic hook to print out the panic info. + default_hook(info); + + BufferAppender::dump(info); + })); + match log_config { Some(config) => { - log4rs::init_file(config, Default::default()).unwrap(); + let mut deserializers = log4rs::config::Deserializers::new(); + deserializers.insert("buffer", BufferAppenderDeserializer); + log4rs::init_file(config, deserializers).unwrap(); } None => { match file_name { @@ -88,4 +228,32 @@ pub fn setup_logging(verbosity: u64, file_name: Option<&str>, log_config: Option } } }; + + // error!() should be used when an invalid state is encountered; the related + // operation will fail and the program may exit. E.g. an invalid request + // was received from the client (kernel). + error!("logging level ERROR enabled"); + + // warn!() should be used when something unexpected has happened, but it can + // be recovered from. + warn!("logging level WARN enabled"); + + // info!() should be used for very high level operations which are expected + // to happen infrequently (no more than once per minute in typical + // operation). e.g. opening/closing a pool, long-lived background tasks, + // things that might be in `zpool history -i`. + info!("logging level INFO enabled"); + + // debug!() can be used for all but the most frequent operations. + // e.g. not every single read/write/free operation, but perhaps for every + // call to S3. + debug!("logging level DEBUG enabled"); + + // trace!() can be used for frequent operation. But note that we evaluate + // all the trace! statements in prod and there is some cost involved in + // string processing, memory allocation, global lock etc. + trace!("logging level TRACE enabled"); + + // Log all the tunables. + log_tunable_config(); } diff --git a/cmd/zfs_object_agent/util/src/tunable.rs b/cmd/zfs_object_agent/util/src/tunable.rs index 51f35e17eaf4..6cd9bd862585 100644 --- a/cmd/zfs_object_agent/util/src/tunable.rs +++ b/cmd/zfs_object_agent/util/src/tunable.rs @@ -13,10 +13,7 @@ where T: Deserialize<'de> + Debug, { match CONFIG.read().unwrap().get(name) { - Ok(v) => { - info!("{}: using value {:?} from config file", name, v); - v - } + Ok(v) => v, Err(ConfigError::NotFound(_)) => default, Err(e) => { warn!("{}: using default: {:?}", e, default); @@ -28,5 +25,9 @@ where pub fn read_tunable_config(file_name: &str) { let mut config = CONFIG.write().unwrap(); config.merge(config::File::with_name(file_name)).unwrap(); +} + +pub fn log_tunable_config() { + let config = CONFIG.read().unwrap(); info!("config: {}", config.cache); } diff --git a/etc/zfs/zoa_log4rs.yml b/etc/zfs/zoa_log4rs.yml index 0186cf440cf8..9a7eda71dc41 100644 --- a/etc/zfs/zoa_log4rs.yml +++ b/etc/zfs/zoa_log4rs.yml @@ -89,14 +89,23 @@ appenders: - kind: threshold level: error -# Set the default logging level to "debug" + memory: + kind: buffer + filters: + - kind: threshold + level: trace + +# Set the default logging level to "trace" to permit trace logs to "memory". +# But tracelog is not enabled by default. root: - level: debug + level: trace appenders: - stdout +# - tracelog - debuglog - infolog - errorlog + - memory loggers: # "rusoto_core::request" is very chatty when set to debug.