Skip to content

Commit

Permalink
DOSE-484 Trace level debugging in-memory and on crash (openzfs#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
manoj-joseph authored Dec 17, 2021
1 parent afb9215 commit d186dc4
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 51 deletions.
2 changes: 2 additions & 0 deletions cmd/zfs_object_agent/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 3 additions & 27 deletions cmd/zfs_object_agent/server/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,46 +148,22 @@ fn main() {
let cache_paths = matches
.values_of("cache-device")
.map_or(Vec::new(), |values| values.collect());
if let Some(file_name) = matches.value_of("config-file") {
util::read_tunable_config(file_name);
}

util::setup_logging(
matches.occurrences_of("verbosity"),
matches.value_of("output-file"),
matches.value_of("log-config"),
);

if let Some(file_name) = matches.value_of("config-file") {
util::read_tunable_config(file_name);
}

error!(
"Starting ZFS Object Agent ({}). Local timezone is {}",
GIT_VERSION,
chrono::Local::now().format("%Z (%:z)")
);

// error!() should be used when an invalid state is encountered; the related
// operation will fail and the program may exit. E.g. an invalid request
// was received from the client (kernel).
error!("logging level ERROR enabled");

// warn!() should be used when something unexpected has happened, but it can
// be recovered from.
warn!("logging level WARN enabled");

// info!() should be used for very high level operations which are expected
// to happen infrequently (no more than once per minute in typical
// operation). e.g. opening/closing a pool, long-lived background tasks,
// things that might be in `zpool history -i`.
info!("logging level INFO enabled");

// debug!() can be used for all but the most frequent operations.
// e.g. not every single read/write/free operation, but perhaps for every
// call to S3.
debug!("logging level DEBUG enabled");

// trace!() can be used indiscriminately.
trace!("logging level TRACE enabled");

zettaobject::init::start(socket_dir, cache_paths);
}
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/zfs_object_agent/util/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0"
async-trait = "0.1.51"
backtrace = "0.3"
bytes = "1.0"
chrono = "0.4.19"
config = "0.11"
lazy_static = "1.4.0"
log = "0.4"
Expand Down
204 changes: 186 additions & 18 deletions cmd/zfs_object_agent/util/src/logging.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,38 @@
use crate::get_tunable;
use crate::tunable::log_tunable_config;
use backtrace::Backtrace;
use lazy_static::lazy_static;
use log::*;
use log4rs::append::console::ConsoleAppender;
use log4rs::append::file::FileAppender;
use log4rs::config::Logger;
use log4rs::append::Append;
use log4rs::config::{Appender, Config, Root};
use log4rs::config::{Deserialize, Deserializers, Logger};
use log4rs::encode::pattern::PatternEncoder;
use log4rs::filter::threshold::ThresholdFilter;
use std::collections::VecDeque;
use std::fs::OpenOptions;
use std::io::Write;
use std::panic::PanicInfo;
use std::sync::atomic::{AtomicPtr, Ordering};
use std::{panic, process, ptr, thread};

static LOG_MESSAGES_PTR: AtomicPtr<std::sync::Mutex<VecDeque<String>>> =
AtomicPtr::new(ptr::null_mut());

type PanicHook = Box<dyn Fn(&panic::PanicInfo) + Sync + Send>;

lazy_static! {
static ref LOG_PATTERN: String = "[{d(%Y-%m-%d %H:%M:%S%.3f)}][{t}][{l}] {m}{n}".to_string();
static ref LOG_MESSAGES: std::sync::Mutex<VecDeque<String>> = {
let mut inner = Default::default();
LOG_MESSAGES_PTR.store(&mut inner, Ordering::Relaxed);
inner
};
static ref MAX_LOG_MESSAGES: usize = get_tunable("max_log_messages", 100_000);
static ref PANIC_LOG_FOLDER: String =
get_tunable("panic_log_folder", "/var/log/zoa".to_string());
static ref DEFAULT_HOOK: std::sync::Mutex<Option<PanicHook>> = Default::default();
}

pub fn get_logging_level(verbosity: u64) -> LevelFilter {
Expand All @@ -19,22 +44,116 @@ pub fn get_logging_level(verbosity: u64) -> LevelFilter {
}
}

/// An appender which logs to a memory buffer.
#[derive(Debug)]
pub struct BufferAppender {}

impl Append for BufferAppender {
fn append(&self, record: &Record) -> anyhow::Result<()> {
let str = format!(
"[{}][{}][{}] {}",
chrono::Local::now().format("%Y-%m-%d %H:%M:%S%.3f"),
record.target(),
record.level(),
record.args()
);

if let Ok(mut messages) = LOG_MESSAGES.lock() {
while messages.len() >= *MAX_LOG_MESSAGES {
messages.pop_front();
}
messages.push_back(str);
}

Ok(())
}

fn flush(&self) {}
}

impl BufferAppender {
fn get_writer() -> Box<dyn Write> {
match OpenOptions::new().append(true).create(true).open(format!(
"{}/panic_{}.log",
PANIC_LOG_FOLDER.as_str(),
process::id()
)) {
Ok(file) => Box::new(file) as Box<dyn Write>,
Err(_) => Box::new(std::io::stderr()),
}
}

/// Dump log messages in memory to a file or stderr.
pub fn dump(info: &PanicInfo) {
let mut output = Self::get_writer();
if let Ok(messages) = LOG_MESSAGES.lock() {
for message in messages.iter() {
writeln!(output, "{}", message).unwrap();
}
}

let location = info.location().unwrap();
let msg = match info.payload().downcast_ref::<&'static str>() {
Some(s) => *s,
None => match info.payload().downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
let current_thread = thread::current();
let name = current_thread.name().unwrap_or("<unnamed>");
writeln!(
output,
"thread '{}' panicked at '{}', {}",
name, msg, location
)
.unwrap();
writeln!(output, "stack backtrace:\n{:?}", Backtrace::new()).unwrap();
}
}

#[derive(Debug, serde::Deserialize)]
pub struct BufferAppenderConfig;

pub struct BufferAppenderDeserializer;

impl Deserialize for BufferAppenderDeserializer {
type Trait = dyn Append;
type Config = BufferAppenderConfig;

fn deserialize(
&self,
_config: BufferAppenderConfig,
_deserializers: &Deserializers,
) -> anyhow::Result<Box<dyn Append>> {
Ok(Box::new(BufferAppender {}))
}
}

fn setup_console_logging(verbosity: u64) {
let config = Config::builder()
.appender(
Appender::builder().build(
"stdout",
Box::new(
ConsoleAppender::builder()
.encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN)))
.build(),
Appender::builder()
.filter(Box::new(ThresholdFilter::new(get_logging_level(verbosity))))
.build("memory", Box::new(BufferAppender {})),
)
.appender(
Appender::builder()
.filter(Box::new(ThresholdFilter::new(get_logging_level(verbosity))))
.build(
"stdout",
Box::new(
ConsoleAppender::builder()
.encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN)))
.build(),
),
),
),
)
// rusoto_core::request is very chatty when set to debug. So, set it to info.
.logger(Logger::builder().build("rusoto_core::request", LevelFilter::Info))
.build(
Root::builder()
.appender("memory")
.appender("stdout")
.build(get_logging_level(verbosity)),
)
Expand All @@ -46,32 +165,53 @@ fn setup_console_logging(verbosity: u64) {
fn setup_logfile(verbosity: u64, logfile: &str) {
let config = Config::builder()
.appender(
Appender::builder().build(
"logfile",
Box::new(
FileAppender::builder()
.encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN)))
.build(logfile)
.unwrap(),
Appender::builder()
.filter(Box::new(ThresholdFilter::new(LevelFilter::Trace)))
.build("memory", Box::new(BufferAppender {})),
)
.appender(
Appender::builder()
.filter(Box::new(ThresholdFilter::new(get_logging_level(verbosity))))
.build(
"logfile",
Box::new(
FileAppender::builder()
.encoder(Box::new(PatternEncoder::new(&*LOG_PATTERN)))
.build(logfile)
.unwrap(),
),
),
),
)
// rusoto_core::request is very chatty when set to debug. So, set it to info.
.logger(Logger::builder().build("rusoto_core::request", LevelFilter::Info))
.build(
Root::builder()
.appender("memory")
.appender("logfile")
.build(get_logging_level(verbosity)),
.build(LevelFilter::Trace),
)
.unwrap();

log4rs::init_config(config).unwrap();
}

pub fn setup_logging(verbosity: u64, file_name: Option<&str>, log_config: Option<&str>) {
/*
* Panic hook to dump trace logs to a file, in case of a panic.
*/
let default_hook = panic::take_hook();
panic::set_hook(Box::new(move |info| {
// Call the default panic hook to print out the panic info.
default_hook(info);

BufferAppender::dump(info);
}));

match log_config {
Some(config) => {
log4rs::init_file(config, Default::default()).unwrap();
let mut deserializers = log4rs::config::Deserializers::new();
deserializers.insert("buffer", BufferAppenderDeserializer);
log4rs::init_file(config, deserializers).unwrap();
}
None => {
match file_name {
Expand All @@ -88,4 +228,32 @@ pub fn setup_logging(verbosity: u64, file_name: Option<&str>, log_config: Option
}
}
};

// error!() should be used when an invalid state is encountered; the related
// operation will fail and the program may exit. E.g. an invalid request
// was received from the client (kernel).
error!("logging level ERROR enabled");

// warn!() should be used when something unexpected has happened, but it can
// be recovered from.
warn!("logging level WARN enabled");

// info!() should be used for very high level operations which are expected
// to happen infrequently (no more than once per minute in typical
// operation). e.g. opening/closing a pool, long-lived background tasks,
// things that might be in `zpool history -i`.
info!("logging level INFO enabled");

// debug!() can be used for all but the most frequent operations.
// e.g. not every single read/write/free operation, but perhaps for every
// call to S3.
debug!("logging level DEBUG enabled");

// trace!() can be used for frequent operation. But note that we evaluate
// all the trace! statements in prod and there is some cost involved in
// string processing, memory allocation, global lock etc.
trace!("logging level TRACE enabled");

// Log all the tunables.
log_tunable_config();
}
9 changes: 5 additions & 4 deletions cmd/zfs_object_agent/util/src/tunable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@ where
T: Deserialize<'de> + Debug,
{
match CONFIG.read().unwrap().get(name) {
Ok(v) => {
info!("{}: using value {:?} from config file", name, v);
v
}
Ok(v) => v,
Err(ConfigError::NotFound(_)) => default,
Err(e) => {
warn!("{}: using default: {:?}", e, default);
Expand All @@ -28,5 +25,9 @@ where
pub fn read_tunable_config(file_name: &str) {
let mut config = CONFIG.write().unwrap();
config.merge(config::File::with_name(file_name)).unwrap();
}

pub fn log_tunable_config() {
let config = CONFIG.read().unwrap();
info!("config: {}", config.cache);
}
13 changes: 11 additions & 2 deletions etc/zfs/zoa_log4rs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,23 @@ appenders:
- kind: threshold
level: error

# Set the default logging level to "debug"
memory:
kind: buffer
filters:
- kind: threshold
level: trace

# Set the default logging level to "trace" to permit trace logs to "memory".
# But tracelog is not enabled by default.
root:
level: debug
level: trace
appenders:
- stdout
# - tracelog
- debuglog
- infolog
- errorlog
- memory

loggers:
# "rusoto_core::request" is very chatty when set to debug.
Expand Down

0 comments on commit d186dc4

Please sign in to comment.