Skip to content

Commit

Permalink
Merge pull request #362 from autonomys/improve-crash-handling
Browse files Browse the repository at this point in the history
Improve crash handling on macOS
  • Loading branch information
nazar-pc authored Dec 18, 2024
2 parents 513bff9 + 89505bd commit 414fe13
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 63 deletions.
16 changes: 0 additions & 16 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ subspace-proof-of-space-gpu = { git = "https://github.com/subspace/subspace", re
subspace-rpc-primitives = { git = "https://github.com/subspace/subspace", rev = "ac3d5e5fad8007a15e7aff3790d4e7ceb3ad7a0f" }
subspace-runtime-primitives = { git = "https://github.com/subspace/subspace", rev = "ac3d5e5fad8007a15e7aff3790d4e7ceb3ad7a0f" }
subspace-service = { git = "https://github.com/subspace/subspace", rev = "ac3d5e5fad8007a15e7aff3790d4e7ceb3ad7a0f" }
supports-color = "3.0.1"
sys-locale = "0.3.1"
tempfile = "3.13.0"
thiserror = "2.0.1"
Expand Down
189 changes: 143 additions & 46 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,17 @@ use std::io::{Read, Write};
use std::path::{Path, PathBuf};
use std::process::{ExitCode, Termination};
use std::rc::Rc;
use std::sync::{Arc, Mutex};
use std::thread::available_parallelism;
use std::time::{Duration, Instant};
use std::{env, fs, io, process};
use subspace_farmer::utils::run_future_in_dedicated_thread;
use subspace_proof_of_space::chia::ChiaTable;
use tracing::{debug, error, info, warn};
use tracing_subscriber::filter::LevelFilter;
use tracing_subscriber::fmt::Layer;
use tracing_subscriber::prelude::*;
use tracing_subscriber::EnvFilter;
use tracing_subscriber::{EnvFilter, Registry};

#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
Expand Down Expand Up @@ -70,13 +72,48 @@ fn raise_fd_limit() {
}
Err(error) => {
warn!(
"Failed to increase file descriptor limit for the process due to an error: {}.",
"Failed to increase file descriptor limit for the process due to an error: {}",
error
);
}
}
}

struct MultiWriter<W1, W2> {
first: Arc<Mutex<W1>>,
second: W2,
}

impl<W1, W2> Write for MultiWriter<W1, W2>
where
W1: Write,
W2: Write,
{
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
// Write both first
let result1 = self
.first
.lock()
.expect("Must not panic during write, crash otherwise; qed")
.write_all(buf);
let result2 = self.second.write_all(buf);
// Check errors afterwards
result1.and(result2).map(|()| buf.len())
}

fn flush(&mut self) -> io::Result<()> {
// flush both first
let result1 = self
.first
.lock()
.expect("Must not panic during write, crash otherwise; qed")
.flush();
let result2 = self.second.flush();
// Check errors afterwards
result1.and(result2)
}
}

#[derive(Debug, Copy, Clone)]
enum AppStatusCode {
Exit,
Expand Down Expand Up @@ -176,38 +213,25 @@ impl Cli {
fn app(self) -> AppStatusCode {
let maybe_app_data_dir = Self::app_data_dir();

{
let layer = tracing_subscriber::fmt::layer()
// TODO: Workaround for https://github.com/tokio-rs/tracing/issues/2214, also on
// Windows terminal doesn't support the same colors as bash does
.with_ansi(if cfg!(windows) {
false
} else {
supports_color::on(supports_color::Stream::Stderr).is_some()
});
let filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
.from_env_lossy();
if WINDOWS_SUBSYSTEM_WINDOWS {
if let Some(app_data_dir) = &maybe_app_data_dir {
let logger = std::sync::Mutex::new(Self::new_logger(app_data_dir));
let layer = layer.with_writer(logger);

tracing_subscriber::registry()
.with(layer.with_filter(filter))
.init();
} else {
tracing_subscriber::registry()
.with(layer.with_filter(filter))
.init();
}
#[cfg(windows)]
std::panic::set_hook(Box::new(tracing_panic::panic_hook));
if WINDOWS_SUBSYSTEM_WINDOWS {
let (layer, filter) = Self::tracing_logger_init_common();

if let Some(app_data_dir) = &maybe_app_data_dir {
let logger = Mutex::new(Self::new_logger(app_data_dir));
let layer = layer.with_writer(logger);

tracing_subscriber::registry()
.with(layer.with_filter(filter))
.init();
} else {
tracing_subscriber::registry()
.with(layer.with_filter(filter))
.init();
}
#[cfg(windows)]
std::panic::set_hook(Box::new(tracing_panic::panic_hook));
} else {
Self::tracing_logger_init_simple();
}

info!(
Expand Down Expand Up @@ -336,6 +360,8 @@ impl Cli {

let mut last_start;
let program = Self::child_program()?;
let mut logger_initialized = false;
let mut maybe_logger = None;

loop {
let mut args = vec!["--child-process".to_string()];
Expand Down Expand Up @@ -363,11 +389,38 @@ impl Cli {
.unchecked()
.reader()?;

let mut logger = Self::new_logger(app_data_dir);
let logger = match maybe_logger.clone() {
Some(logger) => logger,
None => {
let logger = Arc::new(Mutex::new(Self::new_logger(app_data_dir)));
maybe_logger.replace(Arc::clone(&logger));

if !logger_initialized {
logger_initialized = true;

let (layer, filter) = Self::tracing_logger_init_common();

let layer = layer.with_writer({
let logger = Arc::clone(&logger);

move || MultiWriter {
first: Arc::clone(&logger),
second: io::stderr(),
}
});

tracing_subscriber::registry()
.with(layer.with_filter(filter))
.init();
}

logger
}
};

let mut log_read_buffer = vec![0u8; LOG_READ_BUFFER];

let mut stdout = io::stdout();
let mut stderr = io::stderr();
loop {
match expression.read(&mut log_read_buffer) {
Ok(bytes_count) => {
Expand All @@ -376,12 +429,15 @@ impl Cli {
}

let write_result: io::Result<()> = try {
stdout.write_all(&log_read_buffer[..bytes_count])?;
logger.write_all(&log_read_buffer[..bytes_count])?;
stderr.write_all(&log_read_buffer[..bytes_count])?;
logger
.lock()
.expect("Must not panic, crash if it does; qed")
.write_all(&log_read_buffer[..bytes_count])?;
};

if let Err(error) = write_result {
eprintln!("Error while writing output of child process: {error}");
error!(%error, "Error while writing output of child process");
break;
}
}
Expand All @@ -390,15 +446,19 @@ impl Cli {
// Try again
continue;
}
eprintln!("Error while reading output of child process: {error}");
error!(%error, "Error while reading output of child process");
break;
}
}
}

stdout.flush()?;
if let Err(error) = logger.flush() {
eprintln!("Error while flushing logs: {error}");
stderr.flush()?;
let flush_result = logger
.lock()
.expect("Must not panic, crash if it does; qed")
.flush();
if let Err(error) = flush_result {
error!(%error, "Error while flushing logs");
}

match expression.try_wait()? {
Expand All @@ -411,6 +471,12 @@ impl Cli {
}
}
} else if WINDOWS_SUBSYSTEM_WINDOWS {
if !logger_initialized {
logger_initialized = true;

Self::tracing_logger_init_simple();
}

Self::maybe_force_renderer(cmd(&program, args))
.stdin_null()
.stdout_null()
Expand All @@ -420,9 +486,15 @@ impl Cli {
.run()?
.status
} else {
eprintln!("App data directory doesn't exist, not creating log file");
if !logger_initialized {
logger_initialized = true;

Self::tracing_logger_init_simple();
}

error!("App data directory doesn't exist, not creating log file");
Self::maybe_force_renderer(cmd(&program, args))
// We use non-zero status codes and they don't mean error necessarily
// We use non-zero status codes, and they don't mean error necessarily
.unchecked()
.run()?
.status
Expand All @@ -431,15 +503,21 @@ impl Cli {
match exit_status.code() {
Some(status_code) => match AppStatusCode::from_status_code(status_code) {
AppStatusCode::Exit => {
eprintln!("Application exited gracefully");
error!("Application exited gracefully");
break;
}
AppStatusCode::Restart => {
eprintln!("Restarting application");
error!("Restarting application");
continue;
}
AppStatusCode::Unknown(status_code) => {
eprintln!("Application exited with unexpected status code {status_code}");
error!(%status_code, "Application exited with unexpected status code");

if last_start.elapsed() >= MIN_RUNTIME_DURATION_FOR_AUTORESTART {
self.after_crash = true;
continue;
}

process::exit(status_code);
}
},
Expand All @@ -448,14 +526,14 @@ impl Cli {
{
use std::os::unix::process::ExitStatusExt;

eprintln!(
error!(
"Application terminated by signal {:?}",
exit_status.signal()
);
}
#[cfg(not(unix))]
{
eprintln!("Application terminated by signal");
error!("Application terminated by signal");
}
if last_start.elapsed() >= MIN_RUNTIME_DURATION_FOR_AUTORESTART {
self.after_crash = true;
Expand All @@ -475,7 +553,7 @@ impl Cli {
.and_then(|app_data_dir| {
if !app_data_dir.exists() {
if let Err(error) = fs::create_dir_all(&app_data_dir) {
eprintln!(
error!(
"App data directory \"{}\" doesn't exist and can't be created: {}",
app_data_dir.display(),
error
Expand All @@ -488,6 +566,25 @@ impl Cli {
})
}

fn tracing_logger_init_simple() {
let (layer, filter) = Self::tracing_logger_init_common();

tracing_subscriber::registry()
.with(layer.with_filter(filter))
.init();
}

fn tracing_logger_init_common() -> (Layer<Registry>, EnvFilter) {
let layer = tracing_subscriber::fmt::layer()
// No escape sequences in logs since we write them to files
.with_ansi(false);
let filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
.from_env_lossy();

(layer, filter)
}

fn new_logger(app_data_dir: &Path) -> FileRotate<AppendCount> {
FileRotate::new(
app_data_dir.join("space-acres.log"),
Expand Down

0 comments on commit 414fe13

Please sign in to comment.