Skip to content

Commit

Permalink
pageserver: avoid logging confusing "ERROR" success messages
Browse files Browse the repository at this point in the history
Example:
```
  walreceiver connection handling ended: db error: ERROR: ending streaming to Some("pageserver") at 0/4031CA8
```

The inner DbError has a severity of ERROR so DbError's Display
implementation includes that ERROR, even though we are actually
logging the error at INFO level.

Extend ExpectedError trait to have an optional method for
custom formatting expected errors, in this case implemented
for postgres::Error to adjust how DbError is formatted
when its status is SUCCESSFUL_COMPLETION.

The new output looks like:
```
    walreceiver connection handling ended: Successful completion: ending streaming to Some("pageserver") at 0/154E9C0, receiver is caughtup and there is no computes
```
  • Loading branch information
jcsp committed Aug 4, 2023
1 parent cd33089 commit be225bf
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,11 @@ impl ConnectionManagerState {
Err(e) => {
use super::walreceiver_connection::ExpectedError;
if e.is_expected() {
info!("walreceiver connection handling ended: {e:#}");
if let Some(adjusted_error) = e.maybe_reformat() {
info!("walreceiver connection handling ended: {adjusted_error}");
} else {
info!("walreceiver connection handling ended: {e}")
}
Ok(())
} else {
// give out an error to have task_mgr give it a really verbose logging
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,14 @@ pub(super) trait ExpectedError {
/// If this function returns an `false` the error should be propagated and the connection manager
/// will log the error at ERROR level.
fn is_expected(&self) -> bool;

/// If is_expected() is true, use this function to optionally transform the error's string
/// representation instead of using Display directly to get a string-ized version of the error.
/// It may be filtered to omit misleading "error" parts of a non-harmful message, to help
/// with searching logs for errors.
fn maybe_reformat(&self) -> Option<String> {
None
}
}

impl ExpectedError for postgres::Error {
Expand All @@ -453,17 +461,39 @@ impl ExpectedError for postgres::Error {
})
.is_some()
}

fn maybe_reformat(&self) -> Option<String> {
self.as_db_error()
.filter(|dbe| dbe.code() == &SqlState::SUCCESSFUL_COMPLETION)
.map(|dbe| {
// Instead of using Display which would include severity (which might be "ERROR"),
// construct our own version
format!("Successful completion: {}", dbe.message())
})
}
}

impl ExpectedError for anyhow::Error {
fn is_expected(&self) -> bool {
let head = self.downcast_ref::<postgres::Error>();
/// Given an anyhow::Error, get an iterator over all contained errors
/// which are of type postgres::Error
fn find_postgres_errors(err: &anyhow::Error) -> impl Iterator<Item = &postgres::Error> {
let head = err.downcast_ref::<postgres::Error>();

let tail = self
.chain()
.filter_map(|e| e.downcast_ref::<postgres::Error>());
let tail = err
.chain()
.filter_map(|e| e.downcast_ref::<postgres::Error>());

// check if self or any of the chained/sourced errors are expected
head.into_iter().chain(tail)
}

impl ExpectedError for anyhow::Error {
fn is_expected(&self) -> bool {
// check if self or any of the chained/sourced errors are expected
head.into_iter().chain(tail).any(|e| e.is_expected())
find_postgres_errors(self).any(|e| e.is_expected())
}

fn maybe_reformat(&self) -> Option<String> {
// Pick the first error to have a reformat implementation, if any
find_postgres_errors(self).find_map(|e| e.maybe_reformat())
}
}
1 change: 0 additions & 1 deletion test_runner/fixtures/neon_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1691,7 +1691,6 @@ def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional
# FIXME: replication patch for tokio_postgres regards any but CopyDone/CopyData message in CopyBoth stream as unexpected
".*Connection aborted: unexpected message from server*",
".*kill_and_wait_impl.*: wait successful.*",
".*: db error:.*ending streaming to Some.*",
".*query handler for 'pagestream.*failed: Broken pipe.*", # pageserver notices compute shut down
".*query handler for 'pagestream.*failed: Connection reset by peer.*", # pageserver notices compute shut down
# safekeeper connection can fail with this, in the window between timeline creation
Expand Down

0 comments on commit be225bf

Please sign in to comment.