Skip to content

Commit

Permalink
structured logging: replacing Fatal/Exit/etc. without loss of flushing
Browse files Browse the repository at this point in the history
When structured logging called for functions like Fatal and Exit to be avoided
in favor of klog.ErrorS + os.Exit, it was overlooked that this replacement
skips flushing.

We could ask developers to use klog.ErrorS + klog.Flush + os.Exit, but that is
still not quite the same because klog.Flush could get stuck or take too long.

Therefore klog now supports flushing + exiting with a dedicated function that
can be combined with some other, arbitrary logging function.
  • Loading branch information
pohly committed Mar 11, 2022
1 parent 7ea6d6a commit 89f5d2c
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 20 deletions.
61 changes: 61 additions & 0 deletions exit.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/
//
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package klog

import (
"fmt"
"os"
"time"
)

var (

// ExitFlushTimeout is the timeout that klog has traditionally used during
// calls like Fatal or Exit when flushing log data right before exiting.
// Applications that replace those calls and do not have some specific
// requirements like "exit immediately" can use this value as parameter
// for FlushAndExit.
ExitFlushTimeout = 10 * time.Second

// OsExit is the function called by FlushAndExit to terminate the program.
OsExit = os.Exit
)

// FlushAndExit flushes log data for a certain amount of time and then calls
// os.Exit. Combined with some logging call it provides a replacement for
// traditional calls like Fatal or Exit.
func FlushAndExit(flushTimeout time.Duration, exitCode int) {
timeoutFlush(flushTimeout)
OsExit(exitCode)
}

// timeoutFlush calls Flush and returns when it completes or after timeout
// elapses, whichever happens first. This is needed because the hooks invoked
// by Flush may deadlock when klog.Fatal is called from a hook that holds
// a lock. Flushing also might take too long.
func timeoutFlush(timeout time.Duration) {
done := make(chan bool, 1)
go func() {
Flush() // calls logging.lockAndFlushAll()
done <- true
}()
select {
case <-done:
case <-time.After(timeout):
fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout)
}
}
47 changes: 47 additions & 0 deletions exit_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2020 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package klog_test

import (
"flag"
"fmt"
"os"

"k8s.io/klog/v2"
)

func ExampleFlushAndExit() {
// Set up klog so that we can test it below.
var fs flag.FlagSet
klog.InitFlags(&fs)
fs.Set("skip_headers", "true")
defer flag.Set("skip_headers", "false")
klog.SetOutput(os.Stdout)
defer klog.SetOutput(nil)
klog.OsExit = func(exitCode int) {
fmt.Printf("os.Exit(%d)\n", exitCode)
}

// If we were to return or exit without flushing, this message would
// get lost because it is buffered in memory by klog when writing to
// files. Output to stderr is not buffered.
klog.InfoS("exiting...")
exitCode := 10
klog.FlushAndExit(klog.ExitFlushTimeout, exitCode)

// Output:
// "exiting..."
// os.Exit(10)
}
23 changes: 3 additions & 20 deletions klog.go
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf
if atomic.LoadUint32(&fatalNoStacks) > 0 {
l.mu.Unlock()
isLocked = false
timeoutFlush(10 * time.Second)
timeoutFlush(ExitFlushTimeout)
os.Exit(1)
}
// Dump all goroutine stacks before exiting.
Expand All @@ -865,8 +865,8 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf
}
l.mu.Unlock()
isLocked = false
timeoutFlush(10 * time.Second)
os.Exit(255) // C++ uses -1, which is silly because it's anded(&) with 255 anyway.
timeoutFlush(ExitFlushTimeout)
os.Exit(255) // C++ uses -1, which is silly because it's anded with 255 anyway.
}
l.bufferCache.PutBuffer(buf)

Expand All @@ -876,23 +876,6 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf
}
}

// timeoutFlush calls Flush and returns when it completes or after timeout
// elapses, whichever happens first. This is needed because the hooks invoked
// by Flush may deadlock when klog.Fatal is called from a hook that holds
// a lock.
func timeoutFlush(timeout time.Duration) {
done := make(chan bool, 1)
go func() {
Flush() // calls logging.lockAndFlushAll()
done <- true
}()
select {
case <-done:
case <-time.After(timeout):
fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout)
}
}

// stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines.
func stacks(all bool) []byte {
// We don't know how big the traces are, so grow a few times if they don't fit. Start large, though.
Expand Down
3 changes: 3 additions & 0 deletions klog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1506,6 +1506,7 @@ func TestCallDepthLogrInfoS(t *testing.T) {
logger.resetCallDepth()
l := logr.New(logger)
SetLogger(l)
defer ClearLogger()

// Add wrapper to ensure callDepthTestLogr +2 offset is correct.
logFunc := func() {
Expand All @@ -1528,6 +1529,7 @@ func TestCallDepthLogrErrorS(t *testing.T) {
logger.resetCallDepth()
l := logr.New(logger)
SetLogger(l)
defer ClearLogger()

// Add wrapper to ensure callDepthTestLogr +2 offset is correct.
logFunc := func() {
Expand All @@ -1550,6 +1552,7 @@ func TestCallDepthLogrGoLog(t *testing.T) {
logger.resetCallDepth()
l := logr.New(logger)
SetLogger(l)
defer ClearLogger()
CopyStandardLogTo("INFO")

// Add wrapper to ensure callDepthTestLogr +2 offset is correct.
Expand Down

0 comments on commit 89f5d2c

Please sign in to comment.