Handle errors when waiting for pod status change fails (#1922)

* Make PodWriter an interface (#1899) * Pod controller interface (#1900) * Introduce interfaces for extended POD manipulation * Update copyright year Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> * Fix spelling * Describe purpose of interface --------- Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> * Pod controller implementation (#1901) * Implement PodController interface * Add `corev1` alias * Fix variable name. * Fix naming * Rename errors and minor rewording. * Update copyright year and spelling. Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> * Address review note --------- Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> * Pod file writer implementation (#1902) * Implement PodFileWriter interface * Fix naming * Rename `pfwp` property to `fileWriterProcessor` to make it more understandable. * Update copyright year Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> --------- Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> * Pod command executor implementation (#1903) * Implement PodController interface * Implement PodCommandExecutor interface * Update copyright year Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> --------- Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> * Refactor pod runner (#1904) * Introduce interfaces for extended POD manipulation * Refactor PodRunner, use PodController under the hood * Capture last log lines when waiting for pod state change fails. * Move comments to proper place * Add couple test scenarios * Fix wording Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com> * Fix linting errors * Better naming for `getErrorWithLogs` function --------- Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>
kanisterio · Feb 27, 2023 · ec63b99 · ec63b99
1 parent 34de811
commit ec63b99
Show file tree

Hide file tree

Showing 3 changed files with 183 additions and 2 deletions.
diff --git a/pkg/kube/log_tail.go b/pkg/kube/log_tail.go
@@ -0,0 +1,77 @@
+// Copyright 2023 The Kanister Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kube
+
+import (
+	"bufio"
+	"bytes"
+	"strings"
+)
+
+// LogTail interface allows to store last N lines of log written to it
+type LogTail interface {
+	Write(p []byte) (int, error)
+	ToString() string
+}
+
+type logTail struct {
+	lines []string
+	idx   int
+	len   int
+}
+
+// NewLogTail creates logTail struct containing circular buffer for storing `len` last lines of log written through Write method
+func NewLogTail(len int) LogTail {
+	return &logTail{
+		lines: make([]string, len),
+		len:   len,
+	}
+}
+
+// Write implements io.Writer interface. It writes log line(s) to circular buffer
+func (lt *logTail) Write(p []byte) (int, error) {
+	s := bufio.NewScanner(bytes.NewReader(p))
+	for s.Scan() { // Scan log lines one by one.
+		l := s.Text()
+		l = strings.TrimSpace(l)
+		if l == "" { // Skip empty lines since we are not interested in them
+			continue
+		}
+		lt.lines[lt.idx%lt.len] = l
+		lt.idx += 1
+	}
+
+	return len(p), nil
+}
+
+// ToString returns collected lines joined with a newline
+func (lt *logTail) ToString() string {
+	var result string
+
+	min := 0
+	if lt.idx > lt.len {
+		min = lt.idx - lt.len
+	}
+
+	for i := min; i < lt.idx; i++ {
+		line := lt.lines[i%lt.len]
+		result += line
+		if i != lt.idx-1 {
+			result += "\r\n"
+		}
+	}
+
+	return result
+}
diff --git a/pkg/kube/log_tail_test.go b/pkg/kube/log_tail_test.go
@@ -0,0 +1,65 @@
+// Copyright 2023 The Kanister Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//go:build !unit
+// +build !unit
+
+package kube
+
+import (
+	. "gopkg.in/check.v1"
+)
+
+type LogTailTestSuite struct{}
+
+var _ = Suite(&LogTailTestSuite{})
+
+func (s *LogTailTestSuite) TestLogsTail(c *C) {
+	for caseIdx, tc := range []struct {
+		limit    int
+		input    []string
+		expected string
+	}{
+		{2, []string{"line 1", "line 2", "line 3", "line 4", "line 5"}, "line 4\r\nline 5"},
+		{2, []string{"line 1\nline 2", "line 3", "line 4\r\nline 5"}, "line 4\r\nline 5"},
+		{5, []string{"line 1", "line 2"}, "line 1\r\nline 2"},
+		{1, []string{"line 1", "line 2"}, "line 2"},
+	} {
+		fc := Commentf("Failed for case #%v. Log: %s", caseIdx, tc.expected)
+		lt := NewLogTail(tc.limit)
+
+		for _, in := range tc.input {
+			w, e := lt.Write([]byte(in))
+			c.Check(e, IsNil)
+			c.Check(w, Equals, len([]byte(in)))
+		}
+
+		r := lt.ToString()
+		c.Check(r, Equals, tc.expected, fc)
+	}
+
+	lt := NewLogTail(3)
+	c.Check(lt.ToString(), Equals, "") // If there were no writes at all, output should be empty line
+
+	lt.Write([]byte("line1")) // nolint: errcheck
+	lt.Write([]byte("line2")) // nolint: errcheck
+	c.Check(lt.ToString(), Equals, "line1\r\nline2")
+	c.Check(lt.ToString(), Equals, "line1\r\nline2") // Second invocation should get the same result
+
+	// Check that buffer is still working after ToString is called
+	lt.Write([]byte("line3")) // nolint: errcheck
+	c.Check(lt.ToString(), Equals, "line1\r\nline2\r\nline3")
+	lt.Write([]byte("line4")) // nolint: errcheck
+	c.Check(lt.ToString(), Equals, "line2\r\nline3\r\nline4")
+}
diff --git a/pkg/kube/pod.go b/pkg/kube/pod.go
@@ -16,6 +16,7 @@ package kube
 
 import (
 	"context"
+	"fmt"
 	"io"
 	"os"
 	"strconv"
@@ -235,37 +236,68 @@ func GetPodLogs(ctx context.Context, cli kubernetes.Interface, namespace, name s
 	return string(bytes), nil
 }
 
+// getErrorFromLogs fetches logs from pod and constructs error containing last ten lines of log and specified error message
+func getErrorFromLogs(ctx context.Context, cli kubernetes.Interface, namespace, podName string, err error, errorMessage string) error {
+	r, logErr := StreamPodLogs(ctx, cli, namespace, podName)
+	if logErr != nil {
+		return errors.Wrapf(logErr, "Failed to fetch logs from the pod")
+	}
+	defer r.Close()
+
+	// Grab last log lines and put them to an error
+	lt := NewLogTail(10)
+	// We are not interested in log extraction error
+	io.Copy(lt, r) // nolint: errcheck
+
+	return errors.Wrap(errors.Wrap(err, lt.ToString()), errorMessage)
+}
+
 // WaitForPodReady waits for a pod to exit the pending state
 func WaitForPodReady(ctx context.Context, cli kubernetes.Interface, namespace, name string) error {
 	timeoutCtx, waitCancel := context.WithTimeout(ctx, GetPodReadyWaitTimeout())
 	defer waitCancel()
+	attachLog := true
 	err := poll.Wait(timeoutCtx, func(ctx context.Context) (bool, error) {
 		p, err := cli.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
+			attachLog = false
 			return false, err
 		}
 
 		// check if nodes are up and available
 		err = checkNodesStatus(p, cli)
 		if err != nil && !strings.Contains(err.Error(), errAccessingNode) {
+			attachLog = false
 			return false, err
 		}
 
 		// check for memory or resource issues
 		if p.Status.Phase == v1.PodPending {
 			if p.Status.Reason == "OutOfmemory" || p.Status.Reason == "OutOfcpu" {
+				attachLog = false
 				return false, errors.Errorf("Pod stuck in pending state, reason: %s", p.Status.Reason)
 			}
 		}
 
 		// check if pvc and pv are up and ready to mount
 		if err := getVolStatus(timeoutCtx, p, cli, namespace); err != nil {
+			attachLog = false
 			return false, err
 		}
 
 		return p.Status.Phase != v1.PodPending && p.Status.Phase != "", nil
 	})
-	return errors.Wrapf(err, "Pod did not transition into running state. Timeout:%v  Namespace:%s, Name:%s", GetPodReadyWaitTimeout(), namespace, name)
+
+	if err == nil {
+		return nil
+	}
+
+	errorMessage := fmt.Sprintf("Pod did not transition into running state. Timeout:%v  Namespace:%s, Name:%s", GetPodReadyWaitTimeout(), namespace, name)
+	if attachLog {
+		return getErrorFromLogs(ctx, cli, namespace, name, err, errorMessage)
+	}
+
+	return errors.Wrap(err, errorMessage)
 }
 
 func checkNodesStatus(p *v1.Pod, cli kubernetes.Interface) error {
@@ -343,9 +375,11 @@ func checkPVCAndPVStatus(ctx context.Context, vol v1.Volume, p *v1.Pod, cli kube
 
 // WaitForPodCompletion waits for a pod to reach a terminal state, or timeout
 func WaitForPodCompletion(ctx context.Context, cli kubernetes.Interface, namespace, name string) error {
+	attachLog := true
 	err := poll.Wait(ctx, func(ctx context.Context) (bool, error) {
 		p, err := cli.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
 		if err != nil {
+			attachLog = false
 			return true, err
 		}
 		switch p.Status.Phase {
@@ -354,7 +388,12 @@ func WaitForPodCompletion(ctx context.Context, cli kubernetes.Interface, namespa
 		}
 		return p.Status.Phase == v1.PodSucceeded, nil
 	})
-	return errors.Wrap(err, "Pod failed or did not transition into complete state")
+
+	errorMessage := "Pod failed or did not transition into complete state"
+	if attachLog {
+		return getErrorFromLogs(ctx, cli, namespace, name, err, errorMessage)
+	}
+	return errors.Wrap(err, errorMessage)
 }
 
 // use Strategic Merge to patch default pod specs with the passed specs