Skip to content

Commit

Permalink
Handle errors when waiting for pod status change fails (#1922)
Browse files Browse the repository at this point in the history
* Make PodWriter an interface (#1899)

* Pod controller interface (#1900)

* Introduce interfaces for extended POD manipulation

* Update copyright year

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

* Fix spelling

* Describe purpose of interface

---------

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

* Pod controller implementation (#1901)

* Implement PodController interface

* Add `corev1` alias

* Fix variable name.

* Fix naming

* Rename errors and minor rewording.

* Update copyright year and spelling.

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

* Address review note

---------

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

* Pod file writer implementation (#1902)

* Implement PodFileWriter interface

* Fix naming

* Rename `pfwp` property to `fileWriterProcessor` to make it more understandable.

* Update copyright year

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

---------

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

* Pod command executor implementation (#1903)

* Implement PodController interface

* Implement PodCommandExecutor interface

* Update copyright year

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

---------

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

* Refactor pod runner (#1904)

* Introduce interfaces for extended POD manipulation

* Refactor PodRunner, use PodController under the hood

* Capture last log lines when waiting for pod state change fails.

* Move comments to proper place

* Add couple test scenarios

* Fix wording

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>

* Fix linting errors

* Better naming for `getErrorWithLogs` function

---------

Co-authored-by: Pavan Navarathna <6504783+pavannd1@users.noreply.github.com>
  • Loading branch information
e-sumin and pavannd1 committed Feb 27, 2023
1 parent 34de811 commit ec63b99
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 2 deletions.
77 changes: 77 additions & 0 deletions pkg/kube/log_tail.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright 2023 The Kanister Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kube

import (
"bufio"
"bytes"
"strings"
)

// LogTail interface allows to store last N lines of log written to it
type LogTail interface {
Write(p []byte) (int, error)
ToString() string
}

type logTail struct {
lines []string
idx int
len int
}

// NewLogTail creates logTail struct containing circular buffer for storing `len` last lines of log written through Write method
func NewLogTail(len int) LogTail {
return &logTail{
lines: make([]string, len),
len: len,
}
}

// Write implements io.Writer interface. It writes log line(s) to circular buffer
func (lt *logTail) Write(p []byte) (int, error) {
s := bufio.NewScanner(bytes.NewReader(p))
for s.Scan() { // Scan log lines one by one.
l := s.Text()
l = strings.TrimSpace(l)
if l == "" { // Skip empty lines since we are not interested in them
continue
}
lt.lines[lt.idx%lt.len] = l
lt.idx += 1
}

return len(p), nil
}

// ToString returns collected lines joined with a newline
func (lt *logTail) ToString() string {
var result string

min := 0
if lt.idx > lt.len {
min = lt.idx - lt.len
}

for i := min; i < lt.idx; i++ {
line := lt.lines[i%lt.len]
result += line
if i != lt.idx-1 {
result += "\r\n"
}
}

return result
}
65 changes: 65 additions & 0 deletions pkg/kube/log_tail_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2023 The Kanister Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !unit
// +build !unit

package kube

import (
. "gopkg.in/check.v1"
)

type LogTailTestSuite struct{}

var _ = Suite(&LogTailTestSuite{})

func (s *LogTailTestSuite) TestLogsTail(c *C) {
for caseIdx, tc := range []struct {
limit int
input []string
expected string
}{
{2, []string{"line 1", "line 2", "line 3", "line 4", "line 5"}, "line 4\r\nline 5"},
{2, []string{"line 1\nline 2", "line 3", "line 4\r\nline 5"}, "line 4\r\nline 5"},
{5, []string{"line 1", "line 2"}, "line 1\r\nline 2"},
{1, []string{"line 1", "line 2"}, "line 2"},
} {
fc := Commentf("Failed for case #%v. Log: %s", caseIdx, tc.expected)
lt := NewLogTail(tc.limit)

for _, in := range tc.input {
w, e := lt.Write([]byte(in))
c.Check(e, IsNil)
c.Check(w, Equals, len([]byte(in)))
}

r := lt.ToString()
c.Check(r, Equals, tc.expected, fc)
}

lt := NewLogTail(3)
c.Check(lt.ToString(), Equals, "") // If there were no writes at all, output should be empty line

lt.Write([]byte("line1")) // nolint: errcheck
lt.Write([]byte("line2")) // nolint: errcheck
c.Check(lt.ToString(), Equals, "line1\r\nline2")
c.Check(lt.ToString(), Equals, "line1\r\nline2") // Second invocation should get the same result

// Check that buffer is still working after ToString is called
lt.Write([]byte("line3")) // nolint: errcheck
c.Check(lt.ToString(), Equals, "line1\r\nline2\r\nline3")
lt.Write([]byte("line4")) // nolint: errcheck
c.Check(lt.ToString(), Equals, "line2\r\nline3\r\nline4")
}
43 changes: 41 additions & 2 deletions pkg/kube/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package kube

import (
"context"
"fmt"
"io"
"os"
"strconv"
Expand Down Expand Up @@ -235,37 +236,68 @@ func GetPodLogs(ctx context.Context, cli kubernetes.Interface, namespace, name s
return string(bytes), nil
}

// getErrorFromLogs fetches logs from pod and constructs error containing last ten lines of log and specified error message
func getErrorFromLogs(ctx context.Context, cli kubernetes.Interface, namespace, podName string, err error, errorMessage string) error {
r, logErr := StreamPodLogs(ctx, cli, namespace, podName)
if logErr != nil {
return errors.Wrapf(logErr, "Failed to fetch logs from the pod")
}
defer r.Close()

// Grab last log lines and put them to an error
lt := NewLogTail(10)
// We are not interested in log extraction error
io.Copy(lt, r) // nolint: errcheck

return errors.Wrap(errors.Wrap(err, lt.ToString()), errorMessage)
}

// WaitForPodReady waits for a pod to exit the pending state
func WaitForPodReady(ctx context.Context, cli kubernetes.Interface, namespace, name string) error {
timeoutCtx, waitCancel := context.WithTimeout(ctx, GetPodReadyWaitTimeout())
defer waitCancel()
attachLog := true
err := poll.Wait(timeoutCtx, func(ctx context.Context) (bool, error) {
p, err := cli.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
if err != nil {
attachLog = false
return false, err
}

// check if nodes are up and available
err = checkNodesStatus(p, cli)
if err != nil && !strings.Contains(err.Error(), errAccessingNode) {
attachLog = false
return false, err
}

// check for memory or resource issues
if p.Status.Phase == v1.PodPending {
if p.Status.Reason == "OutOfmemory" || p.Status.Reason == "OutOfcpu" {
attachLog = false
return false, errors.Errorf("Pod stuck in pending state, reason: %s", p.Status.Reason)
}
}

// check if pvc and pv are up and ready to mount
if err := getVolStatus(timeoutCtx, p, cli, namespace); err != nil {
attachLog = false
return false, err
}

return p.Status.Phase != v1.PodPending && p.Status.Phase != "", nil
})
return errors.Wrapf(err, "Pod did not transition into running state. Timeout:%v Namespace:%s, Name:%s", GetPodReadyWaitTimeout(), namespace, name)

if err == nil {
return nil
}

errorMessage := fmt.Sprintf("Pod did not transition into running state. Timeout:%v Namespace:%s, Name:%s", GetPodReadyWaitTimeout(), namespace, name)
if attachLog {
return getErrorFromLogs(ctx, cli, namespace, name, err, errorMessage)
}

return errors.Wrap(err, errorMessage)
}

func checkNodesStatus(p *v1.Pod, cli kubernetes.Interface) error {
Expand Down Expand Up @@ -343,9 +375,11 @@ func checkPVCAndPVStatus(ctx context.Context, vol v1.Volume, p *v1.Pod, cli kube

// WaitForPodCompletion waits for a pod to reach a terminal state, or timeout
func WaitForPodCompletion(ctx context.Context, cli kubernetes.Interface, namespace, name string) error {
attachLog := true
err := poll.Wait(ctx, func(ctx context.Context) (bool, error) {
p, err := cli.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
if err != nil {
attachLog = false
return true, err
}
switch p.Status.Phase {
Expand All @@ -354,7 +388,12 @@ func WaitForPodCompletion(ctx context.Context, cli kubernetes.Interface, namespa
}
return p.Status.Phase == v1.PodSucceeded, nil
})
return errors.Wrap(err, "Pod failed or did not transition into complete state")

errorMessage := "Pod failed or did not transition into complete state"
if attachLog {
return getErrorFromLogs(ctx, cli, namespace, name, err, errorMessage)
}
return errors.Wrap(err, errorMessage)
}

// use Strategic Merge to patch default pod specs with the passed specs
Expand Down

0 comments on commit ec63b99

Please sign in to comment.