Skip to content

Commit

Permalink
check log directory for restartCount
Browse files Browse the repository at this point in the history
  • Loading branch information
rphillips committed Oct 11, 2021
1 parent ac8c287 commit a06094e
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 0 deletions.
53 changes: 53 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_container.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"math/rand"
"net/url"
"os"
"path/filepath"
"regexp"
goruntime "runtime"
"sort"
"strconv"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -127,6 +130,40 @@ func (s *startSpec) getTargetID(podStatus *kubecontainer.PodStatus) (*kubecontai
return &targetStatus.ID, nil
}

// restartCountLogFileRegex matches container log file names of the form
// "{restartCount}.log", optionally followed by a dot-separated suffix (for
// example "3.log" or "3.log.rotated"). The pattern is anchored at both ends
// and the dot before "log" is escaped so that unrelated names such as
// "foo10.log" or "7xlog" are not mistaken for container logs.
var restartCountLogFileRegex = regexp.MustCompile(`^(\d+)\.log(\..*)?$`)

// calcRestartCountByLogDir inspects the regular files directly inside path and
// returns the restart count the next container instance should use: one more
// than the highest {restartCount} found in a "{restartCount}.log[.suffix]"
// file name.
//
// It returns (0, nil) when path does not exist or contains no matching log
// files. Any other failure to read the directory is returned as an error with
// a count of 0. If a matching file name carries a number that does not fit in
// an int, the highest count seen so far is returned together with the parse
// error.
func calcRestartCountByLogDir(path string) (int, error) {
	files, err := ioutil.ReadDir(path)
	if err != nil {
		// A missing log directory simply means there is no history to recover.
		if os.IsNotExist(err) {
			return 0, nil
		}
		return 0, err
	}

	restartCount := 0
	for _, file := range files {
		// Only plain files can be container logs; skip sub-directories.
		if file.IsDir() {
			continue
		}
		matches := restartCountLogFileRegex.FindStringSubmatch(file.Name())
		if matches == nil {
			continue
		}
		count, err := strconv.Atoi(matches[1])
		if err != nil {
			// The digits matched but overflowed int; surface the error while
			// still reporting what has been found so far.
			return restartCount, err
		}
		// The log named N.log belongs to restart N, so the next one is N+1.
		if count++; count > restartCount {
			restartCount = count
		}
	}
	return restartCount, nil
}

// startContainer starts a container and, on error, returns a message indicating why it failed.
// It starts the container through the following steps:
// * pull the image
Expand All @@ -150,6 +187,22 @@ func (m *kubeGenericRuntimeManager) startContainer(podSandboxID string, podSandb
containerStatus := podStatus.FindContainerStatusByName(container.Name)
if containerStatus != nil {
restartCount = containerStatus.RestartCount + 1
} else {
// The container runtime keeps state on container statuses and
// what the container restart count is. When nodes are rebooted
// some container runtimes clear their state which causes the
// restartCount to be reset to 0. This causes the logfile to
// start at 0.log, which either overwrites or appends to the
// already existing log.
//
// We are checking to see if the log directory exists, and find
// the latest restartCount by checking the log name -
// {restartCount}.log - and adding 1 to it.
logDir := BuildContainerLogsDirectory(pod.Namespace, pod.Name, pod.UID, container.Name)
restartCount, err = calcRestartCountByLogDir(logDir)
if err != nil {
klog.InfoS("Log directory exists but could not calculate restartCount", "logDir", logDir, "err", err)
}
}

target, err := spec.getTargetID(podStatus)
Expand Down
44 changes: 44 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package kuberuntime

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"
Expand Down Expand Up @@ -422,3 +424,45 @@ func TestStartSpec(t *testing.T) {
})
}
}

// TestRestartCountByLogDir checks that calcRestartCountByLogDir returns one
// more than the highest restart number encoded in the log file names of a
// directory, for both plain ("N.log") and suffixed ("N.log.rotated") files.
func TestRestartCountByLogDir(t *testing.T) {
	for _, tc := range []struct {
		filenames    []string
		restartCount int
	}{
		{
			filenames:    []string{"0.log.rotated-log"},
			restartCount: 1,
		},
		{
			filenames:    []string{"0.log"},
			restartCount: 1,
		},
		{
			filenames:    []string{"0.log", "1.log", "2.log"},
			restartCount: 3,
		},
		{
			filenames:    []string{"0.log.rotated", "1.log", "2.log"},
			restartCount: 3,
		},
		{
			filenames:    []string{"5.log.rotated", "6.log.rotated"},
			restartCount: 7,
		},
		{
			filenames:    []string{"5.log.rotated", "6.log", "7.log"},
			restartCount: 8,
		},
	} {
		// Each case runs in its own subtest so the deferred cleanup fires as
		// soon as the case finishes rather than at the end of the whole test.
		t.Run(strings.Join(tc.filenames, ","), func(t *testing.T) {
			tempDirPath, err := ioutil.TempDir("", "test-restart-count-")
			if !assert.NoError(t, err, "create tempdir error") {
				return
			}
			defer os.RemoveAll(tempDirPath)

			for _, filename := range tc.filenames {
				err = ioutil.WriteFile(filepath.Join(tempDirPath, filename), []byte("a log line"), 0600)
				if !assert.NoError(t, err, "could not write log file") {
					return
				}
			}

			count, err := calcRestartCountByLogDir(tempDirPath)
			assert.NoError(t, err, "calcRestartCountByLogDir returned an error")
			// assert.Equal takes the expected value first, then the actual one.
			assert.Equal(t, tc.restartCount, count, "count %v should equal restartCount %v", count, tc.restartCount)
		})
	}
}

0 comments on commit a06094e

Please sign in to comment.