Skip to content

Commit

Permalink
Convert dumpDiag to use kind export logs
Browse files Browse the repository at this point in the history
Today in KTF we manually step through all the pods/objects in the
cluster to get debug information. KIND now supports simply
running `kind export logs` which does a better job of giving us
a cluster debug overview. Switch the DumpDiagnostics function to
use this command for cluster log dumping instead.

Intentionally leave the logic which allows each addon to write their own
specific cleanup functionality.

Signed-off-by: Andrew Stoycos <astoycos@redhat.com>
  • Loading branch information
astoycos authored and czeslavo committed Mar 22, 2023
1 parent 238e12e commit 9861d53
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 117 deletions.
124 changes: 9 additions & 115 deletions pkg/clusters/diagnostics.go
Original file line number Diff line number Diff line change
@@ -1,123 +1,17 @@
package clusters

import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// DumpDiagnostics gathers a wide range of generic, diagnostic information from the test cluster,
// to provide a snapshot of it at a given time for offline debugging.
// It uses the provided context and writes the meta string to meta.txt to identify the result set.
// It returns the name of the directory that contains all the produced diagnostics data.
func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error) {
// Obtain a kubeconfig
kubeconfig, err := TempKubeconfig(c)
if err != nil {
return "", err
}
defer os.Remove(kubeconfig.Name())

// create a tempdir
output, err := os.MkdirTemp(os.TempDir(), "ktf-diag-")
if err != nil {
return "", err
}

// kubectl api-resources --verbs=list --namespaced -o name | xargs -n 1 kubectl get --show-kind --ignore-not-found -A -oyaml
// kubectl api-resources --verbs=list --namespaced -o name | xargs -n 1 kubectl get --show-kind --ignore-not-found -A -oyaml
// aka "kubectl get all" and "kubectl describe all", but also gets CRs and cluster-scoped resources
getAllOut, err := os.OpenFile(filepath.Join(output, "kubectl_get_all.yaml"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
if err != nil {
return output, err
}
defer getAllOut.Close()
describeAllOut, err := os.OpenFile(filepath.Join(output, "kubectl_describe_all.txt"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
if err != nil {
return output, err
}
defer describeAllOut.Close()

var namespacedList bytes.Buffer
var clusterList bytes.Buffer
namespacedResources := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "api-resources", "--verbs=list", "--namespaced", "-o", "name") //nolint:gosec
namespacedResources.Stdout = &namespacedList
clusterResources := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "api-resources", "--verbs=list", "--namespaced=false", "-o", "name") //nolint:gosec
clusterResources.Stdout = &clusterList
if err := namespacedResources.Run(); err != nil {
return output, err
}
if err := clusterResources.Run(); err != nil {
return output, err
}
combinedList := strings.Split(namespacedList.String()+clusterList.String(), "\n")

for _, resource := range combinedList {
if resource == "" {
// unwanted artifact of the split
continue
}
var getErr bytes.Buffer
var descErr bytes.Buffer
resourceGet := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "get", "--show-kind", "--ignore-not-found", "-A", "-oyaml", resource) //nolint:gosec
resourceGet.Stdout = getAllOut
resourceGet.Stderr = &getErr
resourceDescribe := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "describe", "--all-namespaces", resource) //nolint:gosec
resourceDescribe.Stdout = describeAllOut
resourceDescribe.Stderr = &descErr
if err := resourceGet.Run(); err != nil {
return output, fmt.Errorf("could not get resources for cmd '%s': err %s, stderr: %s", resourceGet.String(), err, getErr.String())
}
if err := resourceDescribe.Run(); err != nil {
return output, fmt.Errorf("could not get resources for cmd '%s': err %s, stderr: %s", resourceDescribe.String(), err, descErr.String())
}
}

// for each Pod, run kubectl logs
pods, err := c.Client().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
if err != nil {
return output, err
}
logsDir := filepath.Join(output, "pod_logs")
err = os.Mkdir(logsDir, 0o750) //nolint:gomnd
if err != nil {
return output, err
}
failedPods := make(map[string]error)
for _, pod := range pods.Items {
podLogOut, err := os.Create(filepath.Join(logsDir, fmt.Sprintf("%s_%s", pod.Namespace, pod.Name)))
if err != nil {
failedPods[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = err
continue
}
cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "logs", "--all-containers", "-n", pod.Namespace, pod.Name) //nolint:gosec
cmd.Stdout = podLogOut
if err := cmd.Run(); err != nil {
failedPods[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = err
continue
}
defer podLogOut.Close()
}
if len(failedPods) > 0 {
failedPodOut, err := os.Create(filepath.Join(output, "pod_logs_failures.txt"))
if err != nil {
return output, err
}
defer failedPodOut.Close()
for failed, reason := range failedPods {
_, err = failedPodOut.WriteString(fmt.Sprintf("%s: %v\n", failed, reason))
if err != nil {
return output, err
}
}
}

func DumpDiagnostics(ctx context.Context, c Cluster, meta string, outDir string) (string, error) {
// for each Addon, run the addon diagnostic function
failedAddons := make(map[string]error)
for _, addon := range c.ListAddons() {
Expand All @@ -127,7 +21,7 @@ func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error
continue
}
if len(diagnostics) > 0 {
addonOut := filepath.Join(output, "addons", string(addon.Name()))
addonOut := filepath.Join(outDir, "addons", string(addon.Name()))
err = os.MkdirAll(addonOut, 0o750) //nolint:gomnd
if err != nil {
failedAddons[string(addon.Name())] = err
Expand All @@ -149,29 +43,29 @@ func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error
}
}
if len(failedAddons) > 0 {
failedAddonOut, err := os.Create(filepath.Join(output, "addon_failures.txt"))
failedAddonOut, err := os.Create(filepath.Join(outDir, "addon_failures.txt"))
if err != nil {
return output, err
return outDir, err
}
defer failedAddonOut.Close()
for failed, reason := range failedAddons {
_, err = failedAddonOut.WriteString(fmt.Sprintf("%s: %v\n", failed, reason))
if err != nil {
return output, err
return outDir, err
}
}
}

// write the diagnostic metadata
metaOut, err := os.Create(filepath.Join(output, "meta.txt"))
metaOut, err := os.Create(filepath.Join(outDir, "meta.txt"))
if err != nil {
return output, err
return outDir, err
}
defer metaOut.Close()
_, err = metaOut.WriteString(meta)
if err != nil {
return output, err
return outDir, err
}

return output, nil
return outDir, nil
}
107 changes: 106 additions & 1 deletion pkg/clusters/types/gke/cluster.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package gke

import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
Expand All @@ -12,6 +15,7 @@ import (
"cloud.google.com/go/container/apiv1/containerpb"
"github.com/blang/semver/v4"
"google.golang.org/api/option"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

Expand Down Expand Up @@ -219,5 +223,106 @@ func (c *Cluster) DeleteAddon(ctx context.Context, addon clusters.Addon) error {
// for diagnostics identification.
// It returns the path to directory containing all the diagnostic files and an error.
func (c *Cluster) DumpDiagnostics(ctx context.Context, meta string) (string, error) {
	// Obtain a kubeconfig so kubectl can be pointed at this cluster; the
	// temporary file is removed once the dump is finished.
	kubeconfig, err := clusters.TempKubeconfig(c)
	if err != nil {
		return "", err
	}
	defer os.Remove(kubeconfig.Name())

	// Create the directory that receives every diagnostic file. From here on
	// the directory is returned even on error so partial results stay reachable.
	output, err := os.MkdirTemp(os.TempDir(), "ktf-diag-")
	if err != nil {
		return "", err
	}

	// Equivalent of running, for both namespaced and cluster-scoped resources:
	//   kubectl api-resources --verbs=list --namespaced[=false] -o name | xargs -n 1 kubectl get --show-kind --ignore-not-found -A -oyaml
	//   kubectl api-resources --verbs=list --namespaced[=false] -o name | xargs -n 1 kubectl describe --all-namespaces
	// aka "kubectl get all" and "kubectl describe all", but also gets CRs and cluster-scoped resources.
	getAllOut, err := os.OpenFile(filepath.Join(output, "kubectl_get_all.yaml"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
	if err != nil {
		return output, err
	}
	defer getAllOut.Close()
	describeAllOut, err := os.OpenFile(filepath.Join(output, "kubectl_describe_all.txt"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
	if err != nil {
		return output, err
	}
	defer describeAllOut.Close()

	// Collect the names of every listable resource type, namespaced and cluster-scoped.
	var namespacedList bytes.Buffer
	var clusterList bytes.Buffer
	namespacedResources := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "api-resources", "--verbs=list", "--namespaced", "-o", "name") //nolint:gosec
	namespacedResources.Stdout = &namespacedList
	clusterResources := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "api-resources", "--verbs=list", "--namespaced=false", "-o", "name") //nolint:gosec
	clusterResources.Stdout = &clusterList
	if err := namespacedResources.Run(); err != nil {
		return output, err
	}
	if err := clusterResources.Run(); err != nil {
		return output, err
	}
	combinedList := strings.Split(namespacedList.String()+clusterList.String(), "\n")

	// Append the "get -oyaml" and "describe" output of each resource type to
	// the two aggregate files; the first failing command aborts the dump.
	for _, resource := range combinedList {
		if resource == "" {
			// unwanted artifact of the split
			continue
		}
		var getErr bytes.Buffer
		var descErr bytes.Buffer
		resourceGet := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "get", "--show-kind", "--ignore-not-found", "-A", "-oyaml", resource) //nolint:gosec
		resourceGet.Stdout = getAllOut
		resourceGet.Stderr = &getErr
		resourceDescribe := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "describe", "--all-namespaces", resource) //nolint:gosec
		resourceDescribe.Stdout = describeAllOut
		resourceDescribe.Stderr = &descErr
		if err := resourceGet.Run(); err != nil {
			return output, fmt.Errorf("could not get resources for cmd '%s': err %w, stderr: %s", resourceGet.String(), err, getErr.String())
		}
		if err := resourceDescribe.Run(); err != nil {
			return output, fmt.Errorf("could not get resources for cmd '%s': err %w, stderr: %s", resourceDescribe.String(), err, descErr.String())
		}
	}

	// for each Pod, run kubectl logs
	pods, err := c.Client().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
	if err != nil {
		return output, err
	}
	logsDir := filepath.Join(output, "pod_logs")
	err = os.Mkdir(logsDir, 0o750) //nolint:gomnd
	if err != nil {
		return output, err
	}

	// A pod whose logs cannot be collected does not abort the dump; the
	// failure is recorded and reported in pod_logs_failures.txt instead.
	failedPods := make(map[string]error)
	for _, pod := range pods.Items {
		logPath := filepath.Join(logsDir, fmt.Sprintf("%s_%s", pod.Namespace, pod.Name))
		if err := dumpPodLogs(ctx, kubeconfig.Name(), pod.Namespace, pod.Name, logPath); err != nil {
			failedPods[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = err
		}
	}
	if len(failedPods) > 0 {
		failedPodOut, err := os.Create(filepath.Join(output, "pod_logs_failures.txt"))
		if err != nil {
			return output, err
		}
		defer failedPodOut.Close()
		for failed, reason := range failedPods {
			_, err = failedPodOut.WriteString(fmt.Sprintf("%s: %v\n", failed, reason))
			if err != nil {
				return output, err
			}
		}
	}

	// Hand over to the shared implementation, which adds the per-addon
	// diagnostics and meta.txt to the same directory.
	return clusters.DumpDiagnostics(ctx, c, meta, output)
}

// dumpPodLogs writes the output of "kubectl logs --all-containers" for the
// given pod to logPath. It lives in its own function so that the log file is
// closed as soon as the pod has been handled, including when kubectl fails,
// rather than staying open until the whole diagnostics dump returns.
func dumpPodLogs(ctx context.Context, kubeconfigPath, namespace, name, logPath string) error {
	podLogOut, err := os.Create(logPath)
	if err != nil {
		return err
	}
	defer podLogOut.Close()

	cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfigPath, "logs", "--all-containers", "-n", namespace, name) //nolint:gosec
	cmd.Stdout = podLogOut
	return cmd.Run()
}
13 changes: 12 additions & 1 deletion pkg/clusters/types/kind/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,5 +144,16 @@ func (c *Cluster) DeleteAddon(ctx context.Context, addon clusters.Addon) error {
// for diagnostics identification.
// It returns the path to directory containing all the diagnostic files and an error.
func (c *Cluster) DumpDiagnostics(ctx context.Context, meta string) (string, error) {
	// Create the directory that receives every diagnostic file.
	outDir, err := os.MkdirTemp(os.TempDir(), "ktf-diag-")
	if err != nil {
		return "", err
	}

	// Let kind collect the cluster logs itself via "kind export logs".
	// The directory is returned alongside the error so that whatever was
	// written before the failure is not orphaned and the caller can inspect
	// or remove it, matching the other DumpDiagnostics implementations.
	if err := exportLogs(ctx, c.Name(), outDir); err != nil {
		return outDir, err
	}

	// Hand over to the shared implementation, which adds the per-addon
	// diagnostics and meta.txt to the same directory.
	return clusters.DumpDiagnostics(ctx, c, meta, outDir)
}
14 changes: 14 additions & 0 deletions pkg/clusters/types/kind/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,17 @@ func (b *Builder) disableDefaultCNI() error {
}
return nil
}

// exportLogs runs "kind export logs <outDir> --name <name>" to dump the logs
// of the named kind cluster into outDir.
// The command's stdout is discarded. Its stderr is captured and, when the
// command fails, prepended to the returned error, which wraps the underlying
// execution error.
func exportLogs(ctx context.Context, name string, outDir string) error {
	var errOut bytes.Buffer

	export := exec.CommandContext(ctx, "kind", "export", "logs", outDir, "--name", name)
	export.Stdout = io.Discard
	export.Stderr = &errOut

	runErr := export.Run()
	if runErr == nil {
		return nil
	}
	return fmt.Errorf("%s: %w", errOut.String(), runErr)
}

0 comments on commit 9861d53

Please sign in to comment.