Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert dumpDiag to use kind export logs #591

Merged
merged 4 commits into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 45 additions & 68 deletions pkg/clusters/diagnostics.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,31 @@ import (
"os/exec"
"path/filepath"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// DumpDiagnostics gathers a wide range of generic, diagnostic information from the test cluster,
// to provide a snapshot of it at a given time for offline debugging.
// It uses the provided context and writes the meta string to meta.txt to identify the result set.
// It returns the name of the directory that contains all the produced diagnostics data.
func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error) {
// DumpAllDescribeAll gathers diagnostic information from the cluster.
// Specifically it runs "kubectl get" and "kubectl describe" for every
// listable resource type and stores the output in two files inside outDir
// (kubectl_get_all.yaml and kubectl_describe_all.txt).
func DumpAllDescribeAll(ctx context.Context, c Cluster, outDir string) error {
// Obtain a kubeconfig
kubeconfig, err := TempKubeconfig(c)
if err != nil {
return "", err
return err
}
defer os.Remove(kubeconfig.Name())

// create a tempdir
output, err := os.MkdirTemp(os.TempDir(), "ktf-diag-")
if err != nil {
return "", err
}

// kubectl api-resources --verbs=list --namespaced -o name | xargs -n 1 kubectl get --show-kind --ignore-not-found -A -oyaml
// kubectl api-resources --verbs=list --namespaced -o name | xargs -n 1 kubectl get --show-kind --ignore-not-found -A -oyaml
// aka "kubectl get all" and "kubectl describe all", but also gets CRs and cluster-scoped resources
getAllOut, err := os.OpenFile(filepath.Join(output, "kubectl_get_all.yaml"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
getAllOut, err := os.OpenFile(filepath.Join(outDir, "kubectl_get_all.yaml"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
if err != nil {
return output, err
return err
}
defer getAllOut.Close()
describeAllOut, err := os.OpenFile(filepath.Join(output, "kubectl_describe_all.txt"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
describeAllOut, err := os.OpenFile(filepath.Join(outDir, "kubectl_describe_all.txt"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
if err != nil {
return output, err
return err
}
defer describeAllOut.Close()

Expand All @@ -51,13 +43,14 @@ func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error
clusterResources := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "api-resources", "--verbs=list", "--namespaced=false", "-o", "name") //nolint:gosec
clusterResources.Stdout = &clusterList
if err := namespacedResources.Run(); err != nil {
return output, err
return err
}
if err := clusterResources.Run(); err != nil {
return output, err
return err
}
combinedList := strings.Split(namespacedList.String()+clusterList.String(), "\n")

// run kubectl get all and kubectl describe all for each resource.
for _, resource := range combinedList {
if resource == "" {
// unwanted artifact of the split
Expand All @@ -72,52 +65,21 @@ func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error
resourceDescribe.Stdout = describeAllOut
resourceDescribe.Stderr = &descErr
if err := resourceGet.Run(); err != nil {
return output, fmt.Errorf("could not get resources for cmd '%s': err %s, stderr: %s", resourceGet.String(), err, getErr.String())
return fmt.Errorf("could not get resources for cmd '%s': err %s, stderr: %s", resourceGet.String(), err, getErr.String())
}
if err := resourceDescribe.Run(); err != nil {
return output, fmt.Errorf("could not get resources for cmd '%s': err %s, stderr: %s", resourceDescribe.String(), err, descErr.String())
return fmt.Errorf("could not get resources for cmd '%s': err %s, stderr: %s", resourceDescribe.String(), err, descErr.String())
}
}

// for each Pod, run kubectl logs
pods, err := c.Client().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
if err != nil {
return output, err
}
logsDir := filepath.Join(output, "pod_logs")
err = os.Mkdir(logsDir, 0o750) //nolint:gomnd
if err != nil {
return output, err
}
failedPods := make(map[string]error)
for _, pod := range pods.Items {
podLogOut, err := os.Create(filepath.Join(logsDir, fmt.Sprintf("%s_%s", pod.Namespace, pod.Name)))
if err != nil {
failedPods[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = err
continue
}
cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "logs", "--all-containers", "-n", pod.Namespace, pod.Name) //nolint:gosec
cmd.Stdout = podLogOut
if err := cmd.Run(); err != nil {
failedPods[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = err
continue
}
defer podLogOut.Close()
}
if len(failedPods) > 0 {
failedPodOut, err := os.Create(filepath.Join(output, "pod_logs_failures.txt"))
if err != nil {
return output, err
}
defer failedPodOut.Close()
for failed, reason := range failedPods {
_, err = failedPodOut.WriteString(fmt.Sprintf("%s: %v\n", failed, reason))
if err != nil {
return output, err
}
}
}
return nil
}

// DumpDiagnostics gathers a wide range of generic, diagnostic information from the test cluster,
// to provide a snapshot of it at a given time for offline debugging.
// It uses the provided context and writes the meta string to meta.txt to identify the result set.
// It writes all the produced diagnostics data into the outDir directory.
func DumpDiagnostics(ctx context.Context, c Cluster, meta string, outDir string) error {
// for each Addon, run the addon diagnostic function
failedAddons := make(map[string]error)
for _, addon := range c.ListAddons() {
Expand All @@ -127,7 +89,7 @@ func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error
continue
}
if len(diagnostics) > 0 {
addonOut := filepath.Join(output, "addons", string(addon.Name()))
addonOut := filepath.Join(outDir, "addons", string(addon.Name()))
err = os.MkdirAll(addonOut, 0o750) //nolint:gomnd
if err != nil {
failedAddons[string(addon.Name())] = err
Expand All @@ -149,29 +111,44 @@ func DumpDiagnostics(ctx context.Context, c Cluster, meta string) (string, error
}
}
if len(failedAddons) > 0 {
failedAddonOut, err := os.Create(filepath.Join(output, "addon_failures.txt"))
failedAddonOut, err := os.Create(filepath.Join(outDir, "addon_failures.txt"))
if err != nil {
return output, err
return err
}
defer failedAddonOut.Close()
for failed, reason := range failedAddons {
_, err = failedAddonOut.WriteString(fmt.Sprintf("%s: %v\n", failed, reason))
if err != nil {
return output, err
return err
}
}
}

// write the diagnostic metadata
metaOut, err := os.Create(filepath.Join(output, "meta.txt"))
metaOut, err := os.Create(filepath.Join(outDir, "meta.txt"))
if err != nil {
return output, err
return err
}
defer metaOut.Close()
_, err = metaOut.WriteString(meta)
if err != nil {
return output, err
return err
}

astoycos marked this conversation as resolved.
Show resolved Hide resolved
err = DumpAllDescribeAll(ctx, c, outDir)
// If dumping the results of `kubectl get all` or `kubectl describe all` failed
// (e.g. because the Kubernetes cluster was not created correctly), write the
// error to kubectl_dump_error.txt instead of returning it.
if err != nil {
kubectlErrorOut, openErr := os.OpenFile(filepath.Join(outDir, "kubectl_dump_error.txt"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) //nolint:gomnd
if openErr != nil {
return openErr
}
defer kubectlErrorOut.Close()
_, writeErr := kubectlErrorOut.WriteString(err.Error())
if writeErr != nil {
return writeErr
}
}

return output, nil
return nil
}
astoycos marked this conversation as resolved.
Show resolved Hide resolved
59 changes: 58 additions & 1 deletion pkg/clusters/types/gke/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"time"
Expand All @@ -12,6 +14,7 @@ import (
"cloud.google.com/go/container/apiv1/containerpb"
"github.com/blang/semver/v4"
"google.golang.org/api/option"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"

Expand Down Expand Up @@ -219,5 +222,59 @@ func (c *Cluster) DeleteAddon(ctx context.Context, addon clusters.Addon) error {
// for diagnostics identification.
// It returns the path to directory containing all the diagnostic files and an error.
func (c *Cluster) DumpDiagnostics(ctx context.Context, meta string) (string, error) {
return clusters.DumpDiagnostics(ctx, c, meta)
// Obtain a kubeconfig
kubeconfig, err := clusters.TempKubeconfig(c)
if err != nil {
return "", err
}
defer os.Remove(kubeconfig.Name())

// create a tempdir
outDir, err := os.MkdirTemp(os.TempDir(), "ktf-diag-")
if err != nil {
return "", err
}

astoycos marked this conversation as resolved.
Show resolved Hide resolved
// for each Pod, run kubectl logs
pods, err := c.Client().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
if err != nil {
return outDir, err
}
logsDir := filepath.Join(outDir, "pod_logs")
err = os.Mkdir(logsDir, 0o750) //nolint:gomnd
if err != nil {
return outDir, err
}
failedPods := make(map[string]error)
for _, pod := range pods.Items {
podLogOut, err := os.Create(filepath.Join(logsDir, fmt.Sprintf("%s_%s", pod.Namespace, pod.Name)))
if err != nil {
failedPods[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = err
continue
}
cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "logs", "--all-containers", "-n", pod.Namespace, pod.Name) //nolint:gosec
cmd.Stdout = podLogOut
if err := cmd.Run(); err != nil {
failedPods[fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)] = err
continue
}
defer podLogOut.Close()
}
if len(failedPods) > 0 {
failedPodOut, err := os.Create(filepath.Join(outDir, "pod_logs_failures.txt"))
if err != nil {
return outDir, err
}
defer failedPodOut.Close()
for failed, reason := range failedPods {
_, err = failedPodOut.WriteString(fmt.Sprintf("%s: %v\n", failed, reason))
if err != nil {
return outDir, err
}
}
}

err = clusters.DumpDiagnostics(ctx, c, meta, outDir)

return outDir, err
}
14 changes: 13 additions & 1 deletion pkg/clusters/types/kind/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,5 +144,17 @@ func (c *Cluster) DeleteAddon(ctx context.Context, addon clusters.Addon) error {
// for diagnostics identification.
// It returns the path to directory containing all the diagnostic files and an error.
func (c *Cluster) DumpDiagnostics(ctx context.Context, meta string) (string, error) {
	// Create the temporary directory that receives all diagnostics.
	outDir, err := os.MkdirTemp(os.TempDir(), "ktf-diag-")
	if err != nil {
		return "", err
	}

	// Let kind export the logs of the cluster into the directory.
	// The directory already exists at this point, so it is returned together
	// with the error: the caller can then inspect or remove it instead of
	// losing track of it.
	if err := exportLogs(ctx, c.Name(), outDir); err != nil {
		return outDir, err
	}

	// Add the cluster-type independent diagnostics to the same directory.
	err = clusters.DumpDiagnostics(ctx, c, meta, outDir)
	return outDir, err
}
14 changes: 14 additions & 0 deletions pkg/clusters/types/kind/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,17 @@ func (b *Builder) disableDefaultCNI() error {
}
return nil
}

// exportLogs dumps the logs of the kind cluster called name into the directory
// outDir by running "kind export logs <outDir> --name <name>".
//
// The command's standard output is discarded. When the command fails, the
// returned error wraps the underlying error (so errors.Is/errors.As keep
// working) and, if kind wrote anything to standard error, includes that output.
func exportLogs(ctx context.Context, name string, outDir string) error {
	var stderr bytes.Buffer

	cmd := exec.CommandContext(ctx, "kind", "export", "logs", outDir, "--name", name)
	cmd.Stdout = io.Discard
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Only append stderr when there is something to show, to avoid a
		// dangling separator in the message.
		if msg := bytes.TrimSpace(stderr.Bytes()); len(msg) > 0 {
			return fmt.Errorf("exporting logs of kind cluster %q to %q: %w: %s", name, outDir, err, msg)
		}
		return fmt.Errorf("exporting logs of kind cluster %q to %q: %w", name, outDir, err)
	}
	return nil
}
69 changes: 69 additions & 0 deletions test/integration/kind_diagnostics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//go:build integration_tests
// +build integration_tests

package integration

import (
"fmt"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/require"

environment "github.com/kong/kubernetes-testing-framework/pkg/environments"
)

// TestKindDiagnosticDump builds a default test environment, calls
// DumpDiagnostics on its cluster and verifies that the resulting directory
// contains non-empty kindnet container logs, the kubectl get/describe dumps
// and a meta.txt that carries the test name.
// NOTE(review): this relies on the builder's default cluster being a kind
// cluster — confirm against the environments package.
func TestKindDiagnosticDump(t *testing.T) {
	t.Parallel()

	t.Log("configuring the testing environment")
	builder := environment.NewBuilder()

	t.Log("building the testing environment and Kubernetes cluster")
	env, err := builder.Build(ctx)
	require.NoError(t, err)

	t.Logf("setting up the environment cleanup for environment %s and cluster %s", env.Name(), env.Cluster().Name())
	t.Cleanup(func() {
		t.Logf("cleaning up environment %s and cluster %s", env.Name(), env.Cluster().Name())
		require.NoError(t, env.Cleanup(ctx))
	})

	t.Log("waiting for the test environment to be ready for use")
	require.NoError(t, <-env.WaitForReady(ctx))

	t.Log("verifying the test environment becomes ready for use")
	waitForObjects, ready, err := env.Ready(ctx)
	require.NoError(t, err)
	require.Empty(t, waitForObjects)
	require.True(t, ready)

	cluster := env.Cluster()

	t.Log("verifying that DumpDiagnostics functions as expected")
	output, err := cluster.DumpDiagnostics(ctx, t.Name())
	require.NoError(t, err)
	defer func() {
		require.NoError(t, os.RemoveAll(output))
	}()

	// The exported logs are expected below <cluster name>-control-plane/containers.
	// Check the Glob error as well, so that a malformed pattern is reported
	// as such rather than as "no logs found".
	logsPath, err := filepath.Glob(filepath.Join(output, fmt.Sprintf("%s-control-plane", cluster.Name()), "containers", "kindnet-*"))
	require.NoError(t, err)
	require.NotEmpty(t, logsPath)
	logs, err := os.ReadFile(logsPath[0])
	require.NoError(t, err)
	require.NotEmpty(t, logs)

	describe, err := os.ReadFile(filepath.Join(output, "kubectl_describe_all.txt"))
	require.NoError(t, err)
	require.NotEmpty(t, describe)

	get, err := os.ReadFile(filepath.Join(output, "kubectl_get_all.yaml"))
	require.NoError(t, err)
	require.NotEmpty(t, get)

	meta, err := os.ReadFile(filepath.Join(output, "meta.txt"))
	require.NoError(t, err)
	require.NotEmpty(t, meta)
	require.Contains(t, string(meta), t.Name())
}
5 changes: 3 additions & 2 deletions test/integration/kongaddon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,10 @@ func TestKongAddonDiagnostics(t *testing.T) {
require.NoError(t, err)
require.NotZero(t, len(root))

logsPath, _ := filepath.Glob(filepath.Join(output, "pod_logs", "kong-system_ingress-controller-kong-*"))
logsPath, _ := filepath.Glob(filepath.Join(output, fmt.Sprintf("%s-control-plane",cluster.Name()), "containers", "ingress-controller-kong-*"))
require.NotZero(t, len(logsPath))
logs, err := os.ReadFile(logsPath[0])
// First log file is for "clear-stale-pid" container which is in fact empty, use second one.
logs, err := os.ReadFile(logsPath[1])
require.NoError(t, err)
require.NotZero(t, len(logs))

Expand Down