Skip to content

Commit

Permalink
Merge pull request redpanda-data#19473 from r-vasquez/improve-k8s-bundle
Browse files Browse the repository at this point in the history
rpk: Improve k8s bundle errors + better admin API fallback
  • Loading branch information
r-vasquez authored Jun 12, 2024
2 parents 5ec1849 + e779bf3 commit 996183e
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 12 deletions.
74 changes: 66 additions & 8 deletions src/go/rpk/pkg/cli/debug/bundle/bundle_k8s_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
"strings"
"time"

authorizationv1 "k8s.io/api/authorization/v1"

"github.com/hashicorp/go-multierror"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/adminapi"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
Expand Down Expand Up @@ -70,8 +72,6 @@ func executeK8SBundle(ctx context.Context, bp bundleParams) error {
saveDataDirStructure(ps, bp.y),
saveDiskUsage(ctx, ps, bp.y),
saveInterrupts(ps),
saveK8SLogs(ctx, ps, bp.namespace, bp.logsSince, bp.logsLimitBytes, bp.labelSelector),
saveK8SResources(ctx, ps, bp.namespace, bp.labelSelector),
saveKafkaMetadata(ctx, ps, bp.cl),
saveKernelSymbols(ps),
saveMdstat(ps),
Expand All @@ -81,12 +81,35 @@ func executeK8SBundle(ctx context.Context, bp bundleParams) error {
saveSlabInfo(ps),
}

adminAddresses, err := adminAddressesFromK8S(ctx, bp.namespace)
if err != nil {
zap.L().Sugar().Debugf("unable to get admin API addresses from the k8s API: %v", err)
// We use the K8S API to discover the cluster's admin API addresses and to
// collect logs and k8s resources. First we check that we have enough
// permissions before queuing those steps.
var adminAddresses []string
if err := checkK8sPermissions(ctx, bp.namespace); err != nil {
	// The %q verb must receive the namespace, not the error; the underlying
	// cause is appended so the user can see which permission is missing.
	errs = multierror.Append(
		errs,
		fmt.Errorf("skipping log collection and Kubernetes resource collection (such as Pods and Services) in the namespace %q: %v. To enable this, grant additional permissions to your Service Account. For more information, visit https://docs.redpanda.com/current/manage/kubernetes/troubleshooting/k-diagnostics-bundle/", bp.namespace, err),
	)
} else {
	steps = append(steps, []step{
		saveK8SResources(ctx, ps, bp.namespace, bp.labelSelector),
		saveK8SLogs(ctx, ps, bp.namespace, bp.logsSince, bp.logsLimitBytes, bp.labelSelector),
	}...)

	// A failure here is not fatal: the address list stays empty and the
	// code that follows falls back to other sources.
	adminAddresses, err = adminAddressesFromK8S(ctx, bp.namespace)
	if err != nil {
		zap.L().Sugar().Debugf("unable to get admin API addresses from the k8s API: %v", err)
	}
}
if len(adminAddresses) == 0 {
adminAddresses = []string{fmt.Sprintf("127.0.0.1:%v", config.DefaultAdminPort)}
if len(bp.p.AdminAPI.Addresses) > 0 {
zap.L().Sugar().Debugf("using admin API addresses from profile: %v", bp.p.AdminAPI.Addresses)
adminAddresses = bp.p.AdminAPI.Addresses
} else {
defaultAddress := fmt.Sprintf("127.0.0.1:%v", config.DefaultAdminPort)
zap.L().Sugar().Debugf("profile empty, using %v for the Admin API address", defaultAddress)
adminAddresses = []string{defaultAddress}
}
}
steps = append(steps, []step{
saveClusterAdminAPICalls(ctx, ps, bp.fs, bp.p, adminAddresses, bp.partitions),
Expand Down Expand Up @@ -138,6 +161,41 @@ func k8sPodList(ctx context.Context, namespace string, labelSelector map[string]
return clientset, pods, nil
}

// checkK8sPermissions checks for the minimal permissions needed to perform
// the k8s-API-related steps in the debug bundle collection process. It issues
// one SelfSubjectAccessReview per required (resource, verb) pair, scoped to
// the given namespace.
//
// It returns nil when every check is allowed. It returns an error when the
// Kubernetes client cannot be created, when a review request fails, or when
// the first required permission is reported as not allowed.
func checkK8sPermissions(ctx context.Context, namespace string) error {
	cl, err := k8sClientset()
	if err != nil {
		return fmt.Errorf("unable to create kubernetes client: %w", err)
	}

	// These are the minimal permissions needed for the k8s bundle to function.
	// A slice (rather than a map) keeps the check order, and therefore the
	// reported error, deterministic across runs.
	required := []struct{ resource, verb string }{
		{resource: "services", verb: "list"},
		{resource: "pods", verb: "list"},
	}
	for _, perm := range required {
		sar := &authorizationv1.SelfSubjectAccessReview{
			Spec: authorizationv1.SelfSubjectAccessReviewSpec{
				ResourceAttributes: &authorizationv1.ResourceAttributes{
					Namespace: namespace,
					Verb:      perm.verb,
					Resource:  perm.resource,
				},
			},
		}
		response, err := cl.AuthorizationV1().SelfSubjectAccessReviews().Create(ctx, sar, metav1.CreateOptions{})
		if err != nil {
			return fmt.Errorf("unable to check service account permissions: %w", err)
		}
		if !response.Status.Allowed {
			return fmt.Errorf("permission denied to %s %s", perm.verb, perm.resource)
		}
	}
	return nil
}

// adminAddressesFromK8S returns the admin API host:port list by querying the
// K8S Api.
func adminAddressesFromK8S(ctx context.Context, namespace string) ([]string, error) {
Expand Down Expand Up @@ -372,7 +430,7 @@ func saveK8SResources(ctx context.Context, ps *stepParams, namespace string, lab
return func() error {
clientset, pods, err := k8sPodList(ctx, namespace, labelSelector)
if err != nil {
return err
return fmt.Errorf("unable to save k8s resources: unable to list k8s pods: %v", err)
}
// This is a safeguard, so we don't end up saving empty requests for
// namespaces that don't have any pods.
Expand Down Expand Up @@ -414,7 +472,7 @@ func saveK8SLogs(ctx context.Context, ps *stepParams, namespace, since string, l
return func() error {
clientset, pods, err := k8sPodList(ctx, namespace, labelSelector)
if err != nil {
return err
return fmt.Errorf("unable to save logs: unable to list k8s pods: %v", err)
}
podsInterface := clientset.CoreV1().Pods(namespace)

Expand Down
14 changes: 10 additions & 4 deletions src/go/rpk/pkg/cli/debug/bundle/bundle_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ func writeCommandOutputToZipLimit(
err = cmd.Wait()
if err != nil {
if !strings.Contains(err.Error(), "broken pipe") {
return fmt.Errorf("couldn't save '%s': %w", filename, err)
return fmt.Errorf("couldn't save '%s': %w; %[1]v contains the full error message", filename, err)
}
zap.L().Sugar().Debugf(
"Got '%v' while running '%s'. This is probably due to the"+
Expand Down Expand Up @@ -557,6 +557,9 @@ func saveSlabInfo(ps *stepParams) step {
return func() error {
bs, err := afero.ReadFile(ps.fs, "/proc/slabinfo")
if err != nil {
if errors.Is(err, fs.ErrPermission) {
return fmt.Errorf("%v: you may need to run the command as root to read this file", err)
}
return err
}
return writeFileToZip(ps, "proc/slabinfo", bs)
Expand Down Expand Up @@ -940,13 +943,16 @@ func sliceControllerDir(cFiles []fileSize, logLimitBytes int64) (slice []fileSiz

func saveControllerLogDir(ps *stepParams, y *config.RedpandaYaml, logLimitBytes int) step {
return func() error {
if y.Redpanda.Directory == "" {
return fmt.Errorf("failed to save controller logs: 'redpanda.data_directory' is empty on the provided configuration file")
}
controllerDir := filepath.Join(y.Redpanda.Directory, "redpanda", "controller", "0_0")

// We don't need the .base_index files to parse out the messages.
exclude := regexp.MustCompile(`^*.base_index$`)
cFiles, size, err := walkSizeDir(controllerDir, exclude)
if err != nil {
return err
return fmt.Errorf("unable to save controller logs: %v", err)
}

if int(size) < logLimitBytes {
Expand All @@ -966,11 +972,11 @@ func saveControllerLogDir(ps *stepParams, y *config.RedpandaYaml, logLimitBytes
for _, cLog := range slice {
file, err := os.ReadFile(cLog.path)
if err != nil {
return err
return fmt.Errorf("unable to save controller logs: %v", err)
}
err = writeFileToZip(ps, filepath.Join("controller", filepath.Base(cLog.path)), file)
if err != nil {
return err
return fmt.Errorf("unable to save controller logs: %v", err)
}
}
return nil
Expand Down

0 comments on commit 996183e

Please sign in to comment.