From 414cf84b5c93143879d069f7f764152ca292b07a Mon Sep 17 00:00:00 2001 From: Daisuke Kobayashi Date: Tue, 13 Aug 2024 14:39:05 +0900 Subject: [PATCH] rpk: supports triggering on-demand balancer --- src/go/rpk/go.mod | 2 +- src/go/rpk/go.sum | 2 + .../cli/cluster/partitions/balancer_run.go | 73 +++++++++++++++++++ .../pkg/cli/cluster/partitions/partitions.go | 1 + 4 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 src/go/rpk/pkg/cli/cluster/partitions/balancer_run.go diff --git a/src/go/rpk/go.mod b/src/go/rpk/go.mod index ea451286da8ab..f10b4e7e82db2 100644 --- a/src/go/rpk/go.mod +++ b/src/go/rpk/go.mod @@ -37,7 +37,7 @@ require ( github.com/pkg/errors v0.9.1 github.com/prometheus/client_model v0.6.1 github.com/prometheus/common v0.55.0 - github.com/redpanda-data/common-go/rpadmin v0.1.3 + github.com/redpanda-data/common-go/rpadmin v0.1.4 github.com/rs/xid v1.5.0 github.com/safchain/ethtool v0.4.1 github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 diff --git a/src/go/rpk/go.sum b/src/go/rpk/go.sum index b988503ea9798..0309d360d9621 100644 --- a/src/go/rpk/go.sum +++ b/src/go/rpk/go.sum @@ -210,6 +210,8 @@ github.com/redpanda-data/common-go/net v0.1.0 h1:JnJioRJuL961r1QXiJQ1tW9+yEaJfu8 github.com/redpanda-data/common-go/net v0.1.0/go.mod h1:iOdNkjxM7a1T8F3cYHTaKIPFCHzzp/ia6TN+Z+7Tt5w= github.com/redpanda-data/common-go/rpadmin v0.1.3 h1:JRdr4rHcdr+A0hHr+viJYnPm+dP01bsgVUoQLM7Kz44= github.com/redpanda-data/common-go/rpadmin v0.1.3/go.mod h1:I7umqhnMhIOSEnIA3fvLtdQU7QO/SbWGCwFfFDs3De4= +github.com/redpanda-data/common-go/rpadmin v0.1.4 h1:NkVvurQRmBT9r58UVezC0DhYjR3vzYkQnkFndJBb5xE= +github.com/redpanda-data/common-go/rpadmin v0.1.4/go.mod h1:I7umqhnMhIOSEnIA3fvLtdQU7QO/SbWGCwFfFDs3De4= github.com/redpanda-data/go-avro/v2 v2.0.0-20240405204525-77b1144dc525 h1:vskZrV6q8W8flL0Ud23AJUYAd8ZgTadO45+loFnG2G0= github.com/redpanda-data/go-avro/v2 v2.0.0-20240405204525-77b1144dc525/go.mod h1:3YqAM7pgS5vW/EH7naCjFqnAajSgi0f0CfMe1HGhLxQ= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= diff --git a/src/go/rpk/pkg/cli/cluster/partitions/balancer_run.go b/src/go/rpk/pkg/cli/cluster/partitions/balancer_run.go new file mode 100644 index 0000000000000..ee2ed6ee2c0af --- /dev/null +++ b/src/go/rpk/pkg/cli/cluster/partitions/balancer_run.go @@ -0,0 +1,73 @@ +// Copyright 2024 Redpanda Data, Inc. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.md +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0 + +package partitions + +import ( + "fmt" + + "github.com/redpanda-data/redpanda/src/go/rpk/pkg/adminapi" + "github.com/redpanda-data/redpanda/src/go/rpk/pkg/config" + "github.com/redpanda-data/redpanda/src/go/rpk/pkg/out" + "github.com/spf13/afero" + "github.com/spf13/cobra" +) + +func newTriggerBalnacerCommand(fs afero.Fs, p *config.Params) *cobra.Command { + cmd := &cobra.Command{ + Use: "balance", + Short: "Triggers on-demand partition balancer", + Long: `Triggers on-demand partition balancer. + +This command allows you to trigger on-demand partition balancer. + +With Redpanda Community Edition, partition count on each broker +easily becomes uneven, which leads to data skew. In order to +distribute partitions across brokers, user can run this command +to trigger on-demand partition balancer. + +With Redpanda Enterprise Edition, even though Continuous Data Balancing +monitors broker and rack availability, as well as disk usage, to avoid +topic hotspots, there're still some edge cases where it can't handle well, +i.e. a node becoming unavailable for a prolonged time and joining the +cluster back thereafter. In such cases, user needs to run this command +to trigger partition balancer manually. + +After you run this command, monitor the balancer progress using: + + rpk cluster partitions balancer-status + +To see more detailed movement status, monitor the progress using: + + rpk cluster partitions move-status +`, + + Args: cobra.ExactArgs(0), + Run: func(cmd *cobra.Command, args []string) { + f := p.Formatter + if h, ok := f.Help([]string{}); ok { + out.Exit(h) + } + + p, err := p.LoadVirtualProfile(fs) + out.MaybeDie(err, "rpk unable to load config: %v", err) + config.CheckExitCloudAdmin(p) + + cl, err := adminapi.NewClient(fs, p) + out.MaybeDie(err, "unable to initialize admin client: %v", err) + + err = cl.TriggerBalancer(cmd.Context()) + out.MaybeDie(err, "failed to invoke ondemand partition balancer: %v", err) + + fmt.Println("Successfully triggered on demand partition balancer.\n\nPlease find the progress with 'rpk cluster partitions balancer-status'.") + }, + } + p.InstallFormatFlag(cmd) + return cmd +} diff --git a/src/go/rpk/pkg/cli/cluster/partitions/partitions.go b/src/go/rpk/pkg/cli/cluster/partitions/partitions.go index 1d4333ff6a6b3..4d53aea3d1229 100644 --- a/src/go/rpk/pkg/cli/cluster/partitions/partitions.go +++ b/src/go/rpk/pkg/cli/cluster/partitions/partitions.go @@ -33,6 +33,7 @@ func NewPartitionsCommand(fs afero.Fs, p *config.Params) *cobra.Command { newPartitionEnableCommand(fs, p), newPartitionMovementsStatusCommand(fs, p), newTransferLeaderCommand(fs, p), + newTriggerBalnacerCommand(fs, p), newUnsafeRecoveryCommand(fs, p), ) return cmd