Skip to content

Commit

Permalink
Merge pull request redpanda-data#13260 from mmaslankaprv/fix-13239
Browse files Browse the repository at this point in the history
Do not always report all fields in maintenance status
  • Loading branch information
twmb authored Sep 29, 2023
2 parents 677670f + ffb2854 commit b3015e0
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 44 deletions.
14 changes: 7 additions & 7 deletions src/go/rpk/pkg/adminapi/api_broker.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ const (
)

type MaintenanceStatus struct {
Draining bool `json:"draining"`
Finished bool `json:"finished"`
Errors bool `json:"errors"`
Partitions int `json:"partitions"`
Eligible int `json:"eligible"`
Transferring int `json:"transferring"`
Failed int `json:"failed"`
Draining bool `json:"draining"`
Finished *bool `json:"finished"`
Errors *bool `json:"errors"`
Partitions *int `json:"partitions"`
Eligible *int `json:"eligible"`
Transferring *int `json:"transferring"`
Failed *int `json:"failed"`
}

// MembershipStatus enumerates possible membership states for brokers.
Expand Down
2 changes: 1 addition & 1 deletion src/go/rpk/pkg/cli/cluster/maintenance/enable.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ node exists that is already in maintenance mode then an error will be returned.
}
addBrokerMaintenanceReport(table, b)
table.Flush()
if b.Maintenance.Draining && b.Maintenance.Finished {
if b.Maintenance.Draining && b.Maintenance.Finished != nil && *b.Maintenance.Finished {
fmt.Printf("\nAll partitions on node %d have drained.\n", nodeID)
return
}
Expand Down
28 changes: 19 additions & 9 deletions src/go/rpk/pkg/cli/cluster/maintenance/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
package maintenance

import (
"fmt"

"github.com/redpanda-data/redpanda/src/go/rpk/pkg/adminapi"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/config"
"github.com/redpanda-data/redpanda/src/go/rpk/pkg/out"
Expand All @@ -19,22 +21,30 @@ import (

func newMaintenanceReportTable() *out.TabWriter {
headers := []string{
"Node-ID", "Draining", "Finished", "Errors",
"Node-ID", "Enabled", "Finished", "Errors",
"Partitions", "Eligible", "Transferring", "Failed",
}
return out.NewTable(headers...)
}

func nullableToStr[V any](v *V) string {
if v == nil {
return "-"
}

return fmt.Sprint(*v)
}

func addBrokerMaintenanceReport(table *out.TabWriter, b adminapi.Broker) {
table.Print(
b.NodeID,
b.Maintenance.Draining,
b.Maintenance.Finished,
b.Maintenance.Errors,
b.Maintenance.Partitions,
b.Maintenance.Eligible,
b.Maintenance.Transferring,
b.Maintenance.Failed)
nullableToStr(b.Maintenance.Finished),
nullableToStr(b.Maintenance.Errors),
nullableToStr(b.Maintenance.Partitions),
nullableToStr(b.Maintenance.Eligible),
nullableToStr(b.Maintenance.Transferring),
nullableToStr(b.Maintenance.Failed))
}

func newStatusCommand(fs afero.Fs, p *config.Params) *cobra.Command {
Expand All @@ -47,13 +57,13 @@ This command reports maintenance status for each node in the cluster. The output
is presented as a table with each row representing a node in the cluster. The
output can be used to monitor the progress of node draining.
NODE-ID DRAINING FINISHED ERRORS PARTITIONS ELIGIBLE TRANSFERRING FAILED
NODE-ID ENABLED FINISHED ERRORS PARTITIONS ELIGIBLE TRANSFERRING FAILED
1 false false false 0 0 0 0
Field descriptions:
NODE-ID: the node ID
DRAINING: true if the node is actively draining leadership
ENABLED: true if the node is currently in maintenance mode
FINISHED: leadership draining has completed
ERRORS: errors have been encountered while draining
PARTITIONS: number of partitions whose leadership has moved
Expand Down
10 changes: 5 additions & 5 deletions src/v/redpanda/admin/api-doc/broker.json
Original file line number Diff line number Diff line change
Expand Up @@ -304,15 +304,15 @@
},
"maintenance_status": {
"id": "maintenance_status",
"description": "Drain status",
"description": "Status of maintenance mode",
"properties": {
"draining": {
"type": "boolean",
"description": "in maintenance state"
"description": "Flag indicating if maintenance mode is enabled."
},
"finished": {
"type": "boolean",
"description": "drain finished"
"description": "Indicates that draining is finished. Note that the draining flag will always be true if maintenance mode is enable and draining has finished."
},
"errors": {
"type": "boolean",
Expand Down Expand Up @@ -349,7 +349,7 @@
"description": "number of replicas left on a node"
},
"allocation_failures": {
"type" : "array",
"type": "array",
"items": {
"type": "string",
"description": "ntp"
Expand Down Expand Up @@ -414,4 +414,4 @@
}
}
}
}
}
7 changes: 0 additions & 7 deletions src/v/redpanda/admin_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -850,13 +850,6 @@ fill_maintenance_status(const cluster::broker_state& b_state) {

ret.draining = b_state.get_maintenance_state()
== model::maintenance_state::active;
// ensure that the output json has all fields
ret.finished = false;
ret.errors = false;
ret.partitions = 0;
ret.transferring = 0;
ret.eligible = 0;
ret.failed = 0;

return ret;
}
Expand Down
25 changes: 16 additions & 9 deletions tests/rptest/clients/rpk.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def __init__(self, id, address):

class RpkMaintenanceStatus(typing.NamedTuple):
node_id: int
draining: bool
enabled: bool
finished: bool
errors: bool
partitions: int
Expand Down Expand Up @@ -980,7 +980,7 @@ def parse(line):
return None

# jerry@garcia:~/src/redpanda$ rpk cluster maintenance status
# NODE-ID DRAINING FINISHED ERRORS PARTITIONS ELIGIBLE TRANSFERRING FAILED
# NODE-ID ENABLED FINISHED ERRORS PARTITIONS ELIGIBLE TRANSFERRING FAILED
# 1 false false false 0 0 0 0
line = line.split()

Expand All @@ -990,14 +990,21 @@ def parse(line):
line = [x.strip() for x in line]
if line[0] == "NODE-ID":
return None

def bool_or_none(value: str):
return None if value == "-" else value == "true"

def int_or_none(value: str):
return None if value == "-" else int(value)

return RpkMaintenanceStatus(node_id=int(line[0]),
draining=line[1] == "true",
finished=line[2] == "true",
errors=line[3] == "true",
partitions=int(line[4]),
eligible=int(line[5]),
transferring=int(line[6]),
failed=int(line[7]))
enabled=line[1] == "true",
finished=bool_or_none(line[2]),
errors=bool_or_none(line[3]),
partitions=int_or_none(line[4]),
eligible=int_or_none(line[5]),
transferring=int_or_none(line[6]),
failed=int_or_none(line[7]))

cmd = [
self._rpk_binary(),
Expand Down
17 changes: 11 additions & 6 deletions tests/rptest/tests/maintenance_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ def _in_maintenance_mode(self, node):

def _in_maintenance_mode_fully(self, node):
status = self.admin.maintenance_status(node)
return status["finished"] and not status["errors"] and \
status["partitions"] > 0
if all([key in status
for key in ['finished', 'errors', 'partitions']]):
return status["finished"] and not status["errors"] and \
status["partitions"] > 0
else:
return False

def _verify_broker_metadata(self, maintenance_enabled, node):
"""
Expand All @@ -80,17 +84,18 @@ def _verify_broker_metadata(self, maintenance_enabled, node):
return False
# check status wanted
if maintenance_enabled:
return status['draining'] and status['finished']
return status['draining'] and status[
'finished'] if 'finished' in status else True
else:
return not status['draining']

def _verify_maintenance_status(self, node, draining):
def _verify_maintenance_status(self, node, enabled):
"""
Check that cluster reports maintenance status as expected through
both rpk status tooling as well as raw admin interface.
"""
# get status for this node via rpk
node_id = self.redpanda.idx(node)
node_id = self.redpanda.node_id(node)
statuses = self.rpk.cluster_maintenance_status()
self.logger.debug(f"finding node_id {node_id} in rpk "
"maintenance status: {statuses}")
Expand All @@ -108,7 +113,7 @@ def _verify_maintenance_status(self, node, draining):
"{node.name}: {admin_status}")

# ensure that both agree on expected outcome
return admin_status["draining"] == rpk_status.draining == draining
return admin_status["draining"] == rpk_status.enabled == enabled

def _enable_maintenance(self, node):
"""
Expand Down

0 comments on commit b3015e0

Please sign in to comment.