From 4aa94f1f7ccfe06013b7a5cc8d3bf02440f6b6fe Mon Sep 17 00:00:00 2001
From: Chin-Ya Huang
Date: Fri, 5 Jul 2024 12:15:24 +0800
Subject: [PATCH] feat(volume-controller): enhance precheck error

Signed-off-by: Chin-Ya Huang
---
 controller/volume_controller.go      | 20 +++++++++++++-------
 controller/volume_controller_test.go |  3 ++-
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/controller/volume_controller.go b/controller/volume_controller.go
index e68eb305b5..7f64b5dbc2 100644
--- a/controller/volume_controller.go
+++ b/controller/volume_controller.go
@@ -2251,11 +2251,17 @@ func (c *VolumeController) replenishReplicas(v *longhorn.Volume, e *longhorn.Eng
             // Bypassing the precheck when hardNodeAffinity is provided, because
             // we expect the new replica to be relocated to a specific node.
             if hardNodeAffinity == "" {
-                if err := c.precheckCreateReplica(newReplica, rs, v); err != nil {
+                if multiError, err := c.precheckCreateReplica(newReplica, rs, v); err != nil {
                     log.WithError(err).Warnf("Unable to create new replica %v", newReplica.Name)
+
+                    aggregatedReplicaScheduledError := util.NewMultiError(longhorn.ErrorReplicaSchedulePrecheckNewReplicaFailed)
+                    if multiError != nil {
+                        aggregatedReplicaScheduledError.Append(multiError)
+                    }
+
                     v.Status.Conditions = types.SetCondition(v.Status.Conditions,
                         longhorn.VolumeConditionTypeScheduled, longhorn.ConditionStatusFalse,
-                        longhorn.VolumeConditionReasonReplicaSchedulingFailure, longhorn.ErrorReplicaSchedulePrecheckNewReplicaFailed)
+                        longhorn.VolumeConditionReasonReplicaSchedulingFailure, aggregatedReplicaScheduledError.Join())
                     continue
                 }
             }
@@ -3406,17 +3412,17 @@ func (c *VolumeController) newReplica(v *longhorn.Volume, e *longhorn.Engine, ha
     }
 }
 
-func (c *VolumeController) precheckCreateReplica(replica *longhorn.Replica, replicas map[string]*longhorn.Replica, volume *longhorn.Volume) error {
-    diskCandidates, _, err := c.scheduler.FindDiskCandidates(replica, replicas, volume)
+func (c *VolumeController) precheckCreateReplica(replica *longhorn.Replica, replicas map[string]*longhorn.Replica, volume *longhorn.Volume) (util.MultiError, error) {
+    diskCandidates, multiError, err := c.scheduler.FindDiskCandidates(replica, replicas, volume)
     if err != nil {
-        return err
+        return nil, err
     }
 
     if len(diskCandidates) == 0 {
-        return errors.Errorf("No available disk candidates to create a new replica of size %v", replica.Spec.VolumeSize)
+        return multiError, errors.Errorf("No available disk candidates to create a new replica of size %v", replica.Spec.VolumeSize)
     }
 
-    return nil
+    return nil, nil
 }
 
 func (c *VolumeController) createReplica(replica *longhorn.Replica, v *longhorn.Volume, rs map[string]*longhorn.Replica, isRebuildingReplica bool) error {
diff --git a/controller/volume_controller_test.go b/controller/volume_controller_test.go
index 9e4e57fdfb..a2bc547fd9 100644
--- a/controller/volume_controller_test.go
+++ b/controller/volume_controller_test.go
@@ -133,7 +133,8 @@ func (s *TestSuite) TestVolumeLifeCycle(c *C) {
     tc.expectVolume.Status.CurrentImage = tc.volume.Spec.Image
     tc.expectVolume.Status.Robustness = longhorn.VolumeRobustnessFaulted
     tc.expectVolume.Status.Conditions = setVolumeConditionWithoutTimestamp(tc.expectVolume.Status.Conditions,
-        longhorn.VolumeConditionTypeScheduled, longhorn.ConditionStatusFalse, longhorn.VolumeConditionReasonReplicaSchedulingFailure, longhorn.ErrorReplicaSchedulePrecheckNewReplicaFailed)
+        longhorn.VolumeConditionTypeScheduled, longhorn.ConditionStatusFalse, longhorn.VolumeConditionReasonReplicaSchedulingFailure,
+        fmt.Sprintf("%s;%s", longhorn.ErrorReplicaSchedulePrecheckNewReplicaFailed, longhorn.ErrorReplicaScheduleNodeUnavailable))
     testCases["volume create - replica creation failure"] = tc
 
     // unable to create volume because no node to schedule
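
For reference (not part of the patch): the change aggregates the generic precheck-failure reason with the scheduler's specific reasons and joins them with `;`, which is the message shape the updated test asserts. Below is a minimal, self-contained sketch of that aggregation pattern; the `multiError` type and the error strings are simplified, hypothetical stand-ins for Longhorn's `util.MultiError` and the `longhorn.*` constants, not the real implementations.

```go
package main

import (
	"fmt"
	"strings"
)

// Hypothetical error strings standing in for the longhorn.* constants used by the patch.
const (
	errPrecheckNewReplicaFailed = "precheck new replica failed"
	errNodeUnavailable          = "node unavailable"
)

// multiError is a simplified, insertion-ordered stand-in for util.MultiError.
type multiError []string

func newMultiError(errs ...string) multiError {
	return append(multiError{}, errs...)
}

// appendAll merges another multiError, skipping duplicates (mirrors Append in the patch).
func (me *multiError) appendAll(other multiError) {
	for _, e := range other {
		seen := false
		for _, existing := range *me {
			if existing == e {
				seen = true
				break
			}
		}
		if !seen {
			*me = append(*me, e)
		}
	}
}

// join concatenates the collected reasons with ";" (mirrors Join in the patch).
func (me multiError) join() string {
	return strings.Join(me, ";")
}

func main() {
	// Reasons reported by the scheduler when no disk candidate qualifies.
	schedulerErrors := newMultiError(errNodeUnavailable)

	// The patched replenishReplicas prefixes the generic precheck failure and then
	// appends the scheduler's reasons before setting the Scheduled condition message.
	aggregated := newMultiError(errPrecheckNewReplicaFailed)
	aggregated.appendAll(schedulerErrors)

	fmt.Println(aggregated.join()) // "precheck new replica failed;node unavailable"
}
```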