From 0659e3b034964a9fac02407feb90007af7e30608 Mon Sep 17 00:00:00 2001 From: Petr Fedchenkov Date: Mon, 11 Jul 2022 11:06:29 +0300 Subject: [PATCH] Move check for size into doUpdateVol and add retry We expect error "Remaining disk space ... volume needs ..." in case of no space for new volume, but I see this error for several seconds. Seems it was silently hidden by "Error creating zfs zvol at ..., error=exit status 1, output=cannot create '...': out of space" which is not expected. Seems we should move check for space into our volume state handler function (doUpdateVol). Also this commit adds simple retry in case of delete of another volume. Signed-off-by: Petr Fedchenkov --- pkg/pillar/cmd/volumemgr/handlevolume.go | 47 ++++++++++----------- pkg/pillar/cmd/volumemgr/handlevolumeref.go | 9 ++++ pkg/pillar/cmd/volumemgr/sizemgmt.go | 10 ++++- pkg/pillar/cmd/volumemgr/updatestatus.go | 36 +++++++++++++++- 4 files changed, 74 insertions(+), 28 deletions(-) diff --git a/pkg/pillar/cmd/volumemgr/handlevolume.go b/pkg/pillar/cmd/volumemgr/handlevolume.go index 6090220003..cc3f579e54 100644 --- a/pkg/pillar/cmd/volumemgr/handlevolume.go +++ b/pkg/pillar/cmd/volumemgr/handlevolume.go @@ -90,6 +90,7 @@ func handleVolumeDelete(ctxArg interface{}, key string, } updateVolumeStatusRefCount(ctx, status) maybeDeleteVolume(ctx, status) + maybeSpaceAvailable(ctx) } log.Functionf("handleVolumeDelete(%s) Done", key) } @@ -190,31 +191,6 @@ func handleDeferredVolumeCreate(ctx *volumemgrContext, key string, config *types return } publishVolumeStatus(ctx, status) - if !ctx.globalConfig.GlobalValueBool(types.IgnoreDiskCheckForApps) { - // Check disk usage - remaining, err := getRemainingDiskSpace(ctx) - if err != nil { - errStr := fmt.Sprintf("getRemainingDiskSpace failed: %s\n", - err) - status.SetError(errStr, time.Now()) - publishVolumeStatus(ctx, status) - updateVolumeRefStatus(ctx, status) - if err := createOrUpdateAppDiskMetrics(ctx, status); err != nil { - log.Errorf("handleDeferredVolumeCreate(%s): exception while publishing diskmetric. %s", key, err.Error()) - } - return - } else if remaining < status.MaxVolSize { - errStr := fmt.Sprintf("Remaining disk space %d volume needs %d\n", - remaining, status.MaxVolSize) - status.SetError(errStr, time.Now()) - publishVolumeStatus(ctx, status) - updateVolumeRefStatus(ctx, status) - if err := createOrUpdateAppDiskMetrics(ctx, status); err != nil { - log.Errorf("handleDeferredVolumeCreate(%s): exception while publishing diskmetric. %s", key, err.Error()) - } - return - } - } changed, _ := doUpdateVol(ctx, status) if changed { publishVolumeStatus(ctx, status) @@ -351,6 +327,27 @@ func maybeDeleteVolume(ctx *volumemgrContext, status *types.VolumeStatus) { log.Functionf("maybeDeleteVolume for %v Done", status.Key()) } +// maybeSpaceAvailable iterates over VolumeStatus and call doUpdateVol if state is less than CREATING_VOLUME +func maybeSpaceAvailable(ctx *volumemgrContext) { + for _, s := range ctx.pubVolumeStatus.GetAll() { + status := s.(types.VolumeStatus) + if status.State >= types.CREATING_VOLUME { + continue + } + if vc := lookupVolumeConfig(ctx, status.Key()); vc == nil { + continue + } + changed, _ := doUpdateVol(ctx, &status) + if changed { + publishVolumeStatus(ctx, &status) + updateVolumeRefStatus(ctx, &status) + if err := createOrUpdateAppDiskMetrics(ctx, &status); err != nil { + log.Errorf("maybeSpaceAvailable(%s): exception while publishing diskmetric. %s", status.Key(), err.Error()) + } + } + } +} + // updateVolumeStatusRefCount updates the refcount in volume status // Refcount in volume status is sum of refount in volume config and volume ref config func updateVolumeStatusRefCount(ctx *volumemgrContext, vs *types.VolumeStatus) { diff --git a/pkg/pillar/cmd/volumemgr/handlevolumeref.go b/pkg/pillar/cmd/volumemgr/handlevolumeref.go index 3d8f556860..0ee6908236 100644 --- a/pkg/pillar/cmd/volumemgr/handlevolumeref.go +++ b/pkg/pillar/cmd/volumemgr/handlevolumeref.go @@ -62,6 +62,10 @@ func handleVolumeRefCreate(ctxArg interface{}, key string, if changed { publishVolumeStatus(ctx, vs) updateVolumeRefStatus(ctx, vs) + if err := createOrUpdateAppDiskMetrics(ctx, vs); err != nil { + log.Errorf("handleVolumeRefCreate(%s): exception while publishing diskmetric. %s", + status.Key(), err.Error()) + } } } log.Functionf("handleVolumeRefCreate(%s) Done", key) @@ -91,6 +95,10 @@ func handleVolumeRefModify(ctxArg interface{}, key string, } updateVolumeStatusRefCount(ctx, vs) publishVolumeStatus(ctx, vs) + if err := createOrUpdateAppDiskMetrics(ctx, vs); err != nil { + log.Errorf("handleVolumeRefModify(%s): exception while publishing diskmetric. %s", + status.Key(), err.Error()) + } } log.Functionf("handleVolumeRefModify(%s) Done", key) } @@ -107,6 +115,7 @@ func handleVolumeRefDelete(ctxArg interface{}, key string, updateVolumeStatusRefCount(ctx, vs) publishVolumeStatus(ctx, vs) maybeDeleteVolume(ctx, vs) + maybeSpaceAvailable(ctx) } log.Functionf("handleVolumeRefDelete(%s) Done", key) } diff --git a/pkg/pillar/cmd/volumemgr/sizemgmt.go b/pkg/pillar/cmd/volumemgr/sizemgmt.go index a44b964ad2..d7394839db 100644 --- a/pkg/pillar/cmd/volumemgr/sizemgmt.go +++ b/pkg/pillar/cmd/volumemgr/sizemgmt.go @@ -38,8 +38,9 @@ func getRemainingDiskSpace(ctxPtr *volumemgrContext) (uint64, error) { itemsVolume := pubVolume.GetAll() for _, iterVolumeStatusJSON := range itemsVolume { iterVolumeStatus := iterVolumeStatusJSON.(types.VolumeStatus) - if iterVolumeStatus.State < types.CREATED_VOLUME { - log.Tracef("Volume %s State %d < CREATED_VOLUME", + // we start consume space when moving into CREATING_VOLUME state + if iterVolumeStatus.State < types.CREATING_VOLUME { + log.Tracef("Volume %s State %d < CREATING_VOLUME", iterVolumeStatus.Key(), iterVolumeStatus.State) continue } @@ -50,6 +51,11 @@ func getRemainingDiskSpace(ctxPtr *volumemgrContext) (uint64, error) { log.Noticef("getRemainingDiskSpace: Volume %s not found in VolumeConfigs, ignore", iterVolumeStatus.Key()) continue + } + if vault.ReadPersistType() == types.PersistZFS { + log.Noticef("getRemainingDiskSpace: Volume %s is zvol, use MaxVolSize", + iterVolumeStatus.Key()) + sizeToUseInCalculation = iterVolumeStatus.MaxVolSize } else if cfg.ReadOnly { // it is ReadOnly and will not grow log.Noticef("getRemainingDiskSpace: Volume %s is ReadOnly, use CurrentSize", diff --git a/pkg/pillar/cmd/volumemgr/updatestatus.go b/pkg/pillar/cmd/volumemgr/updatestatus.go index 69923536df..06bd50a66b 100644 --- a/pkg/pillar/cmd/volumemgr/updatestatus.go +++ b/pkg/pillar/cmd/volumemgr/updatestatus.go @@ -427,6 +427,8 @@ func doUpdateContentTree(ctx *volumemgrContext, status *types.ContentTreeStatus) // XXX remove "done" boolean return? func doUpdateVol(ctx *volumemgrContext, status *types.VolumeStatus) (bool, bool) { + var changed bool + log.Functionf("doUpdateVol(%s) name %s", status.Key(), status.DisplayName) // Anything to do? @@ -436,6 +438,39 @@ func doUpdateVol(ctx *volumemgrContext, status *types.VolumeStatus) (bool, bool) return false, true } + // do not go into CREATING_VOLUME state if no space available + if status.State < types.CREATING_VOLUME && !ctx.globalConfig.GlobalValueBool(types.IgnoreDiskCheckForApps) { + // Check disk usage + remaining, err := getRemainingDiskSpace(ctx) + if err != nil { + description := types.ErrorDescription{} + description.Error = fmt.Sprintf("getRemainingDiskSpace failed: %s\n", err) + // do not touch time of the error with the same content + if status.Error != description.Error { + status.SetErrorWithSourceAndDescription(description, types.DiskMetric{}) + changed = true + } + return changed, false + } else if remaining < status.MaxVolSize { + description := types.ErrorDescription{} + description.Error = fmt.Sprintf("Remaining disk space %d volume needs %d\n", + remaining, status.MaxVolSize) + description.ErrorRetryCondition = "Will retry when more space will be available" + // do not touch time of the error with the same content + if status.Error != description.Error { + status.SetErrorWithSourceAndDescription(description, types.DiskMetric{}) + changed = true + } + return changed, false + } + } + + if status.IsErrorSource(types.DiskMetric{}) { + log.Functionf("doUpdateVol: Clearing DiskMetric error %s", status.Error) + status.ClearErrorWithSource() + changed = true + } + verifyOnly := false vrc := lookupVolumeRefConfig(ctx, status.Key()) if vrc != nil { @@ -450,7 +485,6 @@ func doUpdateVol(ctx *volumemgrContext, status *types.VolumeStatus) (bool, bool) } } - changed := false switch status.VolumeContentOriginType { case zconfig.VolumeContentOriginType_VCOT_BLANK: if status.MaxVolSize == 0 {