Skip to content

Commit

Permalink
Fix a idempotency issues in CreateVolume
Browse files Browse the repository at this point in the history
In case creating a new volume takes longer than the GRPC timeout,
the provisioner could get in a state where volumes are considered
ready even if no resource was ever created. This commit fixes this
issue by ensuring that volumes are only considered ready after the
expected amount of volumes was placed.

The bug happened in cases where:
* Create() succesfully called saveVolume(), persisting the volume
  information as annotations, which is interpreted as "ready" resources
* Immediatly after, the GRPC timeout cancels the request context, meaning
  the volume scheduler aborted before any resources could be assigned to
  nodes

A simple reording of "volumeScheduler.Create()" and "saveVolume()" would
lead to a different issue, in which continously more volumes are placed
but never marked as ready. To prevent this, volumes that reference at least
the number of resources as required by the parameters are considered ready.

Note that this is a stopgap solution until volume schedulers are re-written
to be idempotent themselves.
  • Loading branch information
WanzenBug committed Dec 2, 2020
1 parent f7ab905 commit a9fd0de
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed
- Crash when calling NodePublishVolume on non-existent volume ([#96])
- Fix an issue where newly created volumes would not be placed on any nodes, leaving them unusable ([#99])

[#96]: https://github.com/piraeusdatastore/linstor-csi/issues/96
[#99]: https://github.com/piraeusdatastore/linstor-csi/issues/99

## [0.10.1] - 2020-11-19

Expand Down
48 changes: 43 additions & 5 deletions pkg/client/linstor.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,22 @@ func (s *Linstor) Create(ctx context.Context, vol *volume.Info, req *csi.CreateV
return err
}

logger.Debug("reconcile volume placement")
err = s.reconcileResourcePlacement(ctx, vol, &params, rDef.Name, req)
if err != nil {
logger.Debugf("reconcile volume placement failed: %v", err)
return err
}

// saveVolume() makes the volume eligible for further use, i.e. this method will not be called again once saveVolume
// was used.
logger.Debug("update volume information with resource definition information")
vol.ID = rDef.Name
if err := s.saveVolume(ctx, vol); err != nil {
return err
}

volumeScheduler, err := s.schedulerByPlacementPolicy(vol)
if err != nil {
return err
}
return volumeScheduler.Create(ctx, vol, req)
return nil
}

// Delete removes a resource, all of its volumes, and snapshots from LINSTOR.
Expand Down Expand Up @@ -774,6 +779,39 @@ func (s *Linstor) reconcileVolumeDefinition(ctx context.Context, info *volume.In
return &vDef, nil
}

func (s *Linstor) reconcileResourcePlacement(ctx context.Context, vol *volume.Info, volParams *volume.Parameters, rdName string, req *csi.CreateVolumeRequest) error {
logger := s.log.WithFields(logrus.Fields{
"volume": vol.Name,
})
logger.Info("reconcile resource placement for volume")


resources, err := s.client.Resources.GetAll(ctx, rdName)
if err != nil {
return err
}

// Check that at least as many resources are placed as specified in the volume parameters
expectedAutoResources := int(volParams.PlacementCount)
expectedManualResources := len(volParams.ClientList) + len(volParams.NodeList)
if len(resources) >= expectedAutoResources && len(resources) >= expectedManualResources {
logger.Debug("resources already placed")
return nil
}

volumeScheduler, err := s.schedulerByPlacementPolicy(vol)
if err != nil {
return err
}

err = volumeScheduler.Create(ctx, vol, req)
if err != nil {
return err
}

return nil
}

// store a representation of a volume into the aux props of a resource definition.
func (s *Linstor) saveVolume(ctx context.Context, vol *volume.Info) error {
serializedVol, err := json.Marshal(vol)
Expand Down

0 comments on commit a9fd0de

Please sign in to comment.