Skip to content

Commit

Permalink
PowerScale Replication: Failback, reprotect and improvements (#159)
Browse files Browse the repository at this point in the history
  • Loading branch information
santhoshatdell authored Jan 6, 2023
1 parent 3144856 commit 12faea5
Show file tree
Hide file tree
Showing 10 changed files with 660 additions and 148 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/dell/dell-csi-extensions/volumeGroupSnapshot v1.2.1
github.com/dell/gocsi v1.6.0
github.com/dell/gofsutil v1.11.0
github.com/dell/goisilon v1.10.0
github.com/dell/goisilon v1.10.1-0.20230105231012-7aec9b1a2f2b
github.com/fsnotify/fsnotify v1.4.9
github.com/golang/protobuf v1.5.2
github.com/google/uuid v1.2.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ github.com/dell/gocsi v1.6.0 h1:ZmoMi17v1jK0RE0OGEivu52/RqHbOhP5cqs9SHExqa0=
github.com/dell/gocsi v1.6.0/go.mod h1:+ihwgNYeFTv69Ym2X2Ij1idK72JYoNR8CeiWYJrrbho=
github.com/dell/gofsutil v1.11.0 h1:HHLQVdoWF9xjI0/sLGTzX7i8aAGdZwJG/s7nzKwHLbw=
github.com/dell/gofsutil v1.11.0/go.mod h1:j639KWtc61yK9oPBZSZrSEDqhFKe446XK9etJpk/KtI=
github.com/dell/goisilon v1.10.0 h1:3TgECPV/6RzTQsDfhc1rR5rFyKJMNLYopbUJtHpYfyc=
github.com/dell/goisilon v1.10.0/go.mod h1:fJXHyh1JBcbsmPBquEulaNOFTpj1eEN5vISDf/UY1RQ=
github.com/dell/goisilon v1.10.1-0.20230105231012-7aec9b1a2f2b h1:IRgUwx8Jh3zZGSk5YHjRo5K5Qs0emjJac6QWasyOEE8=
github.com/dell/goisilon v1.10.1-0.20230105231012-7aec9b1a2f2b/go.mod h1:fJXHyh1JBcbsmPBquEulaNOFTpj1eEN5vISDf/UY1RQ=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
Expand Down
29 changes: 1 addition & 28 deletions service/controller.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package service

/*
Copyright (c) 2019-2022 Dell Inc, or its subsidiaries.
Copyright (c) 2019-2023 Dell Inc, or its subsidiaries.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -423,33 +423,6 @@ func (s *service) CreateVolume(
}
}

_, err = remoteIsiConfig.isiSvc.client.GetPolicyByName(ctx, ppName)
if err != nil {
if apiErr, ok := err.(*isiApi.JSONError); ok && apiErr.StatusCode == 404 {
err := remoteIsiConfig.isiSvc.client.CreatePolicy(ctx, ppName, rpoint, isiPath+"/"+vgName, isiPath+"/"+vgName, isiConfig.Endpoint, isiConfig.ReplicationCertificateID, true)
if err != nil {
return nil, status.Errorf(codes.Internal, "can't create protection policy %s", err.Error())
}
err = remoteIsiConfig.isiSvc.client.WaitForPolicyLastJobState(ctx, ppName, isi.FINISHED)
if err != nil {
return nil, status.Errorf(codes.Internal, "policy job couldn't reach FINISHED state %s", err.Error())
}
}
}

err = isiConfig.isiSvc.client.AllowWrites(ctx, ppName)
if err != nil {
return nil, status.Errorf(codes.Internal, "can't allow writes on local site %s", err.Error())
}
err = remoteIsiConfig.isiSvc.client.DisablePolicy(ctx, ppName)
if err != nil {
return nil, status.Errorf(codes.Internal, "can't disable the policy on TGT %s", err.Error())
}
err = remoteIsiConfig.isiSvc.client.WaitForPolicyEnabledFieldCondition(ctx, ppName, false)
if err != nil {
return nil, status.Errorf(codes.Internal, "policy couldn't reach disabled condition on TGT %s", err.Error())
}

isiPath = isiPath + "/" + VolumeGroupDir
}

Expand Down
21 changes: 21 additions & 0 deletions service/features/controller_create_delete_volume.feature
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,27 @@ Feature: Isilon CSI interface
| "volume1=_=_=10=_=_=System" | "volume1" | "failed to get volume" |
| "volume2=_=_=20=_=_=System" | "volume2" | "none" |

# Negative test for replicated volume creation: each example row either blanks
# out one of the three replication parameters (volume group prefix, RPO, remote
# system name) or supplies an RPO value ("Fifty_Minutes") that is expected to be
# rejected, and asserts the error text returned for that specific parameter.
Scenario Outline: Create Volume with Replication Enabled and with invalid arguments
Given a Isilon service
When I call CreateVolumeRequestWithReplicationParams <vgPrefix> <rpo> <remoteSystemName>
Then the error contains <errormsg>
Examples:
| vgPrefix | rpo | remoteSystemName | errormsg |
| "" | "Five_Minutes" | "cluster1" | "replication enabled but no volume group prefix specified" |
| "volumeGroupPrefix" | "" | "cluster1" | "replication enabled but no RPO specified" |
| "volumeGroupPrefix" | "Fifty_Minutes" | "cluster1" | "invalid rpo value" |
| "volumeGroupPrefix" | "Five_Minutes" | "" | "replication enabled but no remote system specified" |

# Error-path test for replicated volume creation: the replication parameters
# are the same well-formed values in every row ("volumeGroupPrefix",
# "Five_Minutes", "cluster1"); only the induced error varies, and each row
# asserts the error text surfaced for that induced failure.
Scenario Outline: Create Volume with Replication Enabled and induced errors
Given a Isilon service
And I induce error <induced>
When I call CreateVolumeRequestWithReplicationParams "volumeGroupPrefix" "Five_Minutes" "cluster1"
Then the error contains <errormsg>
Examples:
| induced | errormsg |
| "GetPolicyInternalError" | "can't ensure protection policy exists" |
| "GetPolicyNotFoundError" | "policy job couldn't reach FINISHED state" |

@deleteVolume
@v1.0.0
Scenario: Delete volume good scenario with quota enabled
Expand Down
91 changes: 61 additions & 30 deletions service/features/replication.feature
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ Feature: Isilon CSI interface
When I call Probe
And I call WithParamsCreateRemoteVolume <volhand> <keyreplremsys>
Then the error contains <errormsg>

Examples:
| volhand | keyreplremsys | errormsg |
| "" | "KeyReplicationRemoteSystem" | "volume ID is required" |
Expand All @@ -42,7 +41,6 @@ Feature: Isilon CSI interface
And I induce error <induced>
And I call CreateRemoteVolume
Then the error contains <errormsg>

Examples:
| induced | errormsg |
| "InstancesError" | "none" |
Expand All @@ -64,7 +62,6 @@ Feature: Isilon CSI interface
And I induce error <serverError2>
And I call CreateRemoteVolume
Then the error contains <errormsg>

Examples:
| getVolumeError | getExportError | serverError1 | serverError2 | errormsg |
| "VolumeExists" | "ExportExists" | "none" | "none" | "none" |
Expand Down Expand Up @@ -96,7 +93,6 @@ Feature: Isilon CSI interface
And I induce error <induced>
And I call CreateStorageProtectionGroup
Then the error contains <errormsg>

Examples:
| induced | errormsg |
| "GetExportByIDNotFoundError" | "Export id 9999999 does not exist" |
Expand All @@ -108,7 +104,6 @@ Feature: Isilon CSI interface
When I call Probe
And I call WithParamsCreateStorageProtectionGroup <volhand> <keyreplremsys>
Then the error contains <errormsg>

Examples:
| volhand | keyreplremsys | errormsg |
| "" | "remoteSystem" | "volume ID is required" |
Expand All @@ -130,20 +125,11 @@ Feature: Isilon CSI interface
| "" | "systemName" | "cluster1" | "Unable to get Volume Group ''" |
| "" | "" | "cluster1" | "Can't get systemName from PG params" |


@getStorageProtectionGroupStatus
@v1.0.0
Scenario: Get storage protection group status
Given a Isilon service
When I call GetStorageProtectionGroupStatus
Then a valid GetStorageProtectionGroupStatusResponse is returned

Scenario Outline: Get storage protection group status with parameters
Given a Isilon service
When I call Probe
And I call WithParamsGetStorageProtectionGroupStatus <id> <localSystemName> <remoteSystemName> <vgname> <clustername1> <clustername2>
Then the error contains <errormsg>

Examples:
| id | localSystemName | remoteSystemName | vgname | clustername1 | clustername2 | errormsg |
| "cluster2" | "wrongSystemName" | "wrongSystemName" | "vgname" | "cluster1" | "cluster1" | "can't find `systemName` in replication group" |
Expand All @@ -159,15 +145,15 @@ Feature: Isilon CSI interface
And I induce error <induced>
And I call GetStorageProtectionGroupStatus
Then the error contains <errormsg>

Examples:
| induced | errormsg |
| "GetJobsInternalError" | "querying active jobs for local or remote policy" |
| "GetPolicyInternalError" | "error while getting link state" |
| "GetTargetPolicyInternalError" | "error while getting link state" |
| "FailedStatus" | "none" |
| "UnknownStatus" | "error while getting link state" |
| "UnknownStatus" | "error while getting link state" |
| "Jobs" | "querying active jobs for local or remote policy" |
| "RunningJob" | "none" |
| "GetSpgErrors" | "error while getting link state" |
| "GetSpgTPErrors" | "error while getting link state" |

Expand All @@ -179,7 +165,7 @@ Feature: Isilon CSI interface
Then the error contains <errormsg>
Examples:
| systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg |
| "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "none" |
| "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "error while getting link state" |
| "wrongSystemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "can't find `systemName` parameter in replication group" |
| "systemName" | "cluster2" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "can't find cluster with name cluster2 in driver config: failed to get cluster config details for clusterName: 'cluster2'" |
| "systemName" | "cluster1" | "cluster2" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "can't find cluster with name cluster2 in driver config: failed to get cluster config details for clusterName: 'cluster2'" |
Expand All @@ -191,21 +177,20 @@ Feature: Isilon CSI interface
And I induce error <induced>
When I call SuspendExecuteAction
Then the error contains <errormsg>

Examples:
| induced | errormsg |
| "UpdatePolicyError" | "suspend: can't disable local policy" |
| "autoProbeFailed" | "auto probe is not enabled" |
| induced | errormsg |
| "UpdatePolicyError" | "suspend: can't disable local policy" |
| "autoProbeFailed" | "auto probe is not enabled" |
| "GetSpgErrors" | "suspend: policy couldn't reach disabled condition" |

Scenario Outline: Execute action sync
Given a Isilon service
And I induce error <induced>
When I call SyncExecuteAction
Then the error contains <errormsg>

Examples:
| induced | errormsg |
| "GetPolicyError" | "policy sync failed" |
| induced | errormsg |
| "GetPolicyError" | "sync: policy sync failed" |

Scenario Outline: Execute action failover
Given a Isilon service
Expand All @@ -215,9 +200,8 @@ Feature: Isilon CSI interface
Examples:
| induced | errormsg |
| "GetPolicyInternalError" | "failover: encountered error when trying to sync policy" |
| "GetJobsInternalError" | "failover: can't allow writes on target site EOF" |
| "GetJobsInternalError" | "failover: can't allow writes on target site" |
| "UpdatePolicyError" | "failover: can't disable local policy" |
| "Failover" | "failover: can't create protection policy" |
| "GetSpgTPErrors" | "failover: can't allow writes on target site" |

Scenario Outline: Execute action unplanned failover
Expand All @@ -229,24 +213,71 @@ Feature: Isilon CSI interface
| induced | errormsg |
| "GetTargetPolicyInternalError" | "unplanned failover: allow writes on target site failed" |

# Error-path test for the failback action that discards local changes: for each
# induced error the FailbackExecuteAction step is run and the returned error is
# expected to carry the "failback (discard local): ..." message listed in the row.
# Two different induced errors (GetPolicyInternalError and UpdatePolicyError)
# are expected to produce the same "can't disable local policy" message.
Scenario Outline: Execute action failback discard local
Given a Isilon service
When I induce error <induced>
And I call FailbackExecuteAction
Then the error contains <errormsg>
Examples:
| induced | errormsg |
| "GetPolicyInternalError" | "failback (discard local): can't disable local policy" |
| "GetTargetPolicyInternalError" | "failback (discard local): error waiting for condition on the remote target policy" |
| "UpdatePolicyError" | "failback (discard local): can't disable local policy" |
| "ModifyPolicyError" | "failback (discard local): can't set local policy to manual" |
| "GetJobsInternalError" | "failback (discard local): can't run resync-prep on local policy" |

# Error-path test for the failback action that discards remote changes: for
# each induced error the FailbackDiscardExecuteAction step is run and the
# returned error is expected to carry the "failback (discard remote): ..."
# message listed in the row.
# NOTE(review): the step name does not say which side is discarded; the
# "discard remote" mapping is taken from the expected error prefixes - confirm
# against the step definition.
Scenario Outline: Execute action failback discard remote
Given a Isilon service
When I induce error <induced>
And I call FailbackDiscardExecuteAction
Then the error contains <errormsg>
Examples:
| induced | errormsg |
| "GetPolicyInternalError" | "failback (discard remote): can't disable local policy" |
| "GetTargetPolicyInternalError" | "failback (discard remote): disallow writes on target site failed" |
| "UpdatePolicyError" | "failback (discard remote): can't disable local policy" |
| "ModifyPolicyError" | "failback (discard remote): can't set local policy to manual" |

Scenario Outline: Execute action reprotect
Given a Isilon service
When I induce error <induced>
And I call ReprotectExecuteAction
Then the error contains <errormsg>
Examples:
| induced | errormsg |
| "GetPolicyInternalError" | "requested action does not match with supported actions" |
| induced | errormsg |
| "GetTargetPolicyInternalError" | "reprotect: can't find target policy on the local site, perform reprotect on another side" |
| "GetPolicyInternalError" | "reprotect: can't find remote replication policy" |
| "DeletePolicyError" | "reprotect: delete policy on remote site failed" |
| "CreatePolicyError" | "reprotect: create protection policy on the local site failed" |

Scenario Outline: Execute action
Given a Isilon service
When I induce error <induced>
And I call ExecuteAction to <systemName> to <clusterNameOne> to <clusterNameTwo> to <remoteSystemName> to <vgname> to <ppname>
Then the error contains <errormsg>
Examples:
| induced | systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg |
| "GetPolicyInternalError" | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "resume: can't enable local policy" |
| induced | systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg |
| "GetPolicyInternalError" | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "resume: can't enable local policy" |
| "GetSpgErrors" | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "resume: policy couldn't reach enabled condition" |

# Parameter-validation test for failback: no error is induced; the only unusual
# input is the protection policy name, which ends in "Fifty_Min", and the call
# is expected to fail with "unable to parse RPO seconds".
# NOTE(review): presumably the RPO is derived from the policy-name suffix and
# "Fifty_Min" is not a recognised value - confirm against the step definition.
@executeAction
Scenario Outline: Execute action failback with bad params
Given a Isilon service
And I call ExecuteActionFailBackWithParams to <systemName> to <clusterNameOne> to <clusterNameTwo> to <remoteSystemName> to <vgname> to <ppname>
Then the error contains <errormsg>
Examples:
| systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg |
| "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Fifty_Min" | "unable to parse RPO seconds" |

# Parameter-validation test for failback-with-discard: no error is induced; the
# only unusual input is the protection policy name, which ends in "Fifty_Min",
# and the call is expected to fail with "unable to parse RPO seconds".
# NOTE(review): presumably the RPO is derived from the policy-name suffix and
# "Fifty_Min" is not a recognised value - confirm against the step definition.
@executeAction
Scenario Outline: Execute action failback discard with bad params
Given a Isilon service
And I call ExecuteActionFailBackDiscardWithParams to <systemName> to <clusterNameOne> to <clusterNameTwo> to <remoteSystemName> to <vgname> to <ppname>
Then the error contains <errormsg>
Examples:
| systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg |
| "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Fifty_Min" | "unable to parse RPO seconds" |

Scenario Outline: Execute bad action
Given a Isilon service
When I call BadExecuteAction
Expand Down
12 changes: 11 additions & 1 deletion service/identity.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package service

/*
Copyright (c) 2019-2022 Dell Inc, or its subsidiaries.
Copyright (c) 2019-2023 Dell Inc, or its subsidiaries.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -152,6 +152,16 @@ func (s *service) GetReplicationCapabilities(ctx context.Context, req *csiext.Ge
Type: csiext.ActionTypes_UNPLANNED_FAILOVER_LOCAL,
},
},
{
Actions: &csiext.SupportedActions_Type{
Type: csiext.ActionTypes_FAILBACK_LOCAL,
},
},
{
Actions: &csiext.SupportedActions_Type{
Type: csiext.ActionTypes_ACTION_FAILBACK_DISCARD_CHANGES_LOCAL,
},
},
{
Actions: &csiext.SupportedActions_Type{
Type: csiext.ActionTypes_REPROTECT_LOCAL,
Expand Down
Loading

0 comments on commit 12faea5

Please sign in to comment.