diff --git a/go.mod b/go.mod index c14f47d9..7f41a3d2 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/dell/dell-csi-extensions/volumeGroupSnapshot v1.2.1 github.com/dell/gocsi v1.6.0 github.com/dell/gofsutil v1.11.0 - github.com/dell/goisilon v1.10.0 + github.com/dell/goisilon v1.10.1-0.20230105231012-7aec9b1a2f2b github.com/fsnotify/fsnotify v1.4.9 github.com/golang/protobuf v1.5.2 github.com/google/uuid v1.2.0 diff --git a/go.sum b/go.sum index 684c3307..d3fd105f 100644 --- a/go.sum +++ b/go.sum @@ -121,8 +121,8 @@ github.com/dell/gocsi v1.6.0 h1:ZmoMi17v1jK0RE0OGEivu52/RqHbOhP5cqs9SHExqa0= github.com/dell/gocsi v1.6.0/go.mod h1:+ihwgNYeFTv69Ym2X2Ij1idK72JYoNR8CeiWYJrrbho= github.com/dell/gofsutil v1.11.0 h1:HHLQVdoWF9xjI0/sLGTzX7i8aAGdZwJG/s7nzKwHLbw= github.com/dell/gofsutil v1.11.0/go.mod h1:j639KWtc61yK9oPBZSZrSEDqhFKe446XK9etJpk/KtI= -github.com/dell/goisilon v1.10.0 h1:3TgECPV/6RzTQsDfhc1rR5rFyKJMNLYopbUJtHpYfyc= -github.com/dell/goisilon v1.10.0/go.mod h1:fJXHyh1JBcbsmPBquEulaNOFTpj1eEN5vISDf/UY1RQ= +github.com/dell/goisilon v1.10.1-0.20230105231012-7aec9b1a2f2b h1:IRgUwx8Jh3zZGSk5YHjRo5K5Qs0emjJac6QWasyOEE8= +github.com/dell/goisilon v1.10.1-0.20230105231012-7aec9b1a2f2b/go.mod h1:fJXHyh1JBcbsmPBquEulaNOFTpj1eEN5vISDf/UY1RQ= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= diff --git a/service/controller.go b/service/controller.go index fdb978a7..cf39ac5e 100644 --- a/service/controller.go +++ b/service/controller.go @@ -1,7 +1,7 @@ package service /* - Copyright (c) 2019-2022 Dell Inc, or its subsidiaries. + Copyright (c) 2019-2023 Dell Inc, or its subsidiaries. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -423,33 +423,6 @@ func (s *service) CreateVolume( } } - _, err = remoteIsiConfig.isiSvc.client.GetPolicyByName(ctx, ppName) - if err != nil { - if apiErr, ok := err.(*isiApi.JSONError); ok && apiErr.StatusCode == 404 { - err := remoteIsiConfig.isiSvc.client.CreatePolicy(ctx, ppName, rpoint, isiPath+"/"+vgName, isiPath+"/"+vgName, isiConfig.Endpoint, isiConfig.ReplicationCertificateID, true) - if err != nil { - return nil, status.Errorf(codes.Internal, "can't create protection policy %s", err.Error()) - } - err = remoteIsiConfig.isiSvc.client.WaitForPolicyLastJobState(ctx, ppName, isi.FINISHED) - if err != nil { - return nil, status.Errorf(codes.Internal, "policy job couldn't reach FINISHED state %s", err.Error()) - } - } - } - - err = isiConfig.isiSvc.client.AllowWrites(ctx, ppName) - if err != nil { - return nil, status.Errorf(codes.Internal, "can't allow writes on local site %s", err.Error()) - } - err = remoteIsiConfig.isiSvc.client.DisablePolicy(ctx, ppName) - if err != nil { - return nil, status.Errorf(codes.Internal, "can't disable the policy on TGT %s", err.Error()) - } - err = remoteIsiConfig.isiSvc.client.WaitForPolicyEnabledFieldCondition(ctx, ppName, false) - if err != nil { - return nil, status.Errorf(codes.Internal, "policy couldn't reach disabled condition on TGT %s", err.Error()) - } - isiPath = isiPath + "/" + VolumeGroupDir } diff --git a/service/features/controller_create_delete_volume.feature b/service/features/controller_create_delete_volume.feature index 33583fe5..691f8d76 100644 --- a/service/features/controller_create_delete_volume.feature +++ b/service/features/controller_create_delete_volume.feature @@ -103,6 +103,27 @@ Feature: Isilon CSI interface | "volume1=_=_=10=_=_=System" | "volume1" | "failed to get volume" | | "volume2=_=_=20=_=_=System" | "volume2" | "none" | + Scenario Outline: Create Volume with 
Replication Enabled and with invalid arguments + Given a Isilon service + When I call CreateVolumeRequestWithReplicationParams + Then the error contains + Examples: + | vgPrefix | rpo | remoteSystemName | errormsg | + | "" | "Five_Minutes" | "cluster1" | "replication enabled but no volume group prefix specified" | + | "volumeGroupPrefix" | "" | "cluster1" | "replication enabled but no RPO specified" | + | "volumeGroupPrefix" | "Fifty_Minutes" | "cluster1" | "invalid rpo value" | + | "volumeGroupPrefix" | "Five_Minutes" | "" | "replication enabled but no remote system specified" | + + Scenario Outline: Create Volume with Replication Enabled and induced errors + Given a Isilon service + And I induce error + When I call CreateVolumeRequestWithReplicationParams "volumeGroupPrefix" "Five_Minutes" "cluster1" + Then the error contains + Examples: + | induced | errormsg | + | "GetPolicyInternalError" | "can't ensure protection policy exists" | + | "GetPolicyNotFoundError" | "policy job couldn't reach FINISHED state" | + @deleteVolume @v1.0.0 Scenario: Delete volume good scenario with quota enabled diff --git a/service/features/replication.feature b/service/features/replication.feature index 799b4a1c..2efb17c2 100644 --- a/service/features/replication.feature +++ b/service/features/replication.feature @@ -26,7 +26,6 @@ Feature: Isilon CSI interface When I call Probe And I call WithParamsCreateRemoteVolume Then the error contains - Examples: | volhand | keyreplremsys | errormsg | | "" | "KeyReplicationRemoteSystem" | "volume ID is required" | @@ -42,7 +41,6 @@ Feature: Isilon CSI interface And I induce error And I call CreateRemoteVolume Then the error contains - Examples: | induced | errormsg | | "InstancesError" | "none" | @@ -64,7 +62,6 @@ Feature: Isilon CSI interface And I induce error And I call CreateRemoteVolume Then the error contains - Examples: | getVolumeError | getExportError | serverError1 | serverError2 | errormsg | | "VolumeExists" | "ExportExists" | "none" | 
"none" | "none" | @@ -96,7 +93,6 @@ Feature: Isilon CSI interface And I induce error And I call CreateStorageProtectionGroup Then the error contains - Examples: | induced | errormsg | | "GetExportByIDNotFoundError" | "Export id 9999999 does not exist" | @@ -108,7 +104,6 @@ Feature: Isilon CSI interface When I call Probe And I call WithParamsCreateStorageProtectionGroup Then the error contains - Examples: | volhand | keyreplremsys | errormsg | | "" | "remoteSystem" | "volume ID is required" | @@ -130,20 +125,11 @@ Feature: Isilon CSI interface | "" | "systemName" | "cluster1" | "Unable to get Volume Group ''" | | "" | "" | "cluster1" | "Can't get systemName from PG params" | - - @getStorageProtectionGroupStatus - @v1.0.0 - Scenario: Get storage protection group status - Given a Isilon service - When I call GetStorageProtectionGroupStatus - Then a valid GetStorageProtectionGroupStatusResponse is returned - Scenario Outline: Get storage protection group status with parameters Given a Isilon service When I call Probe And I call WithParamsGetStorageProtectionGroupStatus Then the error contains - Examples: | id | localSystemName | remoteSystemName | vgname | clustername1 | clustername2 | errormsg | | "cluster2" | "wrongSystemName" | "wrongSystemName" | "vgname" | "cluster1" | "cluster1" | "can't find `systemName` in replication group" | @@ -159,15 +145,15 @@ Feature: Isilon CSI interface And I induce error And I call GetStorageProtectionGroupStatus Then the error contains - Examples: | induced | errormsg | | "GetJobsInternalError" | "querying active jobs for local or remote policy" | | "GetPolicyInternalError" | "error while getting link state" | | "GetTargetPolicyInternalError" | "error while getting link state" | | "FailedStatus" | "none" | - | "UnknownStatus" | "error while getting link state" | + | "UnknownStatus" | "error while getting link state" | | "Jobs" | "querying active jobs for local or remote policy" | + | "RunningJob" | "none" | | "GetSpgErrors" | "error 
while getting link state" | | "GetSpgTPErrors" | "error while getting link state" | @@ -179,7 +165,7 @@ Feature: Isilon CSI interface Then the error contains Examples: | systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg | - | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "none" | + | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "error while getting link state" | | "wrongSystemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "can't find `systemName` parameter in replication group" | | "systemName" | "cluster2" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "can't find cluster with name cluster2 in driver config: failed to get cluster config details for clusterName: 'cluster2'" | | "systemName" | "cluster1" | "cluster2" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "can't find cluster with name cluster2 in driver config: failed to get cluster config details for clusterName: 'cluster2'" | @@ -191,21 +177,20 @@ Feature: Isilon CSI interface And I induce error When I call SuspendExecuteAction Then the error contains - Examples: - | induced | errormsg | - | "UpdatePolicyError" | "suspend: can't disable local policy" | - | "autoProbeFailed" | "auto probe is not enabled" | + | induced | errormsg | + | "UpdatePolicyError" | "suspend: can't disable local policy" | + | "autoProbeFailed" | "auto probe is not enabled" | + | "GetSpgErrors" | "suspend: policy couldn't reach disabled condition" | Scenario Outline: Execute action sync Given a Isilon service And I induce error When I call SyncExecuteAction Then the error contains - Examples: - | induced | errormsg | - 
| "GetPolicyError" | "policy sync failed" | + | induced | errormsg | + | "GetPolicyError" | "sync: policy sync failed" | Scenario Outline: Execute action failover Given a Isilon service @@ -215,9 +200,8 @@ Feature: Isilon CSI interface Examples: | induced | errormsg | | "GetPolicyInternalError" | "failover: encountered error when trying to sync policy" | - | "GetJobsInternalError" | "failover: can't allow writes on target site EOF" | + | "GetJobsInternalError" | "failover: can't allow writes on target site" | | "UpdatePolicyError" | "failover: can't disable local policy" | - | "Failover" | "failover: can't create protection policy" | | "GetSpgTPErrors" | "failover: can't allow writes on target site" | Scenario Outline: Execute action unplanned failover @@ -229,14 +213,42 @@ Feature: Isilon CSI interface | induced | errormsg | | "GetTargetPolicyInternalError" | "unplanned failover: allow writes on target site failed" | + Scenario Outline: Execute action failback discard local + Given a Isilon service + When I induce error + And I call FailbackExecuteAction + Then the error contains + Examples: + | induced | errormsg | + | "GetPolicyInternalError" | "failback (discard local): can't disable local policy" | + | "GetTargetPolicyInternalError" | "failback (discard local): error waiting for condition on the remote target policy" | + | "UpdatePolicyError" | "failback (discard local): can't disable local policy" | + | "ModifyPolicyError" | "failback (discard local): can't set local policy to manual" | + | "GetJobsInternalError" | "failback (discard local): can't run resync-prep on local policy" | + + Scenario Outline: Execute action failback discard remote + Given a Isilon service + When I induce error + And I call FailbackDiscardExecuteAction + Then the error contains + Examples: + | induced | errormsg | + | "GetPolicyInternalError" | "failback (discard remote): can't disable local policy" | + | "GetTargetPolicyInternalError" | "failback (discard remote): disallow writes 
on target site failed" | + | "UpdatePolicyError" | "failback (discard remote): can't disable local policy" | + | "ModifyPolicyError" | "failback (discard remote): can't set local policy to manual" | + Scenario Outline: Execute action reprotect Given a Isilon service When I induce error And I call ReprotectExecuteAction Then the error contains Examples: - | induced | errormsg | - | "GetPolicyInternalError" | "requested action does not match with supported actions" | + | induced | errormsg | + | "GetTargetPolicyInternalError" | "reprotect: can't find target policy on the local site, perform reprotect on another side" | + | "GetPolicyInternalError" | "reprotect: can't find remote replication policy" | + | "DeletePolicyError" | "reprotect: delete policy on remote site failed" | + | "CreatePolicyError" | "reprotect: create protection policy on the local site failed" | Scenario Outline: Execute action Given a Isilon service @@ -244,9 +256,28 @@ Feature: Isilon CSI interface And I call ExecuteAction to to to to to to Then the error contains Examples: - | induced | systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg | - | "GetPolicyInternalError" | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "resume: can't enable local policy" | + | induced | systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg | + | "GetPolicyInternalError" | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "resume: can't enable local policy" | + | "GetSpgErrors" | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Five_Minutes" | "resume: policy couldn't reach enabled condition" | + + @executeAction + Scenario Outline: Execute action failback with bad params + Given a Isilon 
service + And I call ExecuteActionFailBackWithParams to to to to to to + Then the error contains + Examples: + | systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg | + | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Fifty_Min" | "unable to parse RPO seconds" | + @executeAction + Scenario Outline: Execute action failback discard with bad params + Given a Isilon service + And I call ExecuteActionFailBackDiscardWithParams to to to to to to + Then the error contains + Examples: + | systemName | clusterNameOne | clusterNameTwo | remoteSystemName | vgname | ppname | errormsg | + | "systemName" | "cluster1" | "cluster1" | "remoteSystemName" | "VolumeGroupName" | "csi-prov-test-19743d82-192-168-111-25-Fifty_Min" | "unable to parse RPO seconds" | + Scenario Outline: Execute bad action Given a Isilon service When I call BadExecuteAction diff --git a/service/identity.go b/service/identity.go index bf1080b8..21004de2 100644 --- a/service/identity.go +++ b/service/identity.go @@ -1,7 +1,7 @@ package service /* - Copyright (c) 2019-2022 Dell Inc, or its subsidiaries. + Copyright (c) 2019-2023 Dell Inc, or its subsidiaries. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -152,6 +152,16 @@ func (s *service) GetReplicationCapabilities(ctx context.Context, req *csiext.Ge Type: csiext.ActionTypes_UNPLANNED_FAILOVER_LOCAL, }, }, + { + Actions: &csiext.SupportedActions_Type{ + Type: csiext.ActionTypes_FAILBACK_LOCAL, + }, + }, + { + Actions: &csiext.SupportedActions_Type{ + Type: csiext.ActionTypes_ACTION_FAILBACK_DISCARD_CHANGES_LOCAL, + }, + }, { Actions: &csiext.SupportedActions_Type{ Type: csiext.ActionTypes_REPROTECT_LOCAL, diff --git a/service/mock/jobs/running.json b/service/mock/jobs/running.json new file mode 100644 index 00000000..c7bc2b65 --- /dev/null +++ b/service/mock/jobs/running.json @@ -0,0 +1,109 @@ +{ + "jobs": [ + { + "action": "run", + "ads_streams_replicated": 0, + "block_specs_replicated": 0, + "bytes_recoverable": 0, + "bytes_transferred": 0, + "char_specs_replicated": 0, + "committed_files": 0, + "corrected_lins": 0, + "dead_node": false, + "directories_replicated": 0, + "dirs_changed": 0, + "dirs_deleted": 0, + "dirs_moved": 0, + "dirs_new": 0, + "duration": 1, + "encrypted": false, + "end_time": 1672870562, + "error": "", + "error_checksum_files_skipped": 0, + "error_io_files_skipped": 0, + "error_net_files_skipped": 0, + "errors": [], + "failed_chunks": 0, + "fifos_replicated": 0, + "file_data_bytes": 0, + "files_changed": 0, + "files_linked": 0, + "files_new": 0, + "files_selected": 0, + "files_transferred": 0, + "files_unlinked": 0, + "files_with_ads_replicated": 0, + "flipped_lins": 0, + "hard_links_replicated": 0, + "hash_exceptions_fixed": 0, + "hash_exceptions_found": 0, + "id": "csi-prov-test-19743d82-192-168-111-25-Five_Minutes", + "job_id": 3, + "lins_total": 0, + "network_bytes_to_source": 0, + "network_bytes_to_target": 0, + "new_files_replicated": 0, + "num_retransmitted_files": 0, + "phases": [], + "policy": { + "action": "sync", + "file_matching_pattern": {}, + "name": "csi-prov-test-19743d82-192-168-111-25-Five_Minutes", + "source_exclude_directories": [], + "source_include_directories": [], 
+ "source_root_path": "/ifs/data/new/csi-prov-test-19743d82-192.168.111.25-Five_Minutes", + "target_host": "10.247.100.10", + "target_path": "/ifs/data/new/csi-prov-test-19743d82-192.168.111.25-Five_Minutes" + }, + "policy_action": "sync", + "policy_id": "41c2c9a22de815e045decc0c7605e9f9", + "policy_name": "csi-prov-test-19743d82-192-168-111-25-Five_Minutes", + "quotas_deleted": 0, + "regular_files_replicated": 0, + "resynced_lins": 0, + "retransmitted_files": [], + "retry": 1, + "running_chunks": 0, + "sockets_replicated": 0, + "source_bytes_recovered": 0, + "source_directories_created": 0, + "source_directories_deleted": 0, + "source_directories_linked": 0, + "source_directories_unlinked": 0, + "source_directories_visited": 0, + "source_files_deleted": 0, + "source_files_linked": 0, + "source_files_unlinked": 0, + "sparse_data_bytes": 0, + "start_time": 1672870561, + "state": "running", + "succeeded_chunks": 0, + "symlinks_replicated": 0, + "sync_type": "invalid", + "target_bytes_recovered": 0, + "target_directories_created": 0, + "target_directories_deleted": 0, + "target_directories_linked": 0, + "target_directories_unlinked": 0, + "target_files_deleted": 0, + "target_files_linked": 0, + "target_files_unlinked": 0, + "target_snapshots": [], + "throughput": "0 b/s", + "total_chunks": 0, + "total_data_bytes": 0, + "total_files": 0, + "total_network_bytes": 0, + "total_phases": 0, + "unchanged_data_bytes": 0, + "up_to_date_files_skipped": 0, + "updated_files_replicated": 0, + "user_conflict_files_skipped": 0, + "warnings": [], + "workers": [], + "worm_committed_file_conflicts": 0 + } + ], + "resume": null, + "total": 1 + } \ No newline at end of file diff --git a/service/replication.go b/service/replication.go index 79342bd5..3d9cf557 100644 --- a/service/replication.go +++ b/service/replication.go @@ -1,7 +1,7 @@ package service /* - Copyright (c) 2019-2022 Dell Inc, or its subsidiaries. + Copyright (c) 2019-2023 Dell Inc, or its subsidiaries. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,6 +33,15 @@ import ( "google.golang.org/grpc/status" ) +// SyncIQ policy schedule values and target-policy states shared by the replication actions +const ( + PolicySchedulingManual = "" // schedule passed to ModifyPolicy when a policy is switched to manual + PolicySchedulingAutomatic = "when-source-modified" // schedule passed to ModifyPolicy when a policy is switched back to automatic + WritesEnabled = "writes_enabled" // FailoverFailbackState value a target policy must have before reprotect proceeds + WritesDisabled = "writes_disabled" // presumably the opposite FailoverFailbackState value; no use is visible in this part of the patch (TODO confirm) + ResyncPolicyCreated = "resync_policy_created" // condition passed to WaitForTargetPolicyCondition after resync-prep +) + func (s *service) CreateRemoteVolume(ctx context.Context, req *csiext.CreateRemoteVolumeRequest) (*csiext.CreateRemoteVolumeResponse, error) { ctx, log, _ := GetRunIDLog(ctx) @@ -414,12 +423,18 @@ func (s *service) ExecuteAction(ctx context.Context, req *csiext.ExecuteActionRe var actionFunc func(context.Context, *IsilonClusterConfig, *IsilonClusterConfig, string, *logrus.Entry) error switch action { - case csiext.ActionTypes_FAILOVER_REMOTE.String(): // FAILOVER_LOCAL is not supported as of now. Need to handle failover steps in the mirrored perspective. + case csiext.ActionTypes_FAILOVER_REMOTE.String(): // FAILOVER_LOCAL is not supported. Need to handle failover steps in the mirrored perspective. actionFunc = failover - case csiext.ActionTypes_UNPLANNED_FAILOVER_LOCAL.String(): + case csiext.ActionTypes_UNPLANNED_FAILOVER_LOCAL.String(): // UNPLANNED_FAILOVER_REMOTE is not supported. actionFunc = failoverUnplanned + case csiext.ActionTypes_FAILBACK_LOCAL.String(): // FAILBACK_REMOTE is not supported. + actionFunc = failbackDiscardLocal + case csiext.ActionTypes_ACTION_FAILBACK_DISCARD_CHANGES_LOCAL.String(): // ACTION_FAILBACK_DISCARD_CHANGES_REMOTE is not supported. + actionFunc = failbackDiscardRemote + case csiext.ActionTypes_REPROTECT_LOCAL.String(): // REPROTECT_REMOTE is not supported. 
+ actionFunc = reprotect case csiext.ActionTypes_SYNC.String(): - actionFunc = syncAction + actionFunc = synchronize case csiext.ActionTypes_SUSPEND.String(): actionFunc = suspend case csiext.ActionTypes_RESUME.String(): @@ -429,7 +444,7 @@ func (s *service) ExecuteAction(ctx context.Context, req *csiext.ExecuteActionRe } if err := actionFunc(ctx, isiConfig, remoteIsiConfig, vgName, log.WithFields(fields)); err != nil { - return nil, err + return nil, status.Error(codes.Unknown, err.Error()) // Error while executing action, shouldn't be retried. status.Error (not Errorf) because the message is not a format string: a '%' inside it must be passed through untouched. } statusResp, err := s.GetStorageProtectionGroupStatus(ctx, &csiext.GetStorageProtectionGroupStatusRequest{ @@ -452,6 +467,8 @@ func (s *service) ExecuteAction(ctx context.Context, req *csiext.ExecuteActionRe func (s *service) GetStorageProtectionGroupStatus(ctx context.Context, req *csiext.GetStorageProtectionGroupStatusRequest) (*csiext.GetStorageProtectionGroupStatusResponse, error) { ctx, log, _ := GetRunIDLog(ctx) + + log.Info("Getting storage protection group status") localParams := req.GetProtectionGroupAttributes() groupID := req.GetProtectionGroupId() isiPath := utils.GetIsiPathFromPgID(groupID) @@ -488,25 +505,25 @@ func (s *service) GetStorageProtectionGroupStatus(ctx context.Context, req *csie // obtain local policy for local cluster localP, err := isiConfig.isiSvc.client.GetPolicyByName(ctx, ppName) if err != nil { - log.Error("Can't find local replication policy on local cluster, unexpected error ", err.Error()) + log.Warn("Can't find local replication policy on local cluster, unexpected error ", err.Error()) } // obtain target policy for local cluster localTP, err := isiConfig.isiSvc.client.GetTargetPolicyByName(ctx, ppName) if err != nil { - log.Error("Can't find target replication policy on local cluster, unexpected error ", err.Error()) + log.Warn("Can't find target replication policy on local cluster, unexpected error ", err.Error()) } // obtain local policy for remote cluster remoteP, err := 
remoteIsiConfig.isiSvc.client.GetPolicyByName(ctx, ppName) if err != nil { - log.Error("Can't find local replication policy on remote cluster, unexpected error ", err.Error()) + log.Warn("Can't find local replication policy on remote cluster, unexpected error ", err.Error()) } // obtain target policy for remote cluster remoteTP, err := remoteIsiConfig.isiSvc.client.GetTargetPolicyByName(ctx, ppName) if err != nil { - log.Error("Can't find target replication policy on remote cluster, unexpected error ", err.Error()) + log.Warn("Can't find target replication policy on remote cluster, unexpected error ", err.Error()) } // Check if any of the policy jobs are currently running @@ -514,7 +531,7 @@ func (s *service) GetStorageProtectionGroupStatus(ctx context.Context, req *csie localJob, err := isiConfig.isiSvc.client.GetJobsByPolicyName(ctx, ppName) if err != nil { if apiErr, ok := err.(*isiApi.JSONError); ok && apiErr.StatusCode != 404 { - log.Error("Unexpected error while querying active jobs for local policy ", err.Error()) + log.Warn("Unexpected error while querying active jobs for local policy ", err.Error()) isSyncCheckFailed = true } } @@ -527,7 +544,7 @@ func (s *service) GetStorageProtectionGroupStatus(ctx context.Context, req *csie remoteJob, err := remoteIsiConfig.isiSvc.client.GetJobsByPolicyName(ctx, ppName) if err != nil { if apiErr, ok := err.(*isiApi.JSONError); ok && apiErr.StatusCode != 404 { - log.Error("Unexpected error while querying active jobs for remote policy ", err.Error()) + log.Warn("Unexpected error while querying active jobs for remote policy ", err.Error()) isSyncCheckFailed = true } } @@ -538,6 +555,7 @@ func (s *service) GetStorageProtectionGroupStatus(ctx context.Context, req *csie } linkState := getGroupLinkState(localP, localTP, remoteP, remoteTP, isSyncInProgress) + log.Infof("The current state for group (%s) is (%s).", groupID, linkState.String()) if linkState == csiext.StorageProtectionGroupStatus_UNKNOWN || isSyncCheckFailed { 
errMsg := "unexpected error while getting link state" @@ -556,20 +574,18 @@ func (s *service) GetStorageProtectionGroupStatus(ctx context.Context, req *csie log.Info("Trying to get replication direction") source := false - if linkState != csiext.StorageProtectionGroupStatus_FAILEDOVER && // no side can be source when in failed over state - (localP.Enabled || // when synchronized - (!remoteP.Enabled && localTP.FailoverFailbackState == "writes_enabled" && remoteTP.FailoverFailbackState == "writes_disabled")) { // when suspended (source side) + if localP != nil { // Policy can exist only on the source side source = true log.Info("Current side is source") } - log.Infof("The current state for group (%s) is (%s).", groupID, linkState.String()) resp := &csiext.GetStorageProtectionGroupStatusResponse{ Status: &csiext.StorageProtectionGroupStatus{ State: linkState, IsSource: source, }, } + log.Info("Get storage protection group status completed") return resp, nil } @@ -584,32 +600,7 @@ func failover(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIs return status.Errorf(codes.Internal, "failover: encountered error when trying to sync policy %s", err.Error()) } - log.Info("Ensuring that mirror policy exists on target site") - // Get local policy to get necessary info - localPolicy, err := localIsiConfig.isiSvc.client.GetPolicyByName(ctx, ppName) - if err != nil { - return status.Errorf(codes.Internal, "failover: can't find local replication policy, unexpected error %s", err.Error()) - } - - _, err = remoteIsiConfig.isiSvc.client.GetPolicyByName(ctx, ppName) - if err != nil { - if apiErr, ok := err.(*isiApi.JSONError); ok && apiErr.StatusCode == 404 { - err := remoteIsiConfig.isiSvc.client.CreatePolicy(ctx, ppName, localPolicy.JobDelay, - localPolicy.SourcePath, localPolicy.TargetPath, localIsiConfig.Endpoint, localIsiConfig.ReplicationCertificateID, false) - if err != nil { - return status.Errorf(codes.Internal, "failover: can't create protection policy %s", 
err.Error()) - } - err = remoteIsiConfig.isiSvc.client.WaitForPolicyLastJobState(ctx, ppName, isi.UNKNOWN) // UNKNOWN because we created disabled policy - if err != nil { - return status.Errorf(codes.Internal, "failover: remote policy job couldn't reach UNKNOWN state %s", err.Error()) - } - } else { - return status.Errorf(codes.Internal, "failover: can't ensure protection policy exists %s", err.Error()) - } - } - log.Info("Disabling policy on SRC site") - err = localIsiConfig.isiSvc.client.DisablePolicy(ctx, ppName) if err != nil { return status.Errorf(codes.Internal, "failover: can't disable local policy %s", err.Error()) @@ -621,12 +612,12 @@ func failover(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIs } log.Info("Enabling writes on TGT site") - err = remoteIsiConfig.isiSvc.client.AllowWrites(ctx, ppName) if err != nil { return status.Errorf(codes.Internal, "failover: can't allow writes on target site %s", err.Error()) } + log.Info("Failover action completed") return nil } @@ -642,10 +633,192 @@ func failoverUnplanned(ctx context.Context, localIsiConfig *IsilonClusterConfig, return status.Errorf(codes.Internal, "unplanned failover: allow writes on target site failed %s", err.Error()) } + log.Info("Unplanned failover action completed") return nil } -func syncAction(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsiConfig *IsilonClusterConfig, vgName string, log *logrus.Entry) error { +func reprotect(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsiConfig *IsilonClusterConfig, vgName string, log *logrus.Entry) error { // reprotect reverses the replication direction after a failover: the remote policy is deleted and a new policy is created on the local site (the original target) from the remote policy's settings. + log.Info("Running reprotect action") + ppName := strings.ReplaceAll(vgName, ".", "-") // policy name is the volume group name with dots replaced by dashes + + // Ensure local array's target policy exists and is write enabled (original target); anything else is rejected below with a hint to fail over first + localTP, err := localIsiConfig.isiSvc.client.GetTargetPolicyByName(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "reprotect: can't find target policy on the local site, perform reprotect on another 
side. %s", err.Error()) + } + if localTP.FailoverFailbackState != WritesEnabled { + return status.Errorf(codes.InvalidArgument, "reprotect: unable to perform reprotect with writes disabled, should perform failover first.") + } + + // Get remote policy; its JobDelay and paths are reused for the new local policy below + remotePolicy, err := remoteIsiConfig.isiSvc.client.GetPolicyByName(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "reprotect: can't find remote replication policy, unexpected error %s", err.Error()) + } + + // Delete the remote policy. NOTE(review): this happens before the local policy is created, so a CreatePolicy failure below appears to leave no source policy on either side; confirm this is recoverable. + log.Info("Deleting SyncIQ policy on the remote") + err = remoteIsiConfig.isiSvc.client.DeletePolicy(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "reprotect: delete policy on remote site failed %s", err.Error()) + } + + // Create a new local policy based on previous remote policy's parameters (its target and source paths are passed in swapped order; the remote endpoint is presumably the target host) + log.Info("Creating new local SyncIQ policy") + err = localIsiConfig.isiSvc.client.CreatePolicy(ctx, ppName, remotePolicy.JobDelay, + remotePolicy.TargetPath, remotePolicy.SourcePath, remoteIsiConfig.Endpoint, localIsiConfig.ReplicationCertificateID, true) + if err != nil { + return status.Errorf(codes.Internal, "reprotect: create protection policy on the local site failed %s", err.Error()) + } + err = localIsiConfig.isiSvc.client.WaitForPolicyLastJobState(ctx, ppName, isi.FINISHED) + if err != nil { + return status.Errorf(codes.Internal, "reprotect: policy job couldn't reach FINISHED state %s", err.Error()) + } + + log.Info("Reprotect action completed") + return nil +} + +func failbackDiscardLocal(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsiConfig *IsilonClusterConfig, vgName string, log *logrus.Entry) error { // failbackDiscardLocal fails back to the local (SRC) site: resync-prep on SRC, a sync of the TGT "_mirror" policy, resync-prep on that mirror policy, its deletion, and finally the SRC policy is returned to automatic scheduling. + log.Info("Running failback action - discard local") + ppName := strings.ReplaceAll(vgName, ".", "-") // policy name is the volume group name with dots replaced by dashes + ppNameMirror := ppName + "_mirror" // name under which the mirror policy is addressed in the steps below + + log.Info("Obtaining RPO value from policy name") + rpoInt := getRpoInt(vgName) // -1 is treated as "RPO could not be parsed" + if rpoInt == -1 { + return status.Errorf(codes.InvalidArgument, "unable to parse RPO seconds") 
+ } + + // If source policy is not disabled (unplanned failover), disable it + log.Info("Ensuring SRC policy is disabled") + err := localIsiConfig.isiSvc.client.DisablePolicy(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): can't disable local policy %s", err.Error()) + } + + // Edit the source policy to manual (empty schedule; the trailing 0 presumably clears the RPO value, confirm against goisilon). + log.Info("Setting SRC policy to manual") + err = localIsiConfig.isiSvc.client.ModifyPolicy(ctx, ppName, PolicySchedulingManual, 0) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): can't set local policy to manual %s", err.Error()) + } + + // Enable the source policy + log.Info("Enabling SRC policy") + err = localIsiConfig.isiSvc.client.EnablePolicy(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): can't enable local policy %s", err.Error()) + } + + // Run Resync-prep on source (also disables source policy), then wait for the remote target policy to report resync_policy_created and for the remote mirror policy to become enabled + log.Info("Running resync-prep on SRC policy") + err = localIsiConfig.isiSvc.client.ResyncPrep(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): can't run resync-prep on local policy %s", err.Error()) + } + err = remoteIsiConfig.isiSvc.client.WaitForTargetPolicyCondition(ctx, ppName, ResyncPolicyCreated) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): error waiting for condition on the remote target policy. 
%s", err.Error()) + } + err = remoteIsiConfig.isiSvc.client.WaitForPolicyEnabledFieldCondition(ctx, ppNameMirror, true) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): TGT mirror policy couldn't reach enabled condition %s", err.Error()) + } + + // Run Sync-Job on target policy (_mirror); presumably this is the step that brings the TGT data back to SRC + log.Info("Running sync job on TGT mirror policy") + err = remoteIsiConfig.isiSvc.client.SyncPolicy(ctx, ppNameMirror) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): policy sync failed %s", err.Error()) + } + + // Allow write on source (issued on the local cluster under the mirror policy's name) + log.Info("Allowing write on SRC") + err = localIsiConfig.isiSvc.client.AllowWrites(ctx, ppNameMirror) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): allow writes on local site failed %s", err.Error()) + } + + // Run resync-prep on target (also disables target policy), then wait for the local target policy to report resync_policy_created + log.Info("Running resync-prep on TGT mirror policy") + err = remoteIsiConfig.isiSvc.client.ResyncPrep(ctx, ppNameMirror) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): can't run resync-prep on remote mirror policy %s", err.Error()) + } + err = localIsiConfig.isiSvc.client.WaitForTargetPolicyCondition(ctx, ppNameMirror, ResyncPolicyCreated) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): error waiting for condition on the local target policy. 
%s", err.Error()) + } + + // Delete the target mirror policy as recommended (NOTE(review): the source of this recommendation is not cited here; add a reference) + log.Info("Deleting TGT mirror policy") + err = remoteIsiConfig.isiSvc.client.DeletePolicy(ctx, ppNameMirror) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): delete mirror policy on target site failed %s", err.Error()) + } + + // Edit source policy to automatic, passing back the RPO value parsed from the volume group name + log.Info("Setting SRC policy to automatic") + err = localIsiConfig.isiSvc.client.ModifyPolicy(ctx, ppName, PolicySchedulingAutomatic, rpoInt) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard local): can't set local policy to automatic %s", err.Error()) + } + + log.Info("Failback action - discard local completed") + return nil +} + +func failbackDiscardRemote(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsiConfig *IsilonClusterConfig, vgName string, log *logrus.Entry) error { + log.Info("Running failback action - discard remote") + ppName := strings.ReplaceAll(vgName, ".", "-") + + log.Info("Obtaining RPO value from policy name") + rpoInt := getRpoInt(vgName) + if rpoInt == -1 { + return status.Errorf(codes.InvalidArgument, "unable to parse RPO seconds") + } + + // If source policy is not disabled (unplanned failover), disable it + log.Info("Ensuring SRC policy is disabled") + err := localIsiConfig.isiSvc.client.DisablePolicy(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard remote): can't disable local policy %s", err.Error()) + } + + // Edit the source policy to manual. 
+ log.Info("Setting SRC policy to manual") + err = localIsiConfig.isiSvc.client.ModifyPolicy(ctx, ppName, PolicySchedulingManual, 0) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard remote): can't set local policy to manual %s", err.Error()) + } + + // disallow writes on target + log.Info("Disabling writes on TGT site") + err = remoteIsiConfig.isiSvc.client.DisallowWrites(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard remote): disallow writes on target site failed %s", err.Error()) + } + + // set source policy to automatic + log.Info("Setting SRC policy to automatic") + err = localIsiConfig.isiSvc.client.ModifyPolicy(ctx, ppName, PolicySchedulingAutomatic, rpoInt) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard remote): can't set local policy to automatic %s", err.Error()) + } + + // enable source policy + log.Info("Enabling SRC policy") + err = localIsiConfig.isiSvc.client.EnablePolicy(ctx, ppName) + if err != nil { + return status.Errorf(codes.Internal, "failback (discard remote): can't enable local policy %s", err.Error()) + } + + log.Info("Failback action - discard remote completed") + return nil +} + +func synchronize(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsiConfig *IsilonClusterConfig, vgName string, log *logrus.Entry) error { log.Info("Running sync action") // get all running // if running - wait for it and succeed @@ -653,9 +826,10 @@ func syncAction(ctx context.Context, localIsiConfig *IsilonClusterConfig, remote ppName := strings.ReplaceAll(vgName, ".", "-") err := localIsiConfig.isiSvc.client.SyncPolicy(ctx, ppName) if err != nil { - return status.Errorf(codes.Internal, "policy sync failed %s", err.Error()) + return status.Errorf(codes.Internal, "sync: policy sync failed %s", err.Error()) } + log.Info("Sync action completed") return nil } @@ -665,7 +839,6 @@ func suspend(ctx context.Context, localIsiConfig *IsilonClusterConfig, 
remoteIsi ppName := strings.ReplaceAll(vgName, ".", "-") log.Info("Disabling policy on SRC site") - err := localIsiConfig.isiSvc.client.DisablePolicy(ctx, ppName) if err != nil { return status.Errorf(codes.Internal, "suspend: can't disable local policy %s", err.Error()) @@ -676,6 +849,7 @@ func suspend(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsi return status.Errorf(codes.Internal, "suspend: policy couldn't reach disabled condition %s", err.Error()) } + log.Info("Suspend action completed") return nil } @@ -685,7 +859,6 @@ func resume(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsiC ppName := strings.ReplaceAll(vgName, ".", "-") log.Info("Enabling policy on SRC site") - err := localIsiConfig.isiSvc.client.EnablePolicy(ctx, ppName) if err != nil { return status.Errorf(codes.Internal, "resume: can't enable local policy %s", err.Error()) @@ -696,6 +869,7 @@ func resume(ctx context.Context, localIsiConfig *IsilonClusterConfig, remoteIsiC return status.Errorf(codes.Internal, "resume: policy couldn't reach enabled condition %s", err.Error()) } + log.Info("Resume action completed") return nil } @@ -711,42 +885,43 @@ func getRemoteCSIVolume(ctx context.Context, exportID int, volName, accessZone s /* - Identify the status of the protection group from local and remote policies. 
Synchronized: - - (Source side) If the local policy is enabled, remote policy is disabled, local is write enabled and remote is write disabled - - (Target side) If the local policy is disabled, remote policy is enabled, local is write disabled and remote is write enabled + - (Source side) If the local policy is enabled, remote policy is NIL, local TP is NIL and remote TP is write disabled + - (Target side) If the local policy is NIL, remote policy is enabled, local TP is write disabled and remote TP is NIL Suspended: - - (Source side) If the local policy is disabled, remote policy is disabled, local is write enabled and remote is write disabled - - (Target side) If the local policy is disabled, remote policy is disabled, local is write disabled and remote is write enabled + - (Source side) If the local policy is disabled, remote policy is NIL, local TP is NIL and remote is write disabled + - (Target side) If the local policy is NIL, remote policy is disabled, local TP is write disabled and remote TP is NIL Failover: 1. Planned failover - - (both sides) local policy is disabled, remote policy is disabled, local is write enabled and remote is write enabled + - (Source side) If the local policy is disabled, remote policy is NIL, local TP is NIL and remote TP is write enabled + - (Target side) If the local policy is NIL, remote policy is disabled, local TP is write enabled and remote TP is NIL 2. Unplanned failover (source down) - - (Source side) source is down. remote policy is still disabled and remote is write enabled - - (Target side) source is down. local policy is still disabled and local is write enabled + - (Source side) source is down. remote policy is NIL and remote TP is write enabled + - (Target side) source is down. local policy is NIL and local TP is write enabled 3. 
Unplanned failover but source is up now - - (Source side) If the local policy is enabled, remote policy is disabled, local is write enabled and remote is write enabled - - (Target side) If the local policy is disabled, remote policy is enabled, local is write enabled and remote is write enabled + - (Source side) If the local policy is enabled, remote policy is NIL, local TP is NIL and remote TP is write enabled + - (Target side) If the local policy is NIL, remote policy is enabled, local TP is write enabled and remote TP is NIL */ func getGroupLinkState(localP isi.Policy, localTP isi.TargetPolicy, remoteP isi.Policy, remoteTP isi.TargetPolicy, isSyncInProgress bool) csiext.StorageProtectionGroupStatus_State { var state csiext.StorageProtectionGroupStatus_State - if isSyncInProgress { // sync-in-progress state - state = csiext.StorageProtectionGroupStatus_SYNC_IN_PROGRESS - } else if (localP == nil && remoteP != nil && !remoteP.Enabled && localTP == nil && remoteTP != nil && remoteTP.FailoverFailbackState == "writes_enabled") || // unplanned failover & source down - source side - (localP != nil && !localP.Enabled && remoteP == nil && localTP != nil && localTP.FailoverFailbackState == "writes_enabled" && remoteTP == nil) { // target side + if (localP != nil && localP.Enabled && remoteP == nil && localTP == nil && remoteTP != nil && remoteTP.FailoverFailbackState == WritesDisabled) || // Synchronized state - source side + (localP == nil && remoteP != nil && remoteP.Enabled && localTP != nil && localTP.FailoverFailbackState == WritesDisabled && remoteTP == nil) { // target side + state = csiext.StorageProtectionGroupStatus_SYNCHRONIZED + } else if (localP != nil && !localP.Enabled && remoteP == nil && localTP == nil && remoteTP != nil && remoteTP.FailoverFailbackState == WritesDisabled) || // Suspended state - source side + (localP == nil && remoteP != nil && !remoteP.Enabled && localTP != nil && localTP.FailoverFailbackState == WritesDisabled && remoteTP == nil) { // 
target side + state = csiext.StorageProtectionGroupStatus_SUSPENDED + } else if (localP != nil && !localP.Enabled && remoteP == nil && localTP == nil && remoteTP != nil && remoteTP.FailoverFailbackState == WritesEnabled) || // planned failover - source side + (localP == nil && remoteP != nil && !remoteP.Enabled && localTP != nil && localTP.FailoverFailbackState == WritesEnabled && remoteTP == nil) { // target side state = csiext.StorageProtectionGroupStatus_FAILEDOVER - } else if localP == nil || remoteP == nil || localTP == nil || remoteTP == nil { // both arrays should be up - unexpected case - state = csiext.StorageProtectionGroupStatus_UNKNOWN - } else if (localP.Enabled && !remoteP.Enabled && localTP.FailoverFailbackState == "writes_enabled" && remoteTP.FailoverFailbackState == "writes_enabled") || // unplanned failover & source up now - source side - (!localP.Enabled && remoteP.Enabled && localTP.FailoverFailbackState == "writes_enabled" && remoteTP.FailoverFailbackState == "writes_enabled") { // target side + } else if localP == nil && remoteP == nil && localTP == nil && remoteTP != nil && remoteTP.FailoverFailbackState == WritesEnabled { // unplanned failover & source down - source side + state = csiext.StorageProtectionGroupStatus_UNKNOWN // report UNKNOWN and maintain isSource when source is down on failedover + } else if localP == nil && remoteP == nil && localTP != nil && localTP.FailoverFailbackState == WritesEnabled && remoteTP == nil { // unplanned failover & source down - target side state = csiext.StorageProtectionGroupStatus_FAILEDOVER - } else if !localP.Enabled && !remoteP.Enabled && localTP.FailoverFailbackState == "writes_enabled" && remoteTP.FailoverFailbackState == "writes_enabled" { // planned failover - source OR target side + } else if (localP != nil && localP.Enabled && remoteP == nil && localTP == nil && remoteTP != nil && remoteTP.FailoverFailbackState == WritesEnabled) || // unplanned failover & source up now - source side + (localP 
== nil && remoteP != nil && remoteP.Enabled && localTP != nil && localTP.FailoverFailbackState == WritesEnabled && remoteTP == nil) { // target side state = csiext.StorageProtectionGroupStatus_FAILEDOVER - } else if (localP.Enabled && !remoteP.Enabled && localTP.FailoverFailbackState == "writes_enabled" && remoteTP.FailoverFailbackState == "writes_disabled") || // Synchronized state - source side - (!localP.Enabled && remoteP.Enabled && localTP.FailoverFailbackState == "writes_disabled" && remoteTP.FailoverFailbackState == "writes_enabled") { // target side - state = csiext.StorageProtectionGroupStatus_SYNCHRONIZED - } else if (!localP.Enabled && !remoteP.Enabled && localTP.FailoverFailbackState == "writes_enabled" && remoteTP.FailoverFailbackState == "writes_disabled") || // Suspended state - source side - (!localP.Enabled && !remoteP.Enabled && localTP.FailoverFailbackState == "writes_disabled" && remoteTP.FailoverFailbackState == "writes_enabled") { // target side - state = csiext.StorageProtectionGroupStatus_SUSPENDED - } else if localTP.LastJobState == "failed" || remoteTP.LastJobState == "failed" { // invalid state, sync job failed + } else if isSyncInProgress { // sync-in-progress state + state = csiext.StorageProtectionGroupStatus_SYNC_IN_PROGRESS + } else if (remoteTP != nil && remoteTP.LastJobState == "failed") || (localTP != nil && localTP.LastJobState == "failed") { // invalid state, sync job failed state = csiext.StorageProtectionGroupStatus_INVALID } else { // unknown state state = csiext.StorageProtectionGroupStatus_UNKNOWN @@ -754,3 +929,20 @@ func getGroupLinkState(localP isi.Policy, localTP isi.TargetPolicy, remoteP isi. 
return state } + +func getRpoInt(vgName string) int { + s := strings.Split(vgName, "-") // split by "_" and get last part -- it would be RPO + rpo := s[len(s)-1] + + rpoEnum := RPOEnum(rpo) + if err := rpoEnum.IsValid(); err != nil { + return -1 + } + + rpoInt, err := rpoEnum.ToInt() + if err != nil { + return -1 + } + + return rpoInt +} diff --git a/service/step_defs_test.go b/service/step_defs_test.go index 0771028d..2c9bd28f 100644 --- a/service/step_defs_test.go +++ b/service/step_defs_test.go @@ -1,7 +1,7 @@ package service /* - Copyright (c) 2019-2022 Dell Inc, or its subsidiaries. + Copyright (c) 2019-2023 Dell Inc, or its subsidiaries. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -393,10 +393,13 @@ func FeatureContext(s *godog.Suite) { s.Step(`^a valid ExecuteActionResponse is returned$`, f.aValidExecuteActionResponseIsReturned) s.Step(`I call FailoverExecuteAction`, f.iCallExecuteActionSyncFailover) s.Step(`I call FailoverUnplannedExecuteAction`, f.iCallExecuteActionSyncFailoverUnplanned) + s.Step(`I call FailbackExecuteAction`, f.iCallExecuteActionFailback) + s.Step(`I call FailbackDiscardExecuteAction`, f.iCallExecuteActionFailbackDiscard) s.Step(`I call BadExecuteAction`, f.iCallExecuteActionBad) s.Step(`^I call BadCreateRemoteVolume`, f.iCallCreateRemoteVolumeBad) s.Step(`^I call BadCreateStorageProtectionGroup`, f.iCallCreateStorageProtectionGroupBad) - //s.Step(`I call ExecuteActionWithParams "([^"]*)" "([^"]*)" "([^"]*)" "([^"]*)" "([^"]*)" "([^"]*)"$`, f.iCallExecuteActionWithParams) + s.Step(`I call ExecuteActionFailBackWithParams to "([^"]*)" to "([^"]*)" to "([^"]*)" to "([^"]*)" to "([^"]*)" to "([^"]*)"$`, f.iCallExecuteActionFailbackWithParams) + s.Step(`I call ExecuteActionFailBackDiscardWithParams to "([^"]*)" to "([^"]*)" to "([^"]*)" to "([^"]*)" to "([^"]*)" to "([^"]*)"$`, f.iCallExecuteActionFailbackDiscardWithParams) s.Step(`^I call 
GetReplicationCapabilities`, f.iCallGetReplicationCapabilities) s.Step(`^a valid GetReplicationCapabilitiesResponse is returned$`, f.aValidGetReplicationCapabilitiesResponseIsReturned) s.Step(`^I call ValidateConnectivity$`, f.iCallValidateVolumeHostConnectivity) @@ -410,6 +413,7 @@ func FeatureContext(s *godog.Suite) { s.Step(`^I call GetSubDirectoryCount`, f.iCallGetSubDirectoryCount) s.Step(`^I call DeleteSnapshot`, f.iCallDeleteSnapshotIsiService) s.Step(`^I call CreateVolumeRequest$`, f.iCallCreateVolumeReplicationEnabled) + s.Step(`^I call CreateVolumeRequestWithReplicationParams "([^"]*)" "([^"]*)" "([^"]*)"$`, f.iCallCreateVolumeReplicationEnabledWithParams) s.Step(`^I call CreateVolumeFromSnapshotMultiReader "([^"]*)" "([^"]*)"$`, f.iCallCreateVolumeFromSnapshotMultiReader) s.Step(`^a valid DeleteSnapshotResponse is returned$`, f.aValidDeleteSnapshotResponseIsReturned) s.Step(`^I set mode to "([^"]*)"$`, f.iSetModeTo) @@ -869,12 +873,16 @@ func (f *feature) iInduceError(errtype string) error { stepHandlersErrors.DeletePolicyInternalError = true case "DeletePolicyNotAPIError": stepHandlersErrors.DeletePolicyNotAPIError = true + case "CreatePolicyError": + stepHandlersErrors.CreatePolicyError = true case "FailedStatus": stepHandlersErrors.FailedStatus = true case "UnknownStatus": stepHandlersErrors.UnknownStatus = true case "UpdatePolicyError": stepHandlersErrors.UpdatePolicyError = true + case "ModifyPolicyError": + stepHandlersErrors.ModifyPolicyError = true case "Reprotect": stepHandlersErrors.Reprotect = true case "ReprotectTP": @@ -887,6 +895,8 @@ func (f *feature) iInduceError(errtype string) error { stepHandlersErrors.FailoverTP = true case "Jobs": stepHandlersErrors.Jobs = true + case "RunningJob": + stepHandlersErrors.RunningJob = true case "GetSpgErrors": stepHandlersErrors.GetSpgErrors = true case "GetSpgTPErrors": @@ -1113,17 +1123,21 @@ func clearErrors() { stepHandlersErrors.getPolicyTPCount = 0 stepHandlersErrors.getPolicyInternalErrorTPCount 
= 0 stepHandlersErrors.getPolicyNotFoundTPCount = 0 + stepHandlersErrors.ModifyPolicyCount = 0 stepHandlersErrors.DeletePolicyError = false stepHandlersErrors.DeletePolicyInternalError = false stepHandlersErrors.DeletePolicyNotAPIError = false + stepHandlersErrors.CreatePolicyError = false stepHandlersErrors.FailedStatus = false stepHandlersErrors.UnknownStatus = false stepHandlersErrors.UpdatePolicyError = false + stepHandlersErrors.ModifyPolicyError = false stepHandlersErrors.Reprotect = false stepHandlersErrors.ReprotectTP = false stepHandlersErrors.Failover = false stepHandlersErrors.FailoverTP = false stepHandlersErrors.Jobs = false + stepHandlersErrors.RunningJob = false stepHandlersErrors.GetPolicyError = false stepHandlersErrors.GetSpgErrors = false stepHandlersErrors.GetSpgTPErrors = false @@ -2977,8 +2991,8 @@ func executeActionRequestFailoverUnplanned(s *service) *csiext.ExecuteActionRequ return req } -func (f *feature) iCallExecuteActionSyncFailover() error { - req := executeActionRequestFailover(f.service) +func (f *feature) iCallExecuteActionFailback() error { + req := executeActionRequestFailback(f.service) f.executeActionRequest = req f.executeActionResponse, f.err = f.service.ExecuteAction(context.Background(), req) if f.err != nil { @@ -2987,15 +3001,45 @@ func (f *feature) iCallExecuteActionSyncFailover() error { return nil } -func executeActionRequestWithParams(s *service, systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname string) *csiext.ExecuteActionRequest { +func executeActionRequestFailback(s *service) *csiext.ExecuteActionRequest { action := &csiext.Action{ - ActionTypes: csiext.ActionTypes_RESUME, + ActionTypes: csiext.ActionTypes_FAILBACK_LOCAL, } params := map[string]string{ - //s.opts.replicationContextPrefix + systemName: clusterNameOne, - //s.opts.replicationContextPrefix + remoteSystemName: clusterNameTwo, - //s.opts.replicationContextPrefix + vgname: ppname, + s.opts.replicationContextPrefix + 
"systemName": "cluster1", + s.opts.replicationContextPrefix + "remoteSystemName": "cluster1", + s.opts.replicationContextPrefix + "VolumeGroupName": "csi-prov-test-19743d82-192-168-111-25-Five_Minutes", + } + req := &csiext.ExecuteActionRequest{ + ActionId: "", + ProtectionGroupId: "", + ActionTypes: &csiext.ExecuteActionRequest_Action{Action: action}, + ProtectionGroupAttributes: params, + RemoteProtectionGroupId: "", + RemoteProtectionGroupAttributes: nil, + } + + return req +} + +func (f *feature) iCallExecuteActionFailbackDiscard() error { + req := executeActionRequestFailbackDiscard(f.service) + f.executeActionRequest = req + f.executeActionResponse, f.err = f.service.ExecuteAction(context.Background(), req) + if f.err != nil { + log.Printf("ExecuteAction call failed: %s\n", f.err.Error()) + } + return nil +} +func executeActionRequestFailbackDiscard(s *service) *csiext.ExecuteActionRequest { + action := &csiext.Action{ + ActionTypes: csiext.ActionTypes_ACTION_FAILBACK_DISCARD_CHANGES_LOCAL, + } + params := map[string]string{ + s.opts.replicationContextPrefix + "systemName": "cluster1", + s.opts.replicationContextPrefix + "remoteSystemName": "cluster1", + s.opts.replicationContextPrefix + "VolumeGroupName": "csi-prov-test-19743d82-192-168-111-25-Five_Minutes", } req := &csiext.ExecuteActionRequest{ ActionId: "", @@ -3009,6 +3053,16 @@ func executeActionRequestWithParams(s *service, systemName, clusterNameOne, clus return req } +func (f *feature) iCallExecuteActionSyncFailover() error { + req := executeActionRequestFailover(f.service) + f.executeActionRequest = req + f.executeActionResponse, f.err = f.service.ExecuteAction(context.Background(), req) + if f.err != nil { + log.Printf("ExecuteAction call failed: %s\n", f.err.Error()) + } + return nil +} + func (f *feature) iCallExecuteActionBad() error { req := executeActionRequestBad(f.service) f.executeActionRequest = req @@ -3040,16 +3094,50 @@ func executeActionRequestBad(s *service) 
*csiext.ExecuteActionRequest { return req } -func (f *feature) iCallExecuteActionWithParams(s *service, systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname string) error { - req := executeActionRequestWithParams(f.service, systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname) +func (f *feature) iCallExecuteActionFailbackWithParams(systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname string) error { + action := &csiext.Action{ + ActionTypes: csiext.ActionTypes_FAILBACK_LOCAL, + } + req := executeActionFailbackRequestWithParams(f.service, action, systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname) f.executeActionRequest = req f.executeActionResponse, f.err = f.service.ExecuteAction(context.Background(), req) if f.err != nil { - log.Printf("ExecuteAction call failed: %s\n", f.err.Error()) + log.Printf("iCallExecuteActionFailbackWithParams call failed: %s\n", f.err.Error()) } return nil } +func (f *feature) iCallExecuteActionFailbackDiscardWithParams(systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname string) error { + action := &csiext.Action{ + ActionTypes: csiext.ActionTypes_ACTION_FAILBACK_DISCARD_CHANGES_LOCAL, + } + req := executeActionFailbackRequestWithParams(f.service, action, systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname) + f.executeActionRequest = req + f.executeActionResponse, f.err = f.service.ExecuteAction(context.Background(), req) + if f.err != nil { + log.Printf("iCallExecuteActionFailbackDiscardWithParams call failed: %s\n", f.err.Error()) + } + return nil +} + +func executeActionFailbackRequestWithParams(s *service, action *csiext.Action, systemName, clusterNameOne, clusterNameTwo, remoteSystemName, vgname, ppname string) *csiext.ExecuteActionRequest { + params := map[string]string{ + s.opts.replicationContextPrefix + systemName: clusterNameOne, + s.opts.replicationContextPrefix + remoteSystemName: 
clusterNameTwo, + s.opts.replicationContextPrefix + vgname: ppname, + } + req := &csiext.ExecuteActionRequest{ + ActionId: "", + ProtectionGroupId: "", + ActionTypes: &csiext.ExecuteActionRequest_Action{Action: action}, + ProtectionGroupAttributes: params, + RemoteProtectionGroupId: "", + RemoteProtectionGroupAttributes: nil, + } + + return req +} + func getCreateRemoteVolumeRequestBad(s *service) *csiext.CreateRemoteVolumeRequest { req := new(csiext.CreateRemoteVolumeRequest) req.VolumeHandle = "volume1=_=_=19=_=_=System" @@ -3131,6 +3219,10 @@ func (f *feature) aValidGetReplicationCapabilitiesResponseIsReturned() error { count = count + 1 case csiext.ActionTypes_UNPLANNED_FAILOVER_LOCAL: count = count + 1 + case csiext.ActionTypes_FAILBACK_LOCAL: + count = count + 1 + case csiext.ActionTypes_ACTION_FAILBACK_DISCARD_CHANGES_LOCAL: + count = count + 1 case csiext.ActionTypes_REPROTECT_LOCAL: count = count + 1 case csiext.ActionTypes_SUSPEND: @@ -3348,6 +3440,51 @@ func getCreatevolumeReplicationEnabled(s *service) *csi.CreateVolumeRequest { return req } +func (f *feature) iCallCreateVolumeReplicationEnabledWithParams(vgPrefix, rpo, remoteSystem string) error { + req := getCreatevolumeReplicationEnabledWithParams(f.service, vgPrefix, rpo, remoteSystem) + f.createVolumeRequestTest = req + f.createVolumeResponseTest, f.err = f.service.CreateVolume(context.Background(), req) + return nil +} + +func getCreatevolumeReplicationEnabledWithParams(s *service, vgPrefix, rpo, remoteSystem string) *csi.CreateVolumeRequest { + req := new(csi.CreateVolumeRequest) + req.Name = "volume1" + capacityRange := new(csi.CapacityRange) + capacityRange.RequiredBytes = 8 * 1024 * 1024 * 1024 + req.CapacityRange = capacityRange + mount := new(csi.VolumeCapability_MountVolume) + capability := new(csi.VolumeCapability) + accessType := new(csi.VolumeCapability_Mount) + accessType.Mount = mount + capability.AccessType = accessType + accessMode := new(csi.VolumeCapability_AccessMode) + 
accessMode.Mode = csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY + capability.AccessMode = accessMode + capabilities := make([]*csi.VolumeCapability, 0) + capabilities = append(capabilities, capability) + parameters := make(map[string]string) + parameters[AccessZoneParam] = "System" + parameters[IsiPathParam] = "/ifs/data/csi-isilon" + parameters[s.WithRP(KeyReplicationEnabled)] = "true" + if vgPrefix != "" { + parameters[s.WithRP(KeyReplicationVGPrefix)] = vgPrefix + } + parameters[s.WithRP(KeyReplicationRemoteAccessZone)] = "remoteAccessZone" + parameters[s.WithRP(KeyReplicationRemoteAzServiceIP)] = "remoteAzServiceIP" + parameters[s.WithRP(KeyReplicationRemoteRootClientEnabled)] = "remoteRootClientEnabled" + if rpo != "" { + parameters[s.WithRP(KeyReplicationRPO)] = rpo + } + if remoteSystem != "" { + parameters[s.WithRP(KeyReplicationRemoteSystem)] = remoteSystem + } + parameters[req.VolumeContentSource.String()] = "contentsource" + req.Parameters = parameters + req.VolumeCapabilities = capabilities + return req +} + func (f *feature) aValidCreateVolumeRespIsReturned() error { if f.err != nil { stepHandlersErrors.ExportNotFoundError = false diff --git a/service/step_handlers_test.go b/service/step_handlers_test.go index 3a5eab9b..77bfb31c 100644 --- a/service/step_handlers_test.go +++ b/service/step_handlers_test.go @@ -1,7 +1,7 @@ package service /* - Copyright (c) 2019-2022 Dell Inc, or its subsidiaries. + Copyright (c) 2019-2023 Dell Inc, or its subsidiaries. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -19,18 +19,20 @@ package service import ( "encoding/json" "fmt" - "github.com/dell/csi-isilon/service/mock/k8s" "io/ioutil" "net/http" "path/filepath" "strings" "time" + "github.com/dell/csi-isilon/service/mock/k8s" + + "sync" + isiapi "github.com/dell/goisilon/api" "github.com/gorilla/mux" log "github.com/sirupsen/logrus" "google.golang.org/grpc/codes" - "sync" ) var ( @@ -77,19 +79,23 @@ var ( getSpgCount int getSpgTPCount int getExportCount int + ModifyPolicyCount int GetPolicyNotFoundError bool DeletePolicyError bool DeletePolicyInternalError bool DeletePolicyNotAPIError bool + CreatePolicyError bool FailedStatus bool UnknownStatus bool UpdatePolicyError bool + ModifyPolicyError bool Reprotect bool ReprotectTP bool Failover bool FailoverTP bool GetPolicyError bool Jobs bool + RunningJob bool GetSpgErrors bool GetSpgTPErrors bool GetExportPolicyError bool @@ -174,6 +180,7 @@ func getRouter() http.Handler { isilonRouter.HandleFunc("/platform/1/snapshot/snapshots/{snapshot_id}/", handleDeleteSnapshot).Methods("DELETE") isilonRouter.HandleFunc("/platform/1/snapshot/snapshots/{snapshot_id}/", handleGetSnapshotByID).Methods("GET") isilonRouter.HandleFunc("/namespace/ifs/.snapshot/{snapshot_name}/data/csi-isilon/{volume_id}", handleGetSnapshotSize).Methods("GET").Queries("detail", "size", "max-depth", "-1") + isilonRouter.HandleFunc("/platform/11/sync/policies/", handleCreatePolicy).Methods("POST") isilonRouter.HandleFunc("/platform/11/sync/policies/", handleGetPoliciesByName).Methods("GET") isilonRouter.HandleFunc("/platform/11/sync/policies/{id}", handleGetPoliciesByName).Methods("GET") isilonRouter.HandleFunc("/platform/11/sync/policies/{name}", handleGetPoliciesByName).Methods("GET") @@ -732,7 +739,7 @@ func handleGetPoliciesByName(w http.ResponseWriter, r *http.Request) { writeError(w, "", http.StatusNotFound, codes.NotFound) } } - if stepHandlersErrors.UpdatePolicyError { + if stepHandlersErrors.UpdatePolicyError || stepHandlersErrors.ModifyPolicyError { 
w.Write(readFromFile("mock/policy/get_policies.txt")) } @@ -776,6 +783,9 @@ func handleGetJobs(w http.ResponseWriter, r *http.Request) { writeError(w, "", http.StatusInternalServerError, codes.Internal) } } + if stepHandlersErrors.RunningJob { + w.Write(readFromFile("mock/jobs/running.json")) + } w.Write(readFromFile("mock/jobs/empty.json")) } @@ -910,6 +920,14 @@ func handleGetTargetPoliciesByName(w http.ResponseWriter, r *http.Request) { w.Write(readFromFile("mock/policy/empty.txt")) } + if stepHandlersErrors.DeletePolicyError { + w.Write(readFromFile("mock/policy/get_target_policies2.txt")) + } + + if stepHandlersErrors.CreatePolicyError { + w.Write(readFromFile("mock/policy/get_target_policies2.txt")) + } + defer func() { stepHandlersErrors.count++ }() @@ -920,11 +938,32 @@ func handleGetTargetPoliciesByName(w http.ResponseWriter, r *http.Request) { } } +func handleCreatePolicy(w http.ResponseWriter, r *http.Request) { + if stepHandlersErrors.CreatePolicyError { + writeError(w, "", http.StatusNotFound, codes.Internal) + return + } + + w.Write(readFromFile("mock/policy/get_policies.txt")) +} + func handleUpdatePolicy(w http.ResponseWriter, r *http.Request) { if stepHandlersErrors.UpdatePolicyError { w.WriteHeader(http.StatusInternalServerError) return } + if stepHandlersErrors.ModifyPolicyError { + defer func() { + stepHandlersErrors.ModifyPolicyCount++ + }() + if stepHandlersErrors.ModifyPolicyCount%2 == 0 { + w.Write(readFromFile("mock/policy/get_policies2.txt")) + } else { + w.WriteHeader(http.StatusInternalServerError) + return + } + } + w.Write(readFromFile("mock/policy/get_policies2.txt")) }