Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Volume Health Monitoring #1495

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions charts/aws-ebs-csi-driver/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ spec:
{{- if .Values.controller.sdkDebugLog }}
- --aws-sdk-debug-log=true
{{- end}}
{{- if .Values.controller.volumeHealthMonitoring }}
- --volume-health-monitoring=true
{{- end}}
{{- with .Values.controller.loggingFormat }}
- --logging-format={{ . }}
{{- end }}
Expand Down Expand Up @@ -324,6 +327,42 @@ spec:
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if .Values.controller.volumeHealthMonitoring }}
- name: csi-health-monitor
image: {{ printf "%s%s:%s" (default "" .Values.image.containerRegistry) .Values.sidecars.healthMonitor.image.repository .Values.sidecars.healthMonitor.image.tag }}
imagePullPolicy: {{ default .Values.image.pullPolicy .Values.sidecars.healthMonitor.image.pullPolicy }}
args:
- --csi-address=$(ADDRESS)
- --leader-election=true
torredil marked this conversation as resolved.
Show resolved Hide resolved
- --v={{ .Values.sidecars.healthMonitor.logLevel }}
- --monitor-interval=1m
- --list-volumes-interval=1m
- --worker-threads=1
env:
- name: ADDRESS
value: /var/lib/csi/sockets/pluginproxy/csi.sock
{{- if .Values.proxy.http_proxy }}
{{- include "aws-ebs-csi-driver.http-proxy" . | nindent 12 }}
{{- end }}
{{- with .Values.sidecars.healthMonitor.env }}
{{- . | toYaml | nindent 12 }}
{{- end }}
envFrom:
{{- with .Values.controller.envFrom }}
{{- . | toYaml | nindent 12 }}
{{- end }}
volumeMounts:
- name: socket-dir
mountPath: /var/lib/csi/sockets/pluginproxy/
{{- with default .Values.controller.resources .Values.sidecars.healthMonitor.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.sidecars.healthMonitor.securityContext }}
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- end }}
- name: liveness-probe
image: {{ printf "%s%s:%s" (default "" .Values.image.containerRegistry) .Values.sidecars.livenessProbe.image.repository .Values.sidecars.livenessProbe.image.tag }}
imagePullPolicy: {{ default .Values.image.pullPolicy .Values.sidecars.livenessProbe.image.pullPolicy }}
Expand Down
15 changes: 15 additions & 0 deletions charts/aws-ebs-csi-driver/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@ sidecars:
securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
healthMonitor:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should consider allowing users to specify their own monitoring interval via the monitor-interval flag: https://github.com/kubernetes-csi/external-health-monitor/blob/f11edf7690317fbad79ac0506a166eef3d78bf18/cmd/csi-external-health-monitor-controller/main.go#L55

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Leaving this open until we decide what the default for monitor-interval will be.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Every configuration option represents a failure of the software to do the right thing automatically. Every configuration option needs to be documented and protected by unit tests, thereby increasing the cognitive load of user and developer alike. Sometimes they are necessary, but only as a last resort.

env: []
image:
pullPolicy: IfNotPresent
repository: registry.k8s.io/sig-storage/csi-external-health-monitor-controller
tag: "v0.8.0"
logLevel: 2
resources: {}
securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
nodeDriverRegistrar:
env: []
image:
Expand All @@ -113,6 +124,10 @@ controller:
additionalArgs: []
sdkDebugLog: false
loggingFormat: text
# EXPERIMENTAL
torredil marked this conversation as resolved.
Show resolved Hide resolved
# Enable support for Volume Health Monitoring (https://kubernetes.io/docs/concepts/storage/volume-health-monitoring/)
# Read `docs/volume-health-monitoring.md` before enabling!
volumeHealthMonitoring: false
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
Expand Down
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func main() {
driver.WithKubernetesClusterID(options.ControllerOptions.KubernetesClusterID),
driver.WithAwsSdkDebugLog(options.ControllerOptions.AwsSdkDebugLog),
driver.WithWarnOnInvalidTag(options.ControllerOptions.WarnOnInvalidTag),
driver.WithVolumeHealthMonitoring(options.ControllerOptions.VolumeHealthMonitoring),
)
if err != nil {
klog.ErrorS(err, "failed to create driver")
Expand Down
3 changes: 3 additions & 0 deletions cmd/options/controller_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ type ControllerOptions struct {
AwsSdkDebugLog bool
// flag to warn on invalid tag, instead of returning an error
WarnOnInvalidTag bool
// flag to enable Volume Health Monitoring support
VolumeHealthMonitoring bool
}

func (s *ControllerOptions) AddFlags(fs *flag.FlagSet) {
Expand All @@ -45,4 +47,5 @@ func (s *ControllerOptions) AddFlags(fs *flag.FlagSet) {
fs.StringVar(&s.KubernetesClusterID, "k8s-tag-cluster-id", "", "ID of the Kubernetes cluster used for tagging provisioned EBS volumes (optional).")
fs.BoolVar(&s.AwsSdkDebugLog, "aws-sdk-debug-log", false, "To enable the aws sdk debug log level (default to false).")
fs.BoolVar(&s.WarnOnInvalidTag, "warn-on-invalid-tag", false, "To warn on invalid tags, instead of returning an error")
fs.BoolVar(&s.VolumeHealthMonitoring, "volume-health-monitoring", false, "To enable experimental Volume Health Monitoring support.")
}
5 changes: 5 additions & 0 deletions cmd/options/controller_options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ func TestControllerOptions(t *testing.T) {
flag: "aws-sdk-debug-log",
found: true,
},
{
name: "lookup volume-health-monitoring",
flag: "volume-health-monitoring",
found: true,
},
{
name: "fail for non-desired flag",
flag: "some-other-flag",
Expand Down
38 changes: 38 additions & 0 deletions docs/volume-health-monitoring.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Volume Health Monitoring

The EBS CSI Driver provides experimental support for [Volume Health Monitoring](https://kubernetes.io/docs/concepts/storage/volume-health-monitoring/). Because Volume Health Monitoring is an alpha feature, this support is subject to change as the feature evolves.

When this feature is enabled, the EBS CSI Driver will use [`DescribeVolumeStatus`](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeVolumeStatus.html) to determine the status of volumes and report abnormal volumes to the Container Orchestrator.

## Prerequisites

If using Kubernetes, Volume Health Monitoring requires Kubernetes 1.21 or later. For other Container Orchestrators, refer to their respective documentation for Volume Health Monitoring support.

The EBS CSI Driver must be given permission to access the [`DescribeVolumeStatus` EC2 API](https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeVolumeStatus.html) for the volumes in the cluster. This example snippet can be used in an IAM policy to grant access to `DescribeVolumeStatus` on all volumes:

```json
{
"Effect": "Allow",
"Action": [
"ec2:DescribeVolumeStatus"
],
"Resource": "*"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about restricting to just volumes created by the CSI driver?

Copy link
Contributor Author

@ConnorJC3 ConnorJC3 Jan 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think documenting how to do that is better left to #1340. Right now, our default example IAM policy gives the driver global permissions for several APIs like DescribeVolumes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're going to add to the entropy that #1340 has to clean up, at least leave a note (if not a new rev) over there.

}

```

The controller must be passed the CLI flag `--volume-health-monitoring=true` (exposed via the Helm parameter `controller.volumeHealthMonitoring`).

Additionally, the controller needs to run with the [External Health Monitor sidecar](https://github.com/kubernetes-csi/external-health-monitor) (this will be performed automatically when using Helm with `controller.volumeHealthMonitoring`).

## `ListVolumes` Support

In the default configuration, the EBS CSI Driver supports Volume Health Monitoring only using `ControllerGetVolume`. This will result in 2 AWS API calls per volume (for `DescribeVolumes` and `DescribeVolumeStatus`). The [`ListVolumes`](https://github.com/container-storage-interface/spec/blob/master/spec.md#listvolumes) CSI RPC call can be used to batch up to 500 volumes with the same 2 AWS API calls.

The EBS CSI Driver can only support `ListVolumes` if the volumes are marked on a per-cluster basis. `ListVolumes` support will be automatically enabled if the EBS CSI Driver is launched using the CLI parameter `--k8s-tag-cluster-id`. This parameter is available using the Helm parameter `controller.k8sTagClusterId`, and is automatically set based on the EKS cluster name when installing the driver as an EKS-managed addon.

## Operation

The EBS CSI Driver will report any volumes with `impaired` status as abnormal. This will record a `VolumeConditionAbnormal` event on the `PersistentVolumeClaim` associated with the abnormal volume. This event will include the most recent message from the AWS API about the volume's status.

At this time, no other action will be taken on abnormal volumes. The cluster administrator and/or users should monitor `PersistentVolumeClaim`s for `VolumeConditionAbnormal` and take manual steps to rectify volumes.
152 changes: 138 additions & 14 deletions pkg/cloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,20 @@ const (
VolumeTypeStandard = "standard"
)

// Constants from pkg/driver/constants.go
// You MUST also change them there!
const (
// ResourceLifecycleTagPrefix is prefix of tag for provisioned EBS volume that
// marks them as owned by the cluster. Used only when --cluster-id is set.
ResourceLifecycleTagPrefix = "kubernetes.io/cluster/"

// ResourceLifecycleOwned is the value we use when tagging resources to indicate
// that the resource is considered owned and managed by the cluster,
// and in particular that the lifecycle is tied to the lifecycle of the cluster.
// From k8s.io/legacy-cloud-providers/aws/tags.go.
ResourceLifecycleOwned = "owned"
)

// AWS provisioning limits.
// Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html
const (
Expand Down Expand Up @@ -132,6 +146,8 @@ const (
KubernetesTagKeyPrefix = "kubernetes.io"
// AWSTagKeyPrefix is the prefix of the key value that is reserved for AWS.
AWSTagKeyPrefix = "aws:"
// FilterTagPrefix is the prefix of a filter to find by tag
FilterTagPrefix = "tag:"
//AwsEbsDriverTagKey is the tag to identify if a volume/snapshot is managed by ebs csi driver
AwsEbsDriverTagKey = "ebs.csi.aws.com/cluster"
)
Expand Down Expand Up @@ -166,6 +182,9 @@ var (

// VolumeNotBeingModified is returned if volume being described is not being modified
VolumeNotBeingModified = fmt.Errorf("volume is not being modified")

// ErrInternal is an error that is returned when "impossible" events occur.
ErrInternal = errors.New("Internal error (please submit a bug report)")
)

// Set during build time via -ldflags
Expand Down Expand Up @@ -200,6 +219,13 @@ type DiskOptions struct {
SnapshotID string
}

// DiskStatus contains information about an EBS volume's status
type DiskStatus struct {
Status string
// The description of the most recent event (or blank if no events), regardless of status
Description string
}

// Snapshot represents an EBS volume snapshot
type Snapshot struct {
SnapshotID string
Expand Down Expand Up @@ -694,13 +720,7 @@ func (c *cloud) GetDiskByName(ctx context.Context, name string, capacityBytes in
return nil, ErrDiskExistsDiffSize
}

return &Disk{
VolumeID: aws.StringValue(volume.VolumeId),
CapacityGiB: volSizeBytes,
AvailabilityZone: aws.StringValue(volume.AvailabilityZone),
SnapshotID: aws.StringValue(volume.SnapshotId),
OutpostArn: aws.StringValue(volume.OutpostArn),
}, nil
return volumeToDisk(volume), nil
}

func (c *cloud) GetDiskByID(ctx context.Context, volumeID string) (*Disk, error) {
Expand All @@ -716,13 +736,90 @@ func (c *cloud) GetDiskByID(ctx context.Context, volumeID string) (*Disk, error)
return nil, err
}

return &Disk{
VolumeID: aws.StringValue(volume.VolumeId),
CapacityGiB: aws.Int64Value(volume.Size),
AvailabilityZone: aws.StringValue(volume.AvailabilityZone),
OutpostArn: aws.StringValue(volume.OutpostArn),
Attachments: getVolumeAttachmentsList(volume),
}, nil
return volumeToDisk(volume), nil
}

// GetDiskStatusByID returns the status of the EBS volume with the given
// volume ID, using the EC2 DescribeVolumeStatus API.
// Returns ErrNotFound if no status entry is returned, ErrMultiDisks if
// more than one entry is returned for a single-volume request, and
// ErrInternal if the returned entry does not match the requested volume.
func (c *cloud) GetDiskStatusByID(ctx context.Context, volumeID string) (*DiskStatus, error) {
	request := &ec2.DescribeVolumeStatusInput{
		VolumeIds: []*string{
			aws.String(volumeID),
		},
	}

	response, err := c.ec2.DescribeVolumeStatusWithContext(ctx, request)
	if err != nil {
		return nil, err
	}

	// Exactly one status entry is expected for a single-volume request.
	if l := len(response.VolumeStatuses); l > 1 {
		return nil, ErrMultiDisks
	} else if l < 1 {
		return nil, ErrNotFound
	}

	statusItem := response.VolumeStatuses[0]
	// Nil-safe comparison: a raw *statusItem.VolumeId dereference would
	// panic if the API ever omitted the volume ID in the response.
	if aws.StringValue(statusItem.VolumeId) != volumeID {
		return nil, ErrInternal
	}

	return volumeStatusToDiskStatus(statusItem), nil
}

// ListDisks returns one page of EBS volumes tagged as owned by the cluster
// identified by clusterTag, along with a pagination token for the next page.
// nextToken is empty when there are no further results. token, when
// non-empty, resumes a previous listing.
func (c *cloud) ListDisks(ctx context.Context, clusterTag string, maxResults int64, token string) (disks []*Disk, nextToken string, err error) {
	// Filter to volumes carrying the "owned by this cluster" lifecycle tag.
	request := &ec2.DescribeVolumesInput{
		Filters: []*ec2.Filter{
			{
				Name: aws.String(FilterTagPrefix + ResourceLifecycleTagPrefix + clusterTag),
				Values: []*string{
					aws.String(ResourceLifecycleOwned),
				},
			},
		},
		MaxResults: aws.Int64(maxResults),
	}
	if token != "" {
		request.NextToken = aws.String(token)
	}

	response, err := c.ec2.DescribeVolumesWithContext(ctx, request)
	if err != nil {
		return nil, "", err
	}

	for _, volume := range response.Volumes {
		disks = append(disks, volumeToDisk(volume))
	}

	// aws.StringValue yields "" for a nil NextToken, i.e. no more pages.
	return disks, aws.StringValue(response.NextToken), nil
}

// ListDiskStatus returns the status of each of the given EBS volumes,
// keyed by volume ID, using a single EC2 DescribeVolumeStatus call.
func (c *cloud) ListDiskStatus(ctx context.Context, volumeIDs []*string) (diskStatus map[string]*DiskStatus, err error) {
	request := &ec2.DescribeVolumeStatusInput{
		VolumeIds: volumeIDs,
	}

	response, err := c.ec2.DescribeVolumeStatusWithContext(ctx, request)
	if err != nil {
		return nil, err
	}

	diskStatus = make(map[string]*DiskStatus, len(response.VolumeStatuses))
	for _, statusItem := range response.VolumeStatuses {
		// Defensive: skip entries without a volume ID rather than
		// panicking on a nil dereference.
		if statusItem.VolumeId == nil {
			continue
		}
		diskStatus[*statusItem.VolumeId] = volumeStatusToDiskStatus(statusItem)
	}

	return diskStatus, nil
}

func (c *cloud) IsExistInstance(ctx context.Context, nodeID string) bool {
Expand Down Expand Up @@ -1282,3 +1379,30 @@ func capIOPS(volumeType string, requestedCapacityGiB int64, requestedIops int64,
}
return iops, nil
}

// volumeToDisk converts an EC2 Volume API object into the driver's Disk
// representation.
func volumeToDisk(volume *ec2.Volume) *Disk {
	return &Disk{
		VolumeID:         aws.StringValue(volume.VolumeId),
		CapacityGiB:      aws.Int64Value(volume.Size),
		AvailabilityZone: aws.StringValue(volume.AvailabilityZone),
		// SnapshotID was populated by GetDiskByName before it was
		// refactored onto this helper; keep it so callers that rely on
		// the snapshot ID continue to receive it.
		SnapshotID:  aws.StringValue(volume.SnapshotId),
		OutpostArn:  aws.StringValue(volume.OutpostArn),
		Attachments: getVolumeAttachmentsList(volume),
	}
}

// volumeStatusToDiskStatus converts an EC2 VolumeStatusItem into the
// driver's DiskStatus representation. All fields are read nil-safely so a
// sparse API response cannot cause a panic.
func volumeStatusToDiskStatus(statusItem *ec2.VolumeStatusItem) *DiskStatus {
	status := ""
	if statusItem.VolumeStatus != nil && statusItem.VolumeStatus.Status != nil {
		status = *statusItem.VolumeStatus.Status
	}

	// Use the first event's description, if any. NOTE(review): this assumes
	// the API lists the most relevant event first — TODO confirm ordering.
	description := ""
	if len(statusItem.Events) > 0 && statusItem.Events[0].Description != nil {
		description = *statusItem.Events[0].Description
	}

	return &DiskStatus{
		Status:      status,
		Description: description,
	}
}
3 changes: 3 additions & 0 deletions pkg/cloud/cloud_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ type Cloud interface {
WaitForAttachmentState(ctx context.Context, volumeID, expectedState string, expectedInstance string, expectedDevice string, alreadyAssigned bool) (*ec2.VolumeAttachment, error)
GetDiskByName(ctx context.Context, name string, capacityBytes int64) (disk *Disk, err error)
GetDiskByID(ctx context.Context, volumeID string) (disk *Disk, err error)
GetDiskStatusByID(ctx context.Context, volumeID string) (diskStatus *DiskStatus, err error)
ListDisks(ctx context.Context, clusterTag string, maxResults int64, token string) (disks []*Disk, nextToken string, err error)
ListDiskStatus(ctx context.Context, volumeIDs []*string) (diskStatus map[string]*DiskStatus, err error)
IsExistInstance(ctx context.Context, nodeID string) (success bool)
CreateSnapshot(ctx context.Context, volumeID string, snapshotOptions *SnapshotOptions) (snapshot *Snapshot, err error)
DeleteSnapshot(ctx context.Context, snapshotID string) (success bool, err error)
Expand Down
Loading