Skip to content

Commit

Permalink
Add watch dog as subprocess of the driver
Browse files Browse the repository at this point in the history
Add sub reaper to wait for stunnel processes
  • Loading branch information
Cheng Pan committed Dec 29, 2019
1 parent 906fda6 commit ad65d3f
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 6 deletions.
19 changes: 19 additions & 0 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Base image: Amazon Linux 2.
FROM amazonlinux:2
# Install the util-linux and amazon-efs-utils packages with yum.
# NOTE(review): amazon-efs-utils presumably supplies the EFS mount helper and
# the amazon-efs-mount-watchdog binary that the driver launches - confirm.
RUN yum install util-linux amazon-efs-utils -y
# Copy the driver binary from bin/ in the build context into /bin in the image.
COPY bin/aws-efs-csi-driver /bin/aws-efs-csi-driver
# Copy the THIRD-PARTY file to the image root.
COPY THIRD-PARTY /

# Run the driver as the container's entry point.
ENTRYPOINT ["/bin/aws-efs-csi-driver"]
1 change: 1 addition & 0 deletions deploy/kubernetes/base/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ spec:
operator: Exists
containers:
- name: efs-plugin
imagePullPolicy: Always
securityContext:
privileged: true
image: amazon/aws-efs-csi-driver:latest
Expand Down
4 changes: 4 additions & 0 deletions examples/kubernetes/volume_path/specs/example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ spec:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
storageClassName: efs-sc
mountOptions:
- tls
csi:
driver: efs.csi.aws.com
volumeHandle: fs-e8a95a42:/dir1
Expand Down Expand Up @@ -44,6 +46,8 @@ spec:
- ReadWriteMany
persistentVolumeReclaimPolicy: Retain
storageClassName: efs-sc
mountOptions:
- tls
csi:
driver: efs.csi.aws.com
volumeHandle: fs-e8a95a42:/dir2
Expand Down
1 change: 1 addition & 0 deletions hack/update-gomock
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ set -euo pipefail

IMPORT_PATH=github.com/kubernetes-sigs/aws-efs-csi-driver
mockgen -package=mocks -destination=./pkg/driver/mocks/mock_mount.go ${IMPORT_PATH}/pkg/driver Mounter
mockgen -package=mocks -destination=./pkg/driver/mocks/mock_watchdog.go ${IMPORT_PATH}/pkg/driver Watchdog
mockgen -package=mocks -destination=./pkg/cloud/mocks/mock_ec2metadata.go ${IMPORT_PATH}/pkg/cloud EC2Metadata
17 changes: 14 additions & 3 deletions pkg/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ type Driver struct {
srv *grpc.Server

mounter Mounter

efsWatchdog Watchdog
}

func NewDriver(endpoint string) *Driver {
Expand All @@ -46,10 +48,12 @@ func NewDriver(endpoint string) *Driver {
klog.Fatalln(err)
}

watchdog := newExecWatchdog("amazon-efs-mount-watchdog")
return &Driver{
endpoint: endpoint,
nodeID: cloud.GetMetadata().GetInstanceID(),
mounter: newNodeMounter(),
endpoint: endpoint,
nodeID: cloud.GetMetadata().GetInstanceID(),
mounter: newNodeMounter(),
efsWatchdog: watchdog,
}
}

Expand Down Expand Up @@ -79,6 +83,13 @@ func (d *Driver) Run() error {
csi.RegisterIdentityServer(d.srv, d)
csi.RegisterNodeServer(d.srv, d)

klog.Info("Starting watch dog")
d.efsWatchdog.start()

reaper := newReaper()
klog.Info("Staring subreaper")
reaper.start()

klog.Infof("Listening for connections on address: %#v", listener.Addr())
return d.srv.Serve(listener)
}
133 changes: 133 additions & 0 deletions pkg/driver/efs_watch_dog.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"fmt"
"os/exec"
"sync"

"k8s.io/klog"
)

// Watchdog is the contract for a supervisor that owns an external process:
// it is started once, and stopped once when it is no longer needed.
type Watchdog interface {
	// start launches the supervised process and begins monitoring it.
	start()

	// stop ends the monitoring and terminates the supervised process.
	stop()
}

// execWatchdog supervises one external command: its run loop executes the
// command and executes it again each time the command exits.
type execWatchdog struct {
	// mu is held while the process is being started and while stop inspects
	// cmd in order to kill it.
	mu sync.Mutex
	// cmd is the most recently created instance of the command.
	cmd *exec.Cmd

	// execCmd is the name (or path) of the command to run.
	execCmd string
	// execArg holds the arguments handed to execCmd.
	execArg []string

	// stopCh is closed by stop to signal the run loop.
	stopCh chan struct{}
}

// newExecWatchdog builds a watchdog for the command cmd invoked with the
// arguments arg. Nothing is executed until start is called.
func newExecWatchdog(cmd string, arg ...string) Watchdog {
	wd := new(execWatchdog)
	wd.execCmd = cmd
	wd.execArg = arg
	wd.stopCh = make(chan struct{})
	return wd
}

// start launches the monitoring loop on its own goroutine and returns
// without waiting for it.
func (w *execWatchdog) start() {
	stop := w.stopCh
	go w.runLoop(stop)
}

// stop signals the run loop to stop and kills the process that is currently
// running, if there is one.
//
// It must be called at most once: it closes stopCh, and closing a channel
// that is already closed panics.
func (w *execWatchdog) stop() {
	close(w.stopCh)

	w.mu.Lock()
	defer w.mu.Unlock()

	// cmd stays nil until the run loop has created the command for the first
	// time, and Process stays nil until the command has been started, so stop
	// may legitimately find nothing to kill.
	if w.cmd == nil || w.cmd.Process == nil {
		return
	}
	if err := w.cmd.Process.Kill(); err != nil {
		klog.Errorf("Failed to kill process: %s", err)
	}
}

// runLoop is the monitoring loop: it runs the command, logs how it exited and
// runs it again, until stopCh is closed.
//
// stopCh is only checked between runs; while the command is running the loop
// is blocked inside exec.
func (w *execWatchdog) runLoop(stopCh <-chan struct{}) {
	for {
		select {
		case <-stopCh:
			klog.Info("stopping...")
			// A bare break here would only leave the select statement and
			// the surrounding for loop would spin forever on the closed
			// channel, so return to end the goroutine.
			return
		default:
		}

		if err := w.exec(); err != nil {
			klog.Errorf("Process %s exits %s", w.execCmd, err)
		}
	}
}

// exec runs the command once and blocks until it exits.
//
// The command's stdout and stderr are forwarded to the log. It returns the
// error from starting the command, or otherwise the result of waiting for it.
// It returns nil without running anything when stop has already been
// requested.
func (w *execWatchdog) exec() error {
	cmd := exec.Command(w.execCmd, w.execArg...)
	cmd.Stdout = newInfoRedirect(w.execCmd)
	cmd.Stderr = newErrRedirect(w.execCmd)

	w.mu.Lock()
	// stop closes stopCh before it takes mu, so checking the channel while
	// holding mu guarantees that either stop sees (and kills) the process
	// started below, or no process is started at all.
	select {
	case <-w.stopCh:
		w.mu.Unlock()
		return nil
	default:
	}
	// Publish cmd under the lock so stop never reads it while it is written.
	w.cmd = cmd
	err := cmd.Start()
	// Release the lock on every path, including a failed start; otherwise
	// the next launch attempt and stop would block forever.
	w.mu.Unlock()
	if err != nil {
		return err
	}

	return cmd.Wait()
}

// logRedirect forwards everything written to it to a logging function,
// labelling each message with a process name and a level.
type logRedirect struct {
	// logFunc is the printf-style function that emits the message.
	logFunc func(string, ...interface{})
	// processName is the label identifying the process the output came from.
	processName string
	// level is the textual level label placed in the message.
	level string
}

// newInfoRedirect returns a redirect labelled "Info" for the process name
// that logs through klog at verbosity 4.
// NOTE(review): klog.V(4) is evaluated here, once, when the redirect is
// built rather than on every write - confirm that is intended.
func newInfoRedirect(name string) *logRedirect {
	redirect := new(logRedirect)
	redirect.processName = name
	redirect.level = "Info"
	redirect.logFunc = klog.V(4).Infof
	return redirect
}

// newErrRedirect returns a redirect labelled "Error" for the process name
// that logs through klog.Errorf.
func newErrRedirect(name string) *logRedirect {
	redirect := new(logRedirect)
	redirect.processName = name
	redirect.level = "Error"
	redirect.logFunc = klog.Errorf
	return redirect
}
// Write implements io.Writer. It logs p, prefixed with the process name and
// level, through logFunc.
//
// It always consumes all of p and never fails, so it returns len(p) and a
// nil error.
func (l *logRedirect) Write(p []byte) (n int, err error) {
	msg := fmt.Sprintf("%s[%s]: %s", l.processName, l.level, string(p))
	l.logFunc("%s", msg)
	// io.Writer requires 0 <= n <= len(p). Reporting the length of the
	// decorated message instead makes callers such as io.Copy treat the
	// write as invalid and stop forwarding output.
	return len(p), nil
}
26 changes: 26 additions & 0 deletions pkg/driver/efs_watch_dog_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"testing"
"time"
)

// TestExecWatchdog starts a watchdog around a long-running sleep command,
// lets it run for a second and then stops it.
func TestExecWatchdog(t *testing.T) {
	watchdog := newExecWatchdog("sleep", "300")
	watchdog.start()
	defer watchdog.stop()

	time.Sleep(time.Second)
}
72 changes: 72 additions & 0 deletions pkg/driver/reaper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"os"
"os/signal"
"syscall"

"k8s.io/klog"
)

// reaper waits for child processes of the driver when they exit.
type reaper struct {
	// stopCh carries the request to stop the reaper.
	stopCh chan struct{}
	// sigs receives the signals the reaper is subscribed to.
	sigs chan os.Signal
}

// newReaper creates a reaper and subscribes it to SIGCHLD. The signal
// channel has room for one pending signal.
func newReaper() *reaper {
	r := &reaper{
		sigs:   make(chan os.Signal, 1),
		stopCh: make(chan struct{}),
	}
	signal.Notify(r.sigs, syscall.SIGCHLD)
	return r
}

// start runs the reaper loop on its own goroutine and returns without
// waiting for it.
func (r *reaper) start() {
	go func() {
		r.runLoop()
	}()
}

// runLoop waits for child processes that exit until the reaper is stopped.
// Currently only the stunnel process is created by the efs mount helper
// and is inherited as a child process of the driver.
//
// On a stop request it unregisters the signal channel and returns, which
// ends the goroutine started by start.
func (r *reaper) runLoop() {
	for {
		select {
		case <-r.sigs:
			r.reapChildren()
		case <-r.stopCh:
			// A bare break would only leave the select statement and the
			// loop would keep running, so return instead.
			signal.Stop(r.sigs)
			return
		}
	}
}

// reapChildren collects every child process that has already exited, without
// blocking.
func (r *reaper) reapChildren() {
	// One SIGCHLD can stand for several exited children: the signal channel
	// holds a single pending signal. Keep waiting until nothing is left.
	for {
		var status syscall.WaitStatus
		var rusage syscall.Rusage
		childPid, err := syscall.Wait4(-1, &status, syscall.WNOHANG, &rusage)
		switch {
		case err == syscall.EINTR:
			// Interrupted before anything was collected; try again.
			continue
		case err == syscall.ECHILD:
			// No child processes remain, for example because the child was
			// already waited for elsewhere. Nothing to report.
			return
		case err != nil:
			klog.Warningf("Failed to wait for child process %s", err)
			return
		case childPid <= 0:
			// With WNOHANG a pid of 0 means children exist but none of them
			// has exited yet.
			return
		}
		klog.V(4).Infof("Waited for child process %d", childPid)
	}
}

// stop asks the reaper loop to stop by sending on stopCh. The send blocks
// until the loop receives it.
func (r *reaper) stop() {
	var request struct{}
	r.stopCh <- request
}
30 changes: 30 additions & 0 deletions pkg/driver/reaper_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"testing"
"time"
)

// TestReaper starts a reaper, lets it run for a second and then stops it.
func TestReaper(t *testing.T) {
	reaper := newReaper()
	reaper.start()
	defer reaper.stop()

	time.Sleep(time.Second)
}
16 changes: 13 additions & 3 deletions pkg/driver/sanity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@ import (
"github.com/kubernetes-sigs/aws-efs-csi-driver/pkg/driver/mocks"
)

// mockWatchdog is a do-nothing watchdog used by the sanity test so that no
// external process is launched.
type mockWatchdog struct{}

// start does nothing.
func (m *mockWatchdog) start() {}

// stop does nothing.
func (m *mockWatchdog) stop() {}

func TestSanityEFSCSI(t *testing.T) {
// Setup the full driver and its environment
dir, err := ioutil.TempDir("", "sanity-efs-csi")
Expand All @@ -48,9 +57,10 @@ func TestSanityEFSCSI(t *testing.T) {

mockCtrl := gomock.NewController(t)
drv := Driver{
endpoint: endpoint,
nodeID: "sanity",
mounter: mocks.NewMockMounter(mockCtrl),
endpoint: endpoint,
nodeID: "sanity",
mounter: mocks.NewMockMounter(mockCtrl),
efsWatchdog: &mockWatchdog{},
}
defer func() {
if r := recover(); r != nil {
Expand Down

0 comments on commit ad65d3f

Please sign in to comment.