Skip to content

Commit

Permalink
Add csi-proxy monitoring to health checker
Browse files Browse the repository at this point in the history
  • Loading branch information
mcshooter committed May 12, 2021
1 parent 228f0f5 commit fffd841
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 9 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ $(NPD_NAME_VERSION)-%.tar.gz: $(ALL_BINARIES) test/e2e-install.sh
(cd output/$*/ && tar -zcvf ../../$@ *)
sha512sum $@ > $@.sha512

windows-binaries: ENABLE_JOURNALD=0
windows-binaries: $(foreach binary, $(BINARIES), output/windows_amd64/$(binary).exe)

build-binaries: $(ALL_BINARIES)

build-container: build-binaries Dockerfile
Expand Down
22 changes: 13 additions & 9 deletions cmd/healthchecker/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,13 @@ type HealthCheckerOptions struct {

// AddFlags adds health checker command line options to pflag.
func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&hco.Component, "component", types.KubeletComponent,
"The component to check health for. Supports kubelet, docker, kube-proxy, and cri")
fs.StringVar(&hco.Component, "component", types.KubeletComponent, supportedComponentsFlagMessage)
// Deprecated: For backward compatibility on linux environment. Going forward "service" will be used instead of systemd-service
if runtime.GOOS == "linux" {
fs.MarkDeprecated("systemd-service", "please use --service flag instead")
fs.StringVar(&hco.Service, "systemd-service", "",
"The underlying service responsible for the component. Set to the corresponding component for docker and kubelet, containerd for cri.")
fs.StringVar(&hco.Service, "systemd-service", "", supportedServicesFlagMessage)
}
fs.StringVar(&hco.Service, "service", "",
"The underlying service responsible for the component. Set to the corresponding component for docker and kubelet, containerd for cri.")
fs.StringVar(&hco.Service, "service", "", supportedServicesFlagMessage)
fs.BoolVar(&hco.EnableRepair, "enable-repair", true, "Flag to enable/disable repair attempt for the component.")
fs.StringVar(&hco.CriCtlPath, "crictl-path", types.DefaultCriCtl,
"The path to the crictl binary. This is used to check health of cri component.")
Expand All @@ -73,9 +70,16 @@ func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
// Returns error if invalid, nil otherwise.
func (hco *HealthCheckerOptions) IsValid() error {
// Make sure the component specified is valid.
if hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent &&
hco.Component != types.CRIComponent && hco.Component != types.KubeProxyComponent {
return fmt.Errorf("the component specified is not supported. Supported components are : <kubelet/docker/cri/kube-proxy>")
isSupportedComponents := hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent &&
hco.Component != types.CRIComponent && hco.Component != types.KubeProxyComponent
if runtime.GOOS == "windows" {
if isSupportedComponents && hco.Component != types.CsiProxyComponent {
return fmt.Errorf(validComponentMessage)
}
} else {
if isSupportedComponents {
return fmt.Errorf(validComponentMessage)
}
}
// Make sure the service is specified if repair is enabled.
if hco.EnableRepair && hco.Service == "" {
Expand Down
23 changes: 23 additions & 0 deletions cmd/healthchecker/options/options_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
Copyright 2021 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

// User-facing strings for the health checker options on Linux builds.
const (
	// validComponentMessage is the error text used when the component
	// given on the command line is not one of the supported values.
	validComponentMessage = "the component specified is not supported. Supported components are: <kubelet/docker/cri/kube-proxy>"

	// supportedServicesFlagMessage is the help text shared by the
	// service-selection flags.
	supportedServicesFlagMessage = "The underlying service responsible for the component. Set to the corresponding component for docker and kubelet, containerd for cri."

	// supportedComponentsFlagMessage is the help text for the component flag.
	supportedComponentsFlagMessage = "The component to check health for. Supports kubelet, docker, kube-proxy, and cri"
)
23 changes: 23 additions & 0 deletions cmd/healthchecker/options/options_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
Copyright 2021 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package options

// User-facing strings for the health checker options on Windows builds.
//
// The component is spelled "csi-proxy" (the value the component check
// accepts), while the Windows service that backs it is named "csiproxy".
// The help texts below keep the two spellings distinct so that users do not
// pass the service name as the component.
const (
	// supportedComponentsFlagMessage is the help text for the component flag.
	// It lists the component names exactly as they must be typed.
	supportedComponentsFlagMessage = "The component to check health for. Supports csi-proxy, kubelet, docker, kube-proxy, and cri"

	// supportedServicesFlagMessage is the help text for the service flag.
	// It spells out the services whose names differ from the component
	// (containerd for cri, csiproxy for csi-proxy).
	supportedServicesFlagMessage = "The underlying service responsible for the component. Set to the corresponding component for docker and kubelet, containerd for cri, and csiproxy for csi-proxy."

	// validComponentMessage is the error text used when the component
	// given on the command line is not one of the supported values.
	validComponentMessage = "the component specified is not supported. Supported components are: <kubelet/docker/cri/kube-proxy/csi-proxy>"
)
35 changes: 35 additions & 0 deletions config/windows-health-checker-csiproxy.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"plugin": "custom",
"pluginConfig": {
"invoke_interval": "10s",
"timeout": "3m",
"max_output_length": 80,
"concurrency": 1
},
"source": "health-checker",
"metricsReporting": true,
"conditions": [
{
"type": "CsiProxyUnhealthy",
"reason": "CsiProxyIsHealthy",
"message": "Csi-Proxy on the node is functioning properly"
}
],
"rules": [
{
"type": "permanent",
"condition": "CsiProxyUnhealthy",
"reason": "CsiProxyUnhealthy",
"path": "C:\\etc\\kubernetes\\node\\bin\\health-checker.exe",
"args": [
"--component=csi-proxy",
"--enable-repair=true",
"--service=csiproxy",
"--cooldown-time=2m",
"--health-check-timeout=60s"
],
"timeout": "3m"
}
]
}

7 changes: 7 additions & 0 deletions pkg/healthchecker/health_checker_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error)
}
return true, nil
}
case types.CsiProxyComponent:
return func() (bool, error) {
if _, err := powershell("Get-Process", types.CsiProxyComponent); err != nil {
return false, nil
}
return true, nil
}
}
return nil
}
Expand Down
1 change: 1 addition & 0 deletions pkg/healthchecker/types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ const (
DockerComponent = "docker"
ContainerdService = "containerd"
KubeProxyComponent = "kube-proxy"
CsiProxyComponent = "csi-proxy"

KubeletHealthCheckEndpoint = "http://127.0.0.1:10248/healthz"
KubeProxyHealthCheckEndpoint = "http://127.0.0.1:10256/healthz"
Expand Down

0 comments on commit fffd841

Please sign in to comment.