From 313217e75bec37a7b245b57550b7fc2a379b0ae6 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 11 Sep 2020 15:13:26 +0200 Subject: [PATCH 1/7] pkg/util: move path-related functions to pkg/config As this is the only place which use those functions. Signed-off-by: Mateusz Gozdek --- pkg/config/config.go | 29 +++++++++++++++++++++++++---- pkg/util/path.go | 38 -------------------------------------- 2 files changed, 25 insertions(+), 42 deletions(-) delete mode 100644 pkg/util/path.go diff --git a/pkg/config/config.go b/pkg/config/config.go index bc338ed9f..4434515ed 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -17,6 +17,7 @@ package config import ( "fmt" "io/ioutil" + "os" "path/filepath" "github.com/hashicorp/hcl/v2" @@ -25,8 +26,6 @@ import ( "github.com/mitchellh/go-homedir" "github.com/zclconf/go-cty/cty" "github.com/zclconf/go-cty/cty/function" - - "github.com/kinvolk/lokomotive/pkg/util" ) type variable struct { @@ -62,7 +61,7 @@ type Config struct { } func loadLokocfgPaths(configPath string) ([]string, error) { - isDir, err := util.PathIsDir(configPath) + isDir, err := pathIsDir(configPath) if err != nil { return nil, fmt.Errorf("failed to stat config path %q: %w", configPath, err) } @@ -104,7 +103,7 @@ func LoadConfig(lokocfgPath, lokocfgVarsPath string) (*Config, hcl.Diagnostics) configBody := hcl.MergeFiles(hclFiles) - exists, err := util.PathExists(lokocfgVarsPath) + exists, err := pathExists(lokocfgVarsPath) if err != nil { return nil, hcl.Diagnostics{ &hcl.Diagnostic{ @@ -205,3 +204,25 @@ func (c *Config) LoadComponentConfigBody(componentName string) *hcl.Body { } return nil } + +func pathExists(path string) (bool, error) { + _, err := os.Stat(path) + if err == nil { + return true, nil + } + + if os.IsNotExist(err) { + return false, nil + } + + return true, err +} + +func pathIsDir(path string) (bool, error) { + stat, err := os.Stat(path) + if err == nil { + return stat.IsDir(), nil + } + + return false, err +} diff --git a/pkg/util/path.go b/pkg/util/path.go deleted file mode 100644 index 10aaaa2f8..000000000 --- a/pkg/util/path.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2020 The Lokomotive Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package util - -import ( - "os" -) - -func PathExists(path string) (bool, error) { - _, err := os.Stat(path) - if err == nil { - return true, nil - } - if os.IsNotExist(err) { - return false, nil - } - return true, err -} - -func PathIsDir(path string) (bool, error) { - stat, err := os.Stat(path) - if err == nil { - return stat.IsDir(), nil - } - return false, err -} From ed91f6bd973527820c8789acec3819b897cf06df Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 11 Sep 2020 15:14:20 +0200 Subject: [PATCH 2/7] pkg/util: remove tools sub-package As it is not used anywhere. Signed-off-by: Mateusz Gozdek --- pkg/util/tools/tools.go | 50 ----------------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 pkg/util/tools/tools.go diff --git a/pkg/util/tools/tools.go b/pkg/util/tools/tools.go deleted file mode 100644 index 7fdd636ce..000000000 --- a/pkg/util/tools/tools.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2020 The Lokomotive Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package tools - -import ( - "os" - "os/exec" - - k8serrs "k8s.io/apimachinery/pkg/util/errors" -) - -func isBinaryInPath(name string) error { - if _, err := exec.LookPath(name); err != nil { - return err - } - return nil -} - -// InstallerBinaries is used from sub-command `install` to check if all the -// needed binaries are in required place -func InstallerBinaries() error { - var errs []error - - if err := isBinaryInPath("terraform"); err != nil { - errs = append(errs, err) - } - - // see if the terraform plugin is in path - plugin := "$HOME/.terraform.d/plugins/terraform-provider-ct" - if _, err := os.Stat(os.ExpandEnv(plugin)); err != nil { - errs = append(errs, err) - } - - if len(errs) > 0 { - return k8serrs.NewAggregate(errs) - } - return nil -} From c7211b4bee4e4ab19869af54d3d75c800d3260de Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 11 Sep 2020 15:45:24 +0200 Subject: [PATCH 3/7] test/ingress/aws: use wait.PollImmediate rather than retryutil Signed-off-by: Mateusz Gozdek --- test/ingress/aws/aws_test.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/ingress/aws/aws_test.go b/test/ingress/aws/aws_test.go index 7b3580448..ab78c3b40 100644 --- a/test/ingress/aws/aws_test.go +++ b/test/ingress/aws/aws_test.go @@ -26,15 +26,16 @@ import ( "testing" "time" - "github.com/kinvolk/lokomotive/pkg/util/retryutil" - testutil "github.com/kinvolk/lokomotive/test/components/util" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + + testutil "github.com/kinvolk/lokomotive/test/components/util" ) const ( - retryIntervalSeconds = 5 - maxRetries = 60 - httpTimeout = 4 * time.Second + retryInterval = 5 * time.Second + retryTimeout = 9 * time.Minute + httpTimeout = 4 * time.Second ) func TestAWSIngress(t *testing.T) { @@ -70,7 +71,7 @@ func TestAWSIngress(t *testing.T) { h := i.Spec.Rules[0].Host c := getHTTPClient() - err = retryutil.Retry(retryIntervalSeconds*time.Second, maxRetries, func() (bool, error) { + err = wait.PollImmediate(retryInterval, retryTimeout, func() (bool, error) { resp, err := c.Get(fmt.Sprintf("https://%s/get", h)) if err != nil { t.Logf("got an HTTP error: %v", err) From 45e3974e2a43e8ca2dc3ea07739b61598881a892 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 11 Sep 2020 15:52:02 +0200 Subject: [PATCH 4/7] pkg/lokomotive: use wait.PollImmediate instead of retryutil.Retry As they provide the same functionality, but PollImmediate has more obvious interface (as you don't mix duration with number of retries). Signed-off-by: Mateusz Gozdek --- pkg/lokomotive/cluster.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pkg/lokomotive/cluster.go b/pkg/lokomotive/cluster.go index 77ee272f8..6e870063f 100644 --- a/pkg/lokomotive/cluster.go +++ b/pkg/lokomotive/cluster.go @@ -24,20 +24,19 @@ import ( v1 "k8s.io/api/core/v1" meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" - - "github.com/kinvolk/lokomotive/pkg/util/retryutil" ) const ( - // Max number of retries when waiting for cluster to become available. - clusterPingRetries = 18 + // Period after which we assume cluster will not become reachable and we return timeout error to the user. + clusterPingRetryTimeout = 3 * time.Minute // Number of seconds to wait between retires when waiting for cluster to become available. - clusterPingRetryInterval = 10 - // Max number of retries when waiting for nodes to become ready. - nodeReadinessRetries = 18 + clusterPingRetryInterval = 10 * time.Second + // Period after which we assume that nodes will never become ready and we return timeout error to the user. + nodeReadinessRetryTimeout = 3 * time.Minute // Number of seconds to wait between retires when waiting for nodes to become ready. - nodeReadinessRetryInterval = 10 + nodeReadinessRetryInterval = 10 * time.Second ) type Cluster struct { @@ -152,7 +151,7 @@ func (cl *Cluster) Verify() error { fmt.Println("\nNow checking health and readiness of the cluster nodes ...") // Wait for cluster to become available. - err := retryutil.Retry(clusterPingRetryInterval*time.Second, clusterPingRetries, cl.ping) + err := wait.PollImmediate(clusterPingRetryInterval, clusterPingRetryTimeout, cl.ping) if err != nil { return fmt.Errorf("pinging cluster for readiness: %w", err) } @@ -161,7 +160,7 @@ func (cl *Cluster) Verify() error { var nsErr error - err = retryutil.Retry(nodeReadinessRetryInterval*time.Second, nodeReadinessRetries, func() (bool, error) { + err = wait.PollImmediate(nodeReadinessRetryInterval, nodeReadinessRetryTimeout, func() (bool, error) { // Store the original error because Retry would stop too early if we forward it // and anyway overrides the error in case of timeout. ns, nsErr = cl.GetNodeStatus() From 4f74bff81d2831d395b07965daa869c74224db99 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 11 Sep 2020 15:53:00 +0200 Subject: [PATCH 5/7] pkg/lokomotive: improve imports Signed-off-by: Mateusz Gozdek --- pkg/lokomotive/cluster.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/lokomotive/cluster.go b/pkg/lokomotive/cluster.go index 6e870063f..bb9c3b219 100644 --- a/pkg/lokomotive/cluster.go +++ b/pkg/lokomotive/cluster.go @@ -23,7 +23,7 @@ import ( "time" v1 "k8s.io/api/core/v1" - meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" ) @@ -50,7 +50,7 @@ func NewCluster(client *kubernetes.Clientset, expectedNodes int) *Cluster { } func (cl *Cluster) Health() ([]v1.ComponentStatus, error) { - cs, err := cl.KubeClient.CoreV1().ComponentStatuses().List(context.TODO(), meta_v1.ListOptions{}) + cs, err := cl.KubeClient.CoreV1().ComponentStatuses().List(context.TODO(), metav1.ListOptions{}) if err != nil { return nil, err } @@ -75,7 +75,7 @@ type NodeStatus struct { // GetNodeStatus returns the status for all running nodes or an error. func (cl *Cluster) GetNodeStatus() (*NodeStatus, error) { - n, err := cl.KubeClient.CoreV1().Nodes().List(context.TODO(), meta_v1.ListOptions{}) + n, err := cl.KubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) if err != nil { return nil, err } @@ -139,7 +139,7 @@ func (ns *NodeStatus) PrettyPrint() { // ping Cluster to know when its endpoint can be used. func (cl *Cluster) ping() (bool, error) { - _, err := cl.KubeClient.CoreV1().Nodes().List(context.TODO(), meta_v1.ListOptions{}) + _, err := cl.KubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) if err != nil { return false, nil } From 10e572aefef67ca1684deb70cfd46371f210048e Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Fri, 11 Sep 2020 15:53:13 +0200 Subject: [PATCH 6/7] pkg/util/retryutil: remove package This code is no loger used so can be removed. Signed-off-by: Mateusz Gozdek --- pkg/util/retryutil/retryutil.go | 64 --------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 pkg/util/retryutil/retryutil.go diff --git a/pkg/util/retryutil/retryutil.go b/pkg/util/retryutil/retryutil.go deleted file mode 100644 index 6a51c96e4..000000000 --- a/pkg/util/retryutil/retryutil.go +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright 2017 The etcd-operator Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// https://github.com/coreos/etcd-operator/blob/master/pkg/util/retryutil/retry_util.go - -package retryutil - -import ( - "fmt" - "time" -) - -type RetryError struct { - n int -} - -func (e *RetryError) Error() string { - return fmt.Sprintf("still failing after %d retries", e.n) -} - -func IsRetryFailure(err error) bool { - _, ok := err.(*RetryError) - return ok -} - -type ConditionFunc func() (bool, error) - -// Retry retries f every interval until after maxRetries. -// The interval won't be affected by how long f takes. -// For example, if interval is 3s, f takes 1s, another f will be called 2s later. -// However, if f takes longer than interval, it will be delayed. -func Retry(interval time.Duration, maxRetries int, f ConditionFunc) error { - if maxRetries <= 0 { - return fmt.Errorf("maxRetries (%d) should be > 0", maxRetries) - } - tick := time.NewTicker(interval) - defer tick.Stop() - - for i := 0; ; i++ { - ok, err := f() - if err != nil { - return err - } - if ok { - return nil - } - if i == maxRetries { - break - } - <-tick.C - } - return &RetryError{maxRetries} -} From 5ca727b912e04bb0948c266e633601acca572bc2 Mon Sep 17 00:00:00 2001 From: Mateusz Gozdek Date: Tue, 15 Sep 2020 12:41:18 +0200 Subject: [PATCH 7/7] pkg/lokomotive: adjust timeouts 3 minutes seems like an arbitrary time to wait for cluster to become ready, so let's make it at least round to 5 minutes. As currently we have issues, that nodes do not become ready within 3 minutes timeframe, let's give them max 10 minutes to converge, which should be sufficient in most cases, as sometimes it may take up to 5 minutes for node to become ready. Closes #940. Signed-off-by: Mateusz Gozdek --- pkg/lokomotive/cluster.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/lokomotive/cluster.go b/pkg/lokomotive/cluster.go index bb9c3b219..4259fa38b 100644 --- a/pkg/lokomotive/cluster.go +++ b/pkg/lokomotive/cluster.go @@ -30,11 +30,11 @@ import ( const ( // Period after which we assume cluster will not become reachable and we return timeout error to the user. - clusterPingRetryTimeout = 3 * time.Minute + clusterPingRetryTimeout = 5 * time.Minute // Number of seconds to wait between retires when waiting for cluster to become available. clusterPingRetryInterval = 10 * time.Second // Period after which we assume that nodes will never become ready and we return timeout error to the user. - nodeReadinessRetryTimeout = 3 * time.Minute + nodeReadinessRetryTimeout = 10 * time.Minute // Number of seconds to wait between retires when waiting for nodes to become ready. nodeReadinessRetryInterval = 10 * time.Second )