From a849adcc82dbad78fef31b9660a63451f1249594 Mon Sep 17 00:00:00 2001 From: Mark Hamzy Date: Mon, 10 Jun 2024 14:08:47 -0500 Subject: [PATCH] Support CAPI provider custom timeouts Query the CAPI provider for the timeouts needed during provisioning. Support for this is optional. The current default of 15 minutes is sufficient for normal CAPI installations. However, given how the current PowerVS CAPI provider waits for some resources to be created before creating the load balancers, it is possible that the LBs will not be created before the 15-minute timeout. An issue was created to track this [1]. [1] https://github.com/kubernetes-sigs/cluster-api-provider-ibmcloud/issues/1837 --- pkg/infrastructure/clusterapi/clusterapi.go | 31 ++++++++++++------- pkg/infrastructure/clusterapi/types.go | 9 ++++++ .../powervs/clusterapi/powervs.go | 13 ++++++++ 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/pkg/infrastructure/clusterapi/clusterapi.go b/pkg/infrastructure/clusterapi/clusterapi.go index c796080784c..629470a69f1 100644 --- a/pkg/infrastructure/clusterapi/clusterapi.go +++ b/pkg/infrastructure/clusterapi/clusterapi.go @@ -44,9 +44,6 @@ import ( var _ infrastructure.Provider = (*InfraProvider)(nil) const ( - // timeout for each provisioning step. - timeout = 15 * time.Minute - preProvisionStage = "Infrastructure Pre-provisioning" infrastructureStage = "Network-infrastructure Provisioning" infrastructureReadyStage = "Post-network, pre-machine Provisioning" @@ -182,14 +179,20 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset } } + var networkTimeout = 15 * time.Minute + + if p, ok := i.impl.(Timeouts); ok { + networkTimeout = p.NetworkTimeout() + } + // Wait for successful provisioning by checking the InfrastructureReady // status on the cluster object.
- untilTime := time.Now().Add(timeout) + untilTime := time.Now().Add(networkTimeout) timezone, _ := untilTime.Zone() - logrus.Infof("Waiting up to %v (until %v %s) for network infrastructure to become ready...", timeout, untilTime.Format(time.Kitchen), timezone) + logrus.Infof("Waiting up to %v (until %v %s) for network infrastructure to become ready...", networkTimeout, untilTime.Format(time.Kitchen), timezone) var cluster *clusterv1.Cluster { - if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, timeout, true, + if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, networkTimeout, true, func(ctx context.Context) (bool, error) { c := &clusterv1.Cluster{} if err := cl.Get(ctx, client.ObjectKey{ @@ -205,7 +208,7 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset return cluster.Status.InfrastructureReady, nil }); err != nil { if wait.Interrupted(err) { - return fileList, fmt.Errorf("infrastructure was not ready within %v: %w", timeout, err) + return fileList, fmt.Errorf("infrastructure was not ready within %v: %w", networkTimeout, err) } return fileList, fmt.Errorf("infrastructure is not ready: %w", err) } @@ -280,12 +283,18 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset logrus.Infof("Created manifest %+T, namespace=%s name=%s", m, m.GetNamespace(), m.GetName()) } + var provisionTimeout = 15 * time.Minute + + if p, ok := i.impl.(Timeouts); ok { + provisionTimeout = p.ProvisionTimeout() + } + { - untilTime := time.Now().Add(timeout) + untilTime := time.Now().Add(provisionTimeout) timezone, _ := untilTime.Zone() reqBootstrapPubIP := installConfig.Config.Publish == types.ExternalPublishingStrategy && i.impl.BootstrapHasPublicIP() - logrus.Infof("Waiting up to %v (until %v %s) for machines %v to provision...", timeout, untilTime.Format(time.Kitchen), timezone, machineNames) - if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, timeout, true, + logrus.Infof("Waiting up to %v (until 
%v %s) for machines %v to provision...", provisionTimeout, untilTime.Format(time.Kitchen), timezone, machineNames) + if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, provisionTimeout, true, func(ctx context.Context) (bool, error) { allReady := true for _, machineName := range machineNames { @@ -314,7 +323,7 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset return allReady, nil }); err != nil { if wait.Interrupted(err) { - return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", timeout, err) + return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", provisionTimeout, err) } return fileList, fmt.Errorf("control-plane machines are not ready: %w", err) } diff --git a/pkg/infrastructure/clusterapi/types.go b/pkg/infrastructure/clusterapi/types.go index 6b56e5902fe..f1ec19bb8c7 100644 --- a/pkg/infrastructure/clusterapi/types.go +++ b/pkg/infrastructure/clusterapi/types.go @@ -2,6 +2,7 @@ package clusterapi import ( "context" + "time" "sigs.k8s.io/controller-runtime/pkg/client" @@ -117,3 +118,11 @@ type PostDestroyer interface { type PostDestroyerInput struct { Metadata types.ClusterMetadata } + +// Timeouts allows platform provider to override the timeouts for certain phases. +type Timeouts interface { + // When waiting for the network infrastructure to become ready. + NetworkTimeout() time.Duration + // When waiting for the machines to provision. 
+ ProvisionTimeout() time.Duration +} diff --git a/pkg/infrastructure/powervs/clusterapi/powervs.go b/pkg/infrastructure/powervs/clusterapi/powervs.go index e161c53c59e..5a30f24011b 100644 --- a/pkg/infrastructure/powervs/clusterapi/powervs.go +++ b/pkg/infrastructure/powervs/clusterapi/powervs.go @@ -29,6 +29,7 @@ type Provider struct { clusterapi.InfraProvider } +var _ clusterapi.Timeouts = (*Provider)(nil) var _ clusterapi.InfraReadyProvider = (*Provider)(nil) var _ clusterapi.Provider = (*Provider)(nil) var _ clusterapi.PostProvider = (*Provider)(nil) @@ -56,6 +57,18 @@ func leftInContext(ctx context.Context) time.Duration { const privatePrefix = "api-int." const publicPrefix = "api." +// NetworkTimeout allows platform provider to override the timeout +// when waiting for the network infrastructure to become ready. +func (p Provider) NetworkTimeout() time.Duration { + return 30 * time.Minute +} + +// ProvisionTimeout allows platform provider to override the timeout +// when waiting for the machines to provision. +func (p Provider) ProvisionTimeout() time.Duration { + return 15 * time.Minute +} + // InfraReady is called once cluster.Status.InfrastructureReady // is true, typically after load balancers have been provisioned. It can be used // to create DNS records.