Skip to content

Commit

Permalink
Support CAPI provider custom timeouts
Browse files Browse the repository at this point in the history
Query the CAPI provider for the timeouts needed during provisioning. Implementing this is optional for a provider; providers that do not implement it keep the default timeouts.

The current default of 15 minutes is sufficient for normal CAPI installations. However, because the current PowerVS CAPI provider waits for some resources to be created before it creates the load balancers, the load balancers may not be created before the 15-minute timeout expires. An issue was created to track this [1].

[1] kubernetes-sigs/cluster-api-provider-ibmcloud#1837
  • Loading branch information
hamzy authored and openshift-cherrypick-robot committed Jun 18, 2024
1 parent 5cc3bec commit b985214
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 11 deletions.
31 changes: 20 additions & 11 deletions pkg/infrastructure/clusterapi/clusterapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ import (
var _ infrastructure.Provider = (*InfraProvider)(nil)

const (
// timeout for each provisioning step.
timeout = 15 * time.Minute

preProvisionStage = "Infrastructure Pre-provisioning"
infrastructureStage = "Network-infrastructure Provisioning"
infrastructureReadyStage = "Post-network, pre-machine Provisioning"
Expand Down Expand Up @@ -182,14 +179,20 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
}
}

var networkTimeout = 15 * time.Minute

if p, ok := i.impl.(Timeouts); ok {
networkTimeout = p.NetworkTimeout()
}

// Wait for successful provisioning by checking the InfrastructureReady
// status on the cluster object.
untilTime := time.Now().Add(timeout)
untilTime := time.Now().Add(networkTimeout)
timezone, _ := untilTime.Zone()
logrus.Infof("Waiting up to %v (until %v %s) for network infrastructure to become ready...", timeout, untilTime.Format(time.Kitchen), timezone)
logrus.Infof("Waiting up to %v (until %v %s) for network infrastructure to become ready...", networkTimeout, untilTime.Format(time.Kitchen), timezone)
var cluster *clusterv1.Cluster
{
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, timeout, true,
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, networkTimeout, true,
func(ctx context.Context) (bool, error) {
c := &clusterv1.Cluster{}
if err := cl.Get(ctx, client.ObjectKey{
Expand All @@ -205,7 +208,7 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
return cluster.Status.InfrastructureReady, nil
}); err != nil {
if wait.Interrupted(err) {
return fileList, fmt.Errorf("infrastructure was not ready within %v: %w", timeout, err)
return fileList, fmt.Errorf("infrastructure was not ready within %v: %w", networkTimeout, err)
}
return fileList, fmt.Errorf("infrastructure is not ready: %w", err)
}
Expand Down Expand Up @@ -280,12 +283,18 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
logrus.Infof("Created manifest %+T, namespace=%s name=%s", m, m.GetNamespace(), m.GetName())
}

var provisionTimeout = 15 * time.Minute

if p, ok := i.impl.(Timeouts); ok {
provisionTimeout = p.ProvisionTimeout()
}

{
untilTime := time.Now().Add(timeout)
untilTime := time.Now().Add(provisionTimeout)
timezone, _ := untilTime.Zone()
reqBootstrapPubIP := installConfig.Config.Publish == types.ExternalPublishingStrategy && i.impl.BootstrapHasPublicIP()
logrus.Infof("Waiting up to %v (until %v %s) for machines %v to provision...", timeout, untilTime.Format(time.Kitchen), timezone, machineNames)
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, timeout, true,
logrus.Infof("Waiting up to %v (until %v %s) for machines %v to provision...", provisionTimeout, untilTime.Format(time.Kitchen), timezone, machineNames)
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, provisionTimeout, true,
func(ctx context.Context) (bool, error) {
allReady := true
for _, machineName := range machineNames {
Expand Down Expand Up @@ -314,7 +323,7 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
return allReady, nil
}); err != nil {
if wait.Interrupted(err) {
return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", timeout, err)
return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", provisionTimeout, err)
}
return fileList, fmt.Errorf("control-plane machines are not ready: %w", err)
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/infrastructure/clusterapi/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package clusterapi

import (
"context"
"time"

"sigs.k8s.io/controller-runtime/pkg/client"

Expand Down Expand Up @@ -117,3 +118,11 @@ type PostDestroyer interface {
type PostDestroyerInput struct {
Metadata types.ClusterMetadata
}

// Timeouts is an optional interface that a platform provider can implement
// to override the timeouts used for certain provisioning phases. Providers
// that do not implement it get the 15-minute default for each phase.
type Timeouts interface {
// NetworkTimeout returns how long to wait for the network infrastructure
// to become ready.
NetworkTimeout() time.Duration
// ProvisionTimeout returns how long to wait for the machines to provision.
ProvisionTimeout() time.Duration
}
13 changes: 13 additions & 0 deletions pkg/infrastructure/powervs/clusterapi/powervs.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type Provider struct {
clusterapi.InfraProvider
}

// Compile-time checks that *Provider satisfies each of the clusterapi
// interfaces listed here; a missing method becomes a build error.
var _ clusterapi.Timeouts = (*Provider)(nil)
var _ clusterapi.InfraReadyProvider = (*Provider)(nil)
var _ clusterapi.Provider = (*Provider)(nil)
var _ clusterapi.PostProvider = (*Provider)(nil)
Expand Down Expand Up @@ -56,6 +57,18 @@ func leftInContext(ctx context.Context) time.Duration {
const privatePrefix = "api-int."
const publicPrefix = "api."

// NetworkTimeout reports how long to wait for the network infrastructure
// to become ready on this platform. It lets the platform provider override
// the timeout that would otherwise be used for that phase.
func (p Provider) NetworkTimeout() time.Duration {
	// Named so the meaning of the value is explicit at the return site.
	const networkReadyWait = 30 * time.Minute
	return networkReadyWait
}

// ProvisionTimeout reports how long to wait for the machines to provision
// on this platform. It lets the platform provider override the timeout
// that would otherwise be used for that phase.
func (p Provider) ProvisionTimeout() time.Duration {
	// Named so the meaning of the value is explicit at the return site.
	const machineProvisionWait = 15 * time.Minute
	return machineProvisionWait
}

// InfraReady is called once cluster.Status.InfrastructureReady
// is true, typically after load balancers have been provisioned. It can be used
// to create DNS records.
Expand Down

0 comments on commit b985214

Please sign in to comment.