🌱 Add conditions to the DockerMachine object #3122

Merged
65 changes: 65 additions & 0 deletions test/infrastructure/docker/api/v1alpha3/condition_consts.go
@@ -0,0 +1,65 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha3

import clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"

// Conditions and condition Reasons for the DockerMachine object.

// ConditionsCount is the number of conditions that contribute to the DockerMachine
// readiness summary; it is used to render the "x of y completed" step counter.
const ConditionsCount = 2

const (
// ContainerProvisionedCondition documents the status of the provisioning of the container
// generated by a DockerMachine.
//
// NOTE: When container provisioning starts, the process completes almost immediately and within
// the same reconciliation, so the user will always see a transition from Waiting to Provisioned
// without any evidence that the operation was started/is in progress.
ContainerProvisionedCondition clusterv1.ConditionType = "ContainerProvisioned"

// WaitingForClusterInfrastructureReason (Severity=Info) documents a DockerMachine waiting for the cluster
// infrastructure to be ready before starting to create the container that provides the DockerMachine
// infrastructure.
WaitingForClusterInfrastructureReason = "WaitingForClusterInfrastructure"

// WaitingForBootstrapDataReason (Severity=Info) documents a DockerMachine waiting for the bootstrap
// script to be ready before starting to create the container that provides the DockerMachine infrastructure.
WaitingForBootstrapDataReason = "WaitingForBootstrapData"

// ContainerProvisioningFailedReason (Severity=Warning) documents a DockerMachine controller detecting
// an error while provisioning the container that provides the DockerMachine infrastructure; those kinds of
// errors are usually transient, and failed provisioning is automatically retried by the controller.
ContainerProvisioningFailedReason = "ContainerProvisioningFailed"
)

const (
// BootstrapExecSucceededCondition provides an observation of the DockerMachine bootstrap process.
// The condition gets generated after ContainerProvisionedCondition is True.
//
// NOTE: Unlike other providers, container provisioning and bootstrap are managed directly by
// the DockerMachine controller (not by cloud-init).
BootstrapExecSucceededCondition clusterv1.ConditionType = "BootstrapExecSucceeded"

// BootstrappingReason (Severity=Info) documents a DockerMachine currently executing the bootstrap
// script that creates the Kubernetes node on the newly provisioned machine infrastructure.
BootstrappingReason = "Bootstrapping"

// BootstrapFailedReason (Severity=Warning) documents a DockerMachine controller detecting an error while
// bootstrapping the Kubernetes node on the machine just provisioned; those kinds of errors are usually
// transient, and failed bootstraps are automatically retried by the controller.
BootstrapFailedReason = "BootstrapFailed"
)
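
For context (not part of the diff): a minimal sketch of how these constants are consumed through the sigs.k8s.io/cluster-api/util/conditions helpers, mirroring the controller changes further down; the dockerMachine object and the surrounding reconcile flow are assumed.

package example // illustrative only

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1alpha3"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// markContainerPhase records the provisioning phase on the DockerMachine, then rolls
// all conditions up into the Ready summary with an "x of y completed" step counter.
func markContainerPhase(dockerMachine *infrav1.DockerMachine, infraReady bool) {
	if !infraReady {
		conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition,
			infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
	} else {
		conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)
	}
	conditions.SetSummary(dockerMachine, conditions.WithStepCounter(infrav1.ConditionsCount))
}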
13 changes: 13 additions & 0 deletions test/infrastructure/docker/api/v1alpha3/dockermachine_types.go
@@ -18,6 +18,7 @@ package v1alpha3

import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
)

const (
@@ -79,6 +80,10 @@ type DockerMachineStatus struct {
// added to the load balancer
// +optional
LoadBalancerConfigured bool `json:"loadBalancerConfigured,omitempty"`

// Conditions defines current service state of the DockerMachine.
// +optional
Conditions clusterv1.Conditions `json:"conditions,omitempty"`
}

// +kubebuilder:resource:path=dockermachines,scope=Namespaced,categories=cluster-api
@@ -95,6 +100,14 @@ type DockerMachine struct {
Status DockerMachineStatus `json:"status,omitempty"`
}

func (c *DockerMachine) GetConditions() clusterv1.Conditions {
return c.Status.Conditions
}

func (c *DockerMachine) SetConditions(conditions clusterv1.Conditions) {
c.Status.Conditions = conditions
}

// +kubebuilder:object:root=true

// DockerMachineList contains a list of DockerMachine
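
For context (not part of the diff): implementing GetConditions/SetConditions is what makes DockerMachine satisfy the getter/setter interfaces used by the util/conditions helpers and the patch helper. A sketch of reading a condition back, assuming conditions.Get returns nil when the condition is not set (as in cluster-api v1alpha3) and corev1 is k8s.io/api/core/v1:

// isContainerProvisioned reports whether the ContainerProvisioned condition is True.
func isContainerProvisioned(dm *infrav1.DockerMachine) bool {
	c := conditions.Get(dm, infrav1.ContainerProvisionedCondition)
	return c != nil && c.Status == corev1.ConditionTrue
}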

Some generated files are not rendered by default.

@@ -82,6 +82,50 @@ spec:
status:
description: DockerMachineStatus defines the observed state of DockerMachine
properties:
conditions:
description: Conditions defines current service state of the DockerMachine.
items:
description: Condition defines an observation of a Cluster API resource
operational state.
properties:
lastTransitionTime:
description: Last time the condition transitioned from one status
to another. This should be when the underlying condition changed.
If that is not known, then using the time when the API field
changed is acceptable.
format: date-time
type: string
message:
description: A human readable message indicating details about
the transition. This field may be empty.
type: string
reason:
description: The reason for the condition's last transition
in CamelCase. The specific API may choose whether or not this
field is considered a guaranteed API. This field may not be
empty.
type: string
severity:
description: Severity provides an explicit classification of
Reason code, so the users or machines can immediately understand
the current situation and act accordingly. The Severity field
MUST be set only when Status=False.
type: string
status:
description: Status of the condition, one of True, False, Unknown.
type: string
type:
description: Type of condition in CamelCase or in foo.example.com/CamelCase.
Many .condition.type values are consistent across resources
like Available, but because arbitrary conditions can be useful
(see .node.status.conditions), the ability to deconflict is
important.
type: string
required:
- status
- type
type: object
type: array
loadBalancerConfigured:
description: LoadBalancerConfigured denotes that the machine has been
added to the load balancer
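
As a reading aid (not part of the generated file): a sketch of a condition value that satisfies this schema, with the Severity rule applied; the field values are illustrative and metav1 is k8s.io/apimachinery/pkg/apis/meta/v1.

cond := clusterv1.Condition{
	Type:               infrav1.ContainerProvisionedCondition,          // required
	Status:             corev1.ConditionFalse,                          // required; one of True, False, Unknown
	Severity:           clusterv1.ConditionSeverityInfo,                // set only while Status=False
	Reason:             infrav1.WaitingForBootstrapDataReason,          // CamelCase
	Message:            "waiting for the bootstrap script to be ready", // may be empty
	LastTransitionTime: metav1.Now(),
}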
91 changes: 62 additions & 29 deletions test/infrastructure/docker/controllers/dockermachine_controller.go
@@ -30,6 +30,7 @@ import (
infrav1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1alpha3"
"sigs.k8s.io/cluster-api/test/infrastructure/docker/docker"
"sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/patch"
"sigs.k8s.io/cluster-api/util/predicates"
ctrl "sigs.k8s.io/controller-runtime"
@@ -95,12 +96,6 @@ func (r *DockerMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re

log = log.WithValues("cluster", cluster.Name)

// Make sure infrastructure is ready
if !cluster.Status.InfrastructureReady {
log.Info("Waiting for DockerCluster Controller to create cluster infrastructure")
return ctrl.Result{}, nil
}

// Fetch the Docker Cluster.
dockerCluster := &infrav1.DockerCluster{}
dockerClusterName := client.ObjectKey{
@@ -114,28 +109,16 @@

log = log.WithValues("docker-cluster", dockerCluster.Name)

// Create a helper for managing the docker container hosting the machine.
externalMachine, err := docker.NewMachine(cluster.Name, machine.Name, dockerMachine.Spec.CustomImage, log)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine")
}

// Create a helper for managing a docker container hosting the loadbalancer.
// NB. the machine controller has to manage the cluster load balancer because the current implementation of the
// docker load balancer does not support auto-discovery of control plane nodes, so CAPD should take care of
// updating the cluster load balancer configuration when control plane machines are added/removed
externalLoadBalancer, err := docker.NewLoadBalancer(cluster.Name, log)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer")
}

// Initialize the patch helper
patchHelper, err := patch.NewHelper(dockerMachine, r)
if err != nil {
return ctrl.Result{}, err
}
// Always attempt to Patch the DockerMachine object and status after each reconciliation.
defer func() {
// always update the readyCondition; the summary is represented using the "1 of x completed" notation.
conditions.SetSummary(dockerMachine, conditions.WithStepCounter(infrav1.ConditionsCount))

if err := patchHelper.Patch(ctx, dockerMachine); err != nil {
log.Error(err, "failed to patch DockerMachine")
if rerr == nil {
Expand All @@ -144,6 +127,28 @@ func (r *DockerMachineReconciler) Reconcile(req ctrl.Request) (_ ctrl.Result, re
}
}()

// Check if the infrastructure is ready, otherwise return and wait for the cluster object to be updated
if !cluster.Status.InfrastructureReady {
log.Info("Waiting for DockerCluster Controller to create cluster infrastructure")
conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{}, nil
}

// Create a helper for managing the docker container hosting the machine.
externalMachine, err := docker.NewMachine(cluster.Name, machine.Name, dockerMachine.Spec.CustomImage, log)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine")
}

// Create a helper for managing a docker container hosting the loadbalancer.
// NB. the machine controller has to manage the cluster load balancer because the current implementation of the
// docker load balancer does not support auto-discovery of control plane nodes, so CAPD should take care of
// updating the cluster load balancer configuration when control plane machines are added/removed
externalLoadBalancer, err := docker.NewLoadBalancer(cluster.Name, log)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalLoadBalancer")
}

// Handle deleted machines
if !dockerMachine.ObjectMeta.DeletionTimestamp.IsZero() {
return r.reconcileDelete(ctx, machine, dockerMachine, externalMachine, externalLoadBalancer)
@@ -160,14 +165,16 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, machine *
// if the machine is already provisioned, return
if dockerMachine.Spec.ProviderID != nil {
// ensure ready state is set.
// This is required after move, bacuse status is not moved to the target cluster.
// This is required after move, because status is not moved to the target cluster.
dockerMachine.Status.Ready = true
conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)
return ctrl.Result{}, nil
}

// Make sure bootstrap data is available and populated.
if machine.Spec.Bootstrap.DataSecretName == nil {
log.Info("Waiting for the Bootstrap provider controller to set bootstrap data")
conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{}, nil
}

@@ -185,18 +192,30 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, machine *
if err := externalMachine.Delete(ctx); err != nil {
log.Info("Failed to cleanup machine")
}
dockerMachine.Status.LoadBalancerConfigured = false
conditions.MarkFalse(dockerMachine, infrav1.ContainerProvisionedCondition, infrav1.ContainerProvisioningFailedReason, clusterv1.ConditionSeverityWarning, "Re-provisioning")
conditions.Delete(dockerMachine, infrav1.BootstrapExecSucceededCondition)

res = ctrl.Result{RequeueAfter: 10 * time.Second}
retErr = nil
}
}()

if err := externalMachine.Create(ctx, role, machine.Spec.Version, dockerMachine.Spec.ExtraMounts); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to create worker DockerMachine")
// Create the machine if it does not exist yet
if !externalMachine.Exists() {
if err := externalMachine.Create(ctx, role, machine.Spec.Version, dockerMachine.Spec.ExtraMounts); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to create worker DockerMachine")
}
}

// Update the ContainerProvisionedCondition
conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)

// Preload images into the container
if err := externalMachine.PreloadLoadImages(ctx, dockerMachine.Spec.PreLoadImages); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to pre-load images into the DockerMachine")
if len(dockerMachine.Spec.PreLoadImages) > 0 {
if err := externalMachine.PreloadLoadImages(ctx, dockerMachine.Spec.PreLoadImages); err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed to pre-load images into the DockerMachine")
}
}

// if the machine is a control plane update the load balancer configuration
@@ -209,34 +228,48 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, machine *
dockerMachine.Status.LoadBalancerConfigured = true
}

// At this stage we are ready for bootstrap. However, if the BootstrapExecSucceededCondition is missing
// we add it and requeue, so the user can see the change of state before the bootstrap actually starts.
// NOTE: usually a controller should not rely on the status it is setting, but on the observed state;
// however, in this case we do so because we explicitly want to give feedback to users.
if !conditions.Has(dockerMachine, infrav1.BootstrapExecSucceededCondition) {
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrappingReason, clusterv1.ConditionSeverityInfo, "")
return ctrl.Result{Requeue: true}, nil
}

// Run the bootstrap scripts only if the machine isn't already bootstrapped
if !dockerMachine.Spec.Bootstrapped {
bootstrapData, err := r.getBootstrapData(ctx, machine)
if err != nil {
r.Log.Error(err, "failed to get bootstrap data")
return ctrl.Result{}, nil
return ctrl.Result{}, err
}

timeoutctx, cancel := context.WithTimeout(ctx, 3*time.Minute)
defer cancel()
// Run the bootstrap script. Simulates cloud-init.
if err := externalMachine.ExecBootstrap(timeoutctx, bootstrapData); err != nil {
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap")
}
dockerMachine.Spec.Bootstrapped = true
}

// Update the BootstrapExecSucceededCondition
conditions.MarkTrue(dockerMachine, infrav1.BootstrapExecSucceededCondition)

// Usually a cloud provider will do this, but there is no docker-cloud provider.
// Requeue after 1s if there is an error, as this is likely momentary load balancer
// Requeue if there is an error, as this is likely momentary load balancer
// state changes during control plane provisioning.
if err := externalMachine.SetNodeProviderID(ctx); err != nil {
r.Log.Error(err, "failed to patch the Kubernetes node with the machine providerID")
return ctrl.Result{RequeueAfter: time.Second}, nil
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
}
// Set ProviderID so the Cluster API Machine Controller can pull it
providerID := externalMachine.ProviderID()
dockerMachine.Spec.ProviderID = &providerID
dockerMachine.Status.Ready = true
conditions.MarkTrue(dockerMachine, infrav1.ContainerProvisionedCondition)

return ctrl.Result{}, nil
}
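
For context (not part of the diff): the deferred patch above is the idiom that persists condition changes on every return path of the reconciler. A condensed sketch of the pattern, assuming rerr is the named error result of Reconcile:

// Initialize the patch helper once, then patch on exit no matter how we return.
patchHelper, err := patch.NewHelper(dockerMachine, r)
if err != nil {
	return ctrl.Result{}, err
}
defer func() {
	// Recompute the Ready summary from the individual conditions before persisting,
	// so every exit path writes a consistent status.
	conditions.SetSummary(dockerMachine, conditions.WithStepCounter(infrav1.ConditionsCount))
	if err := patchHelper.Patch(ctx, dockerMachine); err != nil && rerr == nil {
		rerr = err
	}
}()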
5 changes: 5 additions & 0 deletions test/infrastructure/docker/docker/machine.go
@@ -89,6 +89,11 @@ func NewMachine(cluster, machine, image string, logger logr.Logger) (*Machine, e
}, nil
}

// Exists returns true if the container for this machine exists.
func (m *Machine) Exists() bool {
return m.container != nil
}

// ContainerName returns the name of the container for this machine
func (m *Machine) ContainerName() string {
return machineContainerName(m.cluster, m.machine)
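
For context (not part of the diff): the new Exists guard makes Create idempotent across the re-provisioning requeues introduced in the controller. A usage sketch, assuming m comes from docker.NewMachine and the Create arguments match the controller call site:

if !m.Exists() {
	// Only create the backing container when it is not already present,
	// e.g. on a requeue after a transient provisioning failure.
	if err := m.Create(ctx, role, version, mounts); err != nil {
		return errors.Wrap(err, "failed to create container for machine")
	}
}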