Implement clusterctl delete cluster #406
```diff
@@ -20,13 +20,15 @@ import (
 	"fmt"
 	"io/ioutil"
 	"os"
+	"strings"
+	"time"

+	"github.com/golang/glog"
 	"k8s.io/client-go/kubernetes"
 	clustercommon "sigs.k8s.io/cluster-api/pkg/apis/cluster/common"
 	clusterv1 "sigs.k8s.io/cluster-api/pkg/apis/cluster/v1alpha1"
 	"sigs.k8s.io/cluster-api/pkg/util"
-	"time"
-
-	"github.com/golang/glog"
 )

 // Provider specific logic. Logic here should eventually be optional & additive.
@@ -47,11 +49,20 @@ type ClusterProvisioner interface {
 // Provides interaction with a cluster
 type ClusterClient interface {
 	Apply(string) error
+	Delete(string) error
 	WaitForClusterV1alpha1Ready() error
 	GetClusterObjects() ([]*clusterv1.Cluster, error)
+	GetMachineDeploymentObjects() ([]*clusterv1.MachineDeployment, error)
+	GetMachineSetObjects() ([]*clusterv1.MachineSet, error)
 	GetMachineObjects() ([]*clusterv1.Machine, error)
 	CreateClusterObject(*clusterv1.Cluster) error
+	CreateMachineDeploymentObjects([]*clusterv1.MachineDeployment) error
+	CreateMachineSetObjects([]*clusterv1.MachineSet) error
 	CreateMachineObjects([]*clusterv1.Machine) error
+	DeleteClusterObjects() error
+	DeleteMachineDeploymentObjects() error
+	DeleteMachineSetObjects() error
+	DeleteMachineObjects() error
 	UpdateClusterObjectEndpoint(string) error
 	Close() error
}
```
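The expanded `ClusterClient` surface is straightforward to fake in tests. A minimal sketch (hypothetical, not part of this PR) that records which of the new delete calls happen, relying on the `ClusterClient` interface defined above:

```go
// Hypothetical test fake, not in this diff: embedding the interface means only
// the methods under test need overriding; any other call panics on the nil embed.
type recordingClient struct {
	ClusterClient
	calls []string // records the order of delete calls
}

func (r *recordingClient) DeleteMachineDeploymentObjects() error {
	r.calls = append(r.calls, "MachineDeployments")
	return nil
}

func (r *recordingClient) DeleteMachineSetObjects() error {
	r.calls = append(r.calls, "MachineSets")
	return nil
}

func (r *recordingClient) DeleteMachineObjects() error {
	r.calls = append(r.calls, "Machines")
	return nil
}

func (r *recordingClient) DeleteClusterObjects() error {
	r.calls = append(r.calls, "Clusters")
	return nil
}
```

Running `deleteObjects` (added later in this diff) against such a fake should record `MachineDeployments, MachineSets, Machines, Clusters`, matching the top-down order that function uses.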
```diff
@@ -74,28 +85,22 @@ type ProviderComponentsStoreFactory interface {
 type ClusterDeployer struct {
 	externalProvisioner    ClusterProvisioner
 	clientFactory          ClientFactory
-	provider               ProviderDeployer
 	providerComponents     string
 	addonComponents        string
-	kubeconfigOutput       string
 	cleanupExternalCluster bool
 }

 func New(
 	externalProvisioner ClusterProvisioner,
 	clientFactory ClientFactory,
-	provider ProviderDeployer,
 	providerComponents string,
 	addonComponents string,
-	kubeconfigOutput string,
 	cleanupExternalCluster bool) *ClusterDeployer {
 	return &ClusterDeployer{
 		externalProvisioner:    externalProvisioner,
 		clientFactory:          clientFactory,
-		provider:               provider,
 		providerComponents:     providerComponents,
 		addonComponents:        addonComponents,
-		kubeconfigOutput:       kubeconfigOutput,
 		cleanupExternalCluster: cleanupExternalCluster,
 	}
 }
```
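As the hunks above and below show, `provider` and `kubeconfigOutput` move off the `ClusterDeployer` struct and become per-call arguments to `Create`. A minimal caller sketch under that reading; every concrete value here is a placeholder, and the package name is assumed from the file's location:

```go
// Sketch of the new call shape only; provisioner, factory, provider, cluster,
// machines and storeFactory stand in for real provider-specific values.
d := clusterdeployer.New(provisioner, factory, providerComponents, addonComponents, true /* cleanupExternalCluster */)
if err := d.Create(cluster, machines, provider, "kubeconfig", storeFactory); err != nil {
	glog.Fatalf("create failed: %v", err)
}
```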
```diff
@@ -106,7 +111,8 @@ const (
 )

 // Creates a cluster from the provided cluster definition and machine list.
-func (d *ClusterDeployer) Create(cluster *clusterv1.Cluster, machines []*clusterv1.Machine, providerComponentsStoreFactory ProviderComponentsStoreFactory) error {
+func (d *ClusterDeployer) Create(cluster *clusterv1.Cluster, machines []*clusterv1.Machine, provider ProviderDeployer, kubeconfigOutput string, providerComponentsStoreFactory ProviderComponentsStoreFactory) error {
 	master, nodes, err := splitMachineRoles(machines)
 	if err != nil {
 		return fmt.Errorf("unable to separate master machines from node machines: %v", err)
@@ -118,12 +124,7 @@ func (d *ClusterDeployer) Create(cluster *clusterv1.Cluster, machines []*cluster
 	if err != nil {
 		return fmt.Errorf("could not create external client: %v", err)
 	}
-	defer func() {
-		err := externalClient.Close()
-		if err != nil {
-			glog.Errorf("Could not close external client: %v", err)
-		}
-	}()
+	defer closeClient(externalClient, "external")

 	glog.Info("Applying Cluster API stack to external cluster")
 	if err := d.applyClusterAPIStack(externalClient); err != nil {
@@ -143,37 +144,32 @@ func (d *ClusterDeployer) Create(cluster *clusterv1.Cluster, machines []*cluster
 	}

 	glog.Infof("Updating external cluster object with master (%s) endpoint", master.Name)
-	if err := d.updateClusterEndpoint(externalClient); err != nil {
+	if err := d.updateClusterEndpoint(externalClient, provider); err != nil {
 		return fmt.Errorf("unable to update external cluster endpoint: %v", err)
 	}

 	glog.Info("Creating internal cluster")
-	internalClient, err := d.createInternalCluster(externalClient)
+	internalClient, err := d.createInternalCluster(externalClient, provider, kubeconfigOutput)
 	if err != nil {
 		return fmt.Errorf("unable to create internal cluster: %v", err)
 	}
-	defer func() {
-		err := internalClient.Close()
-		if err != nil {
-			glog.Errorf("Could not close internal client: %v", err)
-		}
-	}()
+	defer closeClient(internalClient, "internal")

 	glog.Info("Applying Cluster API stack to internal cluster")
 	if err := d.applyClusterAPIStackWithPivoting(internalClient, externalClient); err != nil {
 		return fmt.Errorf("unable to apply cluster api stack to internal cluster: %v", err)
 	}

 	glog.Info("Saving provider components to the internal cluster")
-	err = d.saveProviderComponentsToCluster(providerComponentsStoreFactory, d.kubeconfigOutput)
+	err = d.saveProviderComponentsToCluster(providerComponentsStoreFactory, kubeconfigOutput)
 	if err != nil {
 		return fmt.Errorf("unable to save provider components to internal cluster: %v", err)
 	}

 	// For some reason, endpoint doesn't get updated in external cluster sometimes. So we
 	// update the internal cluster endpoint as well to be sure.
 	glog.Infof("Updating internal cluster object with master (%s) endpoint", master.Name)
-	if err := d.updateClusterEndpoint(internalClient); err != nil {
+	if err := d.updateClusterEndpoint(internalClient, provider); err != nil {
 		return fmt.Errorf("unable to update internal cluster endpoint: %v", err)
 	}
@@ -189,7 +185,41 @@ func (d *ClusterDeployer) Create(cluster *clusterv1.Cluster, machines []*cluster
 		}
 	}

-	glog.Infof("Done provisioning cluster. You can now access your cluster with kubectl --kubeconfig %v", d.kubeconfigOutput)
+	glog.Infof("Done provisioning cluster. You can now access your cluster with kubectl --kubeconfig %v", kubeconfigOutput)

 	return nil
 }

+func (d *ClusterDeployer) Delete(internalClient ClusterClient) error {
+	glog.Info("Creating external cluster")
+	externalClient, cleanupExternalCluster, err := d.createExternalCluster()
+	defer cleanupExternalCluster()
+	if err != nil {
+		return fmt.Errorf("could not create external cluster: %v", err)
+	}
+	defer closeClient(externalClient, "external")
+
+	glog.Info("Applying Cluster API stack to external cluster")
+	if err = d.applyClusterAPIStack(externalClient); err != nil {
+		return fmt.Errorf("unable to apply cluster api stack to external cluster: %v", err)
+	}
+
+	glog.Info("Deleting Cluster API Provider Components from internal cluster")
+	if err = internalClient.Delete(d.providerComponents); err != nil {
+		glog.Infof("error while removing provider components from internal cluster: %v", err)
+		glog.Infof("Continuing with a best effort delete")
+	}
+
+	glog.Info("Copying objects from internal cluster to external cluster")
+	if err = pivot(internalClient, externalClient); err != nil {
+		return fmt.Errorf("unable to copy objects from internal to external cluster: %v", err)
+	}
+
+	glog.Info("Deleting objects from external cluster")
+	if err = deleteObjects(externalClient); err != nil {
+		return fmt.Errorf("unable to finish deleting objects in external cluster, resources may have been leaked: %v", err)
```
**Reviewer:** This is a pretty scary log line. Can you provide some guidance here as to what resources might have leaked?

**Author:** It's hard to build a generalized line of text here because the underlying resources are provider specific. For example, in GCP, when the cluster deletion fails it means you may have "leaked" a firewall rule. However, that would likely not be true for a vSphere implementation. Do you have any ideas on how to improve the messaging?

**Reviewer:** I don't have a good solution right now, but perhaps we could make a generic statement ("VMs, firewall rules, LB rules, etc.") for now?

**Author:** I'm not sure that is actually clearer, because the resources in question are extremely specific to a given provider. For example, "firewall rule" is a GCP concept; in AWS there are "security groups". "VMs" might be safe, but in AWS land those would be called instances. When talking about load balancers, AWS alone has at least three different types with different APIs (application, network, and classic). I see two options: […] Do you think we should implement either of these or leave things how they are?
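For illustration only, one possible rendering of the reviewer's "generic statement" idea; this is not what the PR does, and the wording and example resource names are assumptions:

```go
// Hypothetical variant of the error above, naming example resource kinds.
return fmt.Errorf("unable to finish deleting objects in external cluster; "+
	"provider-owned resources (e.g. instances/VMs, firewall rules or security groups, load balancers) "+
	"may have been leaked: %v", err)
```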
```diff
+	}
+	glog.Info("Deletion of cluster complete")
+
+	return nil
+}
```
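For orientation, a sketch of how a `clusterctl delete cluster` command might drive this new method. The wiring is hypothetical: it assumes a `deployer` built with `New` above and the same `ClientFactory` used elsewhere in this file.

```go
// Hypothetical CLI wiring, not part of this diff: build a client for the
// internal (self-hosting) cluster from its kubeconfig, then hand it to Delete.
internalClient, err := clientFactory.NewClusterClientFromKubeconfig(kubeconfig)
if err != nil {
	glog.Fatalf("could not create client for internal cluster: %v", err)
}
defer closeClient(internalClient, "internal")

if err := deployer.Delete(internalClient); err != nil {
	glog.Fatalf("delete failed: %v", err)
}
```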
```diff
@@ -216,39 +246,39 @@ func (d *ClusterDeployer) createExternalCluster() (ClusterClient, func(), error)
 	return externalClient, cleanupFn, nil
 }

-func (d *ClusterDeployer) createInternalCluster(externalClient ClusterClient) (ClusterClient, error) {
+func (d *ClusterDeployer) createInternalCluster(externalClient ClusterClient, provider ProviderDeployer, kubeconfigOutput string) (ClusterClient, error) {
 	cluster, master, _, err := getClusterAPIObjects(externalClient)
 	if err != nil {
 		return nil, err
 	}

 	glog.V(1).Info("Getting internal cluster kubeconfig.")
-	internalKubeconfig, err := waitForKubeconfigReady(d.provider, cluster, master)
+	internalKubeconfig, err := waitForKubeconfigReady(provider, cluster, master)
 	if err != nil {
 		return nil, fmt.Errorf("unable to get internal cluster kubeconfig: %v", err)
 	}

-	err = d.writeKubeconfig(internalKubeconfig)
-	if err != nil {
+	if err = d.writeKubeconfig(internalKubeconfig, kubeconfigOutput); err != nil {
 		return nil, err
 	}

 	internalClient, err := d.clientFactory.NewClusterClientFromKubeconfig(internalKubeconfig)
 	if err != nil {
 		return nil, fmt.Errorf("unable to create internal cluster client: %v", err)
 	}

 	return internalClient, nil
 }

-func (d *ClusterDeployer) updateClusterEndpoint(client ClusterClient) error {
+func (d *ClusterDeployer) updateClusterEndpoint(client ClusterClient, provider ProviderDeployer) error {
 	// Update cluster endpoint. Needed till this logic moves into cluster controller.
 	// TODO: https://github.com/kubernetes-sigs/cluster-api/issues/158
 	// Fetch fresh objects.
 	cluster, master, _, err := getClusterAPIObjects(client)
 	if err != nil {
 		return err
 	}
-	masterIP, err := d.provider.GetIP(cluster, master)
+	masterIP, err := provider.GetIP(cluster, master)
 	if err != nil {
 		return fmt.Errorf("unable to get master IP: %v", err)
 	}
@@ -329,10 +359,10 @@ func (d *ClusterDeployer) applyClusterAPIControllers(client ClusterClient) error
 	return client.Apply(d.providerComponents)
 }

-func (d *ClusterDeployer) writeKubeconfig(kubeconfig string) error {
+func (d *ClusterDeployer) writeKubeconfig(kubeconfig string, kubeconfigOutput string) error {
 	const fileMode = 0666
-	os.Remove(d.kubeconfigOutput)
-	return ioutil.WriteFile(d.kubeconfigOutput, []byte(kubeconfig), fileMode)
+	os.Remove(kubeconfigOutput)
+	return ioutil.WriteFile(kubeconfigOutput, []byte(kubeconfig), fileMode)
 }

 func waitForKubeconfigReady(provider ProviderDeployer, cluster *clusterv1.Cluster, machine *clusterv1.Machine) (string, error) {
```
```diff
@@ -356,11 +386,11 @@ func waitForKubeconfigReady(provider ProviderDeployer, cluster *clusterv1.Cluste

 func pivot(from, to ClusterClient) error {
 	if err := from.WaitForClusterV1alpha1Ready(); err != nil {
-		return fmt.Errorf("Cluster v1aplpha1 resource not ready on source cluster.")
+		return fmt.Errorf("cluster v1alpha1 resource not ready on source cluster")
 	}

 	if err := to.WaitForClusterV1alpha1Ready(); err != nil {
-		return fmt.Errorf("Cluster v1aplpha1 resource not ready on target cluster.")
+		return fmt.Errorf("cluster v1alpha1 resource not ready on target cluster")
 	}

 	clusters, err := from.GetClusterObjects()
@@ -371,13 +401,38 @@ func pivot(from, to ClusterClient) error {
 	for _, cluster := range clusters {
 		// New objects cannot have a specified resource version. Clear it out.
 		cluster.SetResourceVersion("")
-		err = to.CreateClusterObject(cluster)
-		if err != nil {
-			return err
+		if err = to.CreateClusterObject(cluster); err != nil {
+			return fmt.Errorf("error moving Cluster '%v': %v", cluster.GetName(), err)
 		}
+		glog.Infof("Moved Cluster '%s'", cluster.GetName())
 	}

+	fromDeployments, err := from.GetMachineDeploymentObjects()
+	if err != nil {
+		return err
+	}
+	for _, deployment := range fromDeployments {
+		// New objects cannot have a specified resource version. Clear it out.
+		deployment.SetResourceVersion("")
+		if err = to.CreateMachineDeploymentObjects([]*clusterv1.MachineDeployment{deployment}); err != nil {
+			return fmt.Errorf("error moving MachineDeployment '%v': %v", deployment.GetName(), err)
+		}
+		glog.Infof("Moved MachineDeployment %v", deployment.GetName())
+	}
+
+	fromMachineSets, err := from.GetMachineSetObjects()
+	if err != nil {
+		return err
+	}
+	for _, machineSet := range fromMachineSets {
+		// New objects cannot have a specified resource version. Clear it out.
+		machineSet.SetResourceVersion("")
+		if err := to.CreateMachineSetObjects([]*clusterv1.MachineSet{machineSet}); err != nil {
+			return fmt.Errorf("error moving MachineSet '%v': %v", machineSet.GetName(), err)
+		}
+		glog.Infof("Moved MachineSet %v", machineSet.GetName())
+	}
+
 	machines, err := from.GetMachineObjects()
 	if err != nil {
 		return err
```
```diff
@@ -386,15 +441,42 @@ func pivot(from, to ClusterClient) error {
 	for _, machine := range machines {
 		// New objects cannot have a specified resource version. Clear it out.
 		machine.SetResourceVersion("")
-		err = to.CreateMachineObjects([]*clusterv1.Machine{machine})
-		if err != nil {
-			return err
+		if err = to.CreateMachineObjects([]*clusterv1.Machine{machine}); err != nil {
+			return fmt.Errorf("error moving Machine '%v': %v", machine.GetName(), err)
 		}
+		glog.Infof("Moved Machine '%s'", machine.GetName())
 	}
 	return nil
 }

+func deleteObjects(client ClusterClient) error {
+	var errors []string
+	glog.Infof("Deleting machine deployments")
+	if err := client.DeleteMachineDeploymentObjects(); err != nil {
+		err = fmt.Errorf("error deleting machine deployments: %v", err)
+		errors = append(errors, err.Error())
+	}
+	glog.Infof("Deleting machine sets")
+	if err := client.DeleteMachineSetObjects(); err != nil {
+		err = fmt.Errorf("error deleting machine sets: %v", err)
+		errors = append(errors, err.Error())
+	}
+	glog.Infof("Deleting machines")
+	if err := client.DeleteMachineObjects(); err != nil {
```
**Reviewer:** Is this safe considering some machines will be deleted by deletion of MachineSets?

**Author:** It's safe in the sense that we are simply calling the "delete all" method in the cluster API. It is up to the cluster API to properly handle machines that already have a deletion in progress because their parent object has been deleted.

**Reviewer:** It's still not great, because attempting to delete a machine twice (or three times) could emit a couple of error logs that would be misleading unless the user knows exactly how this code works and exactly how MachineSets work. I think here, we should delete all Machines that are not in a MachineSet. (See the sketch after `deleteObjects` below.)

**Author:** There is no actual deletion going on here, just calls to the cluster-api "delete all" methods, i.e. delete all machines, delete all machine sets, delete all machine deployments. Those are asynchronous methods that don't actually do the deletion; the deletion itself is eventually reconciled by the controllers. In this case, the underlying deletion for machines and machine sets is done by the machine controller. I'm not convinced there actually is a problem or that the cluster API would do the wrong thing. Rather than put a lot of complicated deletion logic in clusterctl, it seems better to have the controllers do the right thing.

**Author:** I synced with @k4leung4 and he confirmed that there is not an issue here. There are a couple of scenarios/points that we discussed: […] Here is the […]
```diff
+		err = fmt.Errorf("error deleting machines: %v", err)
+		errors = append(errors, err.Error())
+	}
+	glog.Infof("Deleting clusters")
+	if err := client.DeleteClusterObjects(); err != nil {
+		err = fmt.Errorf("error deleting clusters: %v", err)
+		errors = append(errors, err.Error())
+	}
+	if len(errors) > 0 {
+		return fmt.Errorf("error(s) encountered deleting objects from external cluster: [%v]", strings.Join(errors, ", "))
+	}
+	return nil
+}
```
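The sketch referenced in the thread above: one way the reviewer's suggestion of deleting only Machines with no MachineSet parent might look. This is illustrative only; the PR deliberately leaves that reconciliation to the controllers.

```go
// Hypothetical helper, not part of this PR: filter out machines owned by a
// MachineSet, so only standalone machines would be deleted directly. The
// machine controller reconciles deletion of MachineSet-owned machines when
// their parent MachineSet is deleted.
func machinesWithoutMachineSet(machines []*clusterv1.Machine) []*clusterv1.Machine {
	var standalone []*clusterv1.Machine
	for _, m := range machines {
		ownedByMachineSet := false
		for _, ref := range m.GetOwnerReferences() {
			if ref.Kind == "MachineSet" {
				ownedByMachineSet = true
				break
			}
		}
		if !ownedByMachineSet {
			standalone = append(standalone, m)
		}
	}
	return standalone
}
```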
```diff
 func getClusterAPIObjects(client ClusterClient) (*clusterv1.Cluster, *clusterv1.Machine, []*clusterv1.Machine, error) {
 	machines, err := client.GetMachineObjects()
 	if err != nil {
@@ -440,3 +522,9 @@ func containsMasterRole(roles []clustercommon.MachineRole) bool {
 	}
 	return false
 }
+
+func closeClient(client ClusterClient, name string) {
+	if err := client.Close(); err != nil {
+		glog.Errorf("Could not close %v client: %v", name, err)
+	}
+}
```
**Reviewer:** What happens if the external cluster already exists?

**Author:** I believe this will fail -- much like the CreateCluster command. However, I'll test this manually to make sure we understand the behavior and report back in this comment.

**Author:** I just tried this, and this is the behavior: […] Given that we do nothing special in `kubectl create cluster` if there is an existing minikube cluster, I'd prefer to punt on the problems, as it is an existing issue and not a change in the architecture.

**Reviewer:** Can we skip the external cluster creation if the minikube cluster already exists (pretty much the `minikube status` result)? Not needed in this PR, but an issue + TODO would do. (A sketch of that check follows below.)

**Author:** I created a less prescriptive issue -- I think there are some other possible solutions. Here it is: #413
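The pre-flight check the reviewer suggests might look roughly like this; the wiring into `createExternalCluster` is an assumption, and #413 tracks the real design:

```go
// Hypothetical pre-flight check, not part of this PR. `minikube status`
// exits non-zero when no cluster is running, so a zero exit code suggests
// an existing external cluster that creation could skip or reuse.
package main

import (
	"fmt"
	"os/exec"
)

func minikubeRunning() bool {
	return exec.Command("minikube", "status").Run() == nil
}

func main() {
	if minikubeRunning() {
		fmt.Println("existing minikube cluster detected; skipping creation")
	} else {
		fmt.Println("no running minikube cluster; would create one")
	}
}
```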