Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Add support for kube-vip #320

Merged
merged 27 commits into from
May 17, 2022
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
ecc3976
init
davidspek Apr 8, 2022
843f036
run make generate
davidspek Apr 8, 2022
5d41478
don't have CCM manage the control plane EIP
davidspek Apr 8, 2022
bf68262
fix linting errors
davidspek Apr 8, 2022
9751345
remove comments from template
davidspek Apr 8, 2022
be61a92
Add facility to CCM config so EIPs can be created for LoadBalancer se…
davidspek Apr 8, 2022
d95b01a
pin kube-vip version
davidspek Apr 8, 2022
088bbf9
fix hardcoded project id and make kube-vip version configurable
davidspek Apr 8, 2022
a6c222c
Clean up scripting, fix awk, add ip routes for IBX datacenters
cprivitere Apr 8, 2022
40c7de0
remove caching of metadata where it isn't used
cprivitere Apr 8, 2022
b68c838
Remove echo used for debugging
cprivitere Apr 8, 2022
a0fc851
Adding a space for readability.
cprivitere Apr 8, 2022
b05171b
Have kubeadm ignore manifests directory already existing.
cprivitere Apr 8, 2022
7d7c0b7
run make generate
davidspek Apr 8, 2022
f3f5d78
revert to non-kubevip state
cprivitere Apr 8, 2022
857424f
split kube-vip version to separate template
cprivitere Apr 8, 2022
277e4a4
run make generate
cprivitere Apr 8, 2022
d1b8a2b
Generate kube-vip template via kustomize
cprivitere Apr 8, 2022
d552aad
Add ignore preflight errors to base template as it applies to all clu…
cprivitere Apr 8, 2022
ae0ed29
Update templates to set up BGP routes in IBX datacenters, update CPEM…
cprivitere Apr 8, 2022
f1fb7a9
Further refinements to the templates. bgp sections are only for kube-…
cprivitere Apr 8, 2022
84f9d04
make EIP management configurable
davidspek Apr 11, 2022
4509155
add suggested changes
davidspek Apr 21, 2022
1e09a52
run make generate
davidspek May 2, 2022
467fadc
fix lint error
davidspek May 3, 2022
98c29cd
remove services from kube-vip config
davidspek May 4, 2022
b0cff13
Convert to having the EIP_MANAGEMENT variable as part of the packetcl…
cprivitere May 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ generate: ## Generate code

.PHONY: generate-templates
generate-templates: $(KUSTOMIZE) ## Generate cluster templates
$(KUSTOMIZE) build templates/experimental-kube-vip --load-restrictor LoadRestrictionsNone > templates/cluster-template-kube-vip.yaml
$(KUSTOMIZE) build templates/experimental-crs-cni --load-restrictor LoadRestrictionsNone > templates/cluster-template-crs-cni.yaml
$(KUSTOMIZE) build templates/addons/calico > templates/addons/calico.yaml

Expand Down
7 changes: 7 additions & 0 deletions config/default/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: manager-config
namespace: system
data:
EIP_MANAGEMENT: "${EIP_MANAGEMENT:=CPEM}"
3 changes: 2 additions & 1 deletion config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ commonLabels:

resources:
- namespace.yaml
- config.yaml
- credentials.yaml

bases:
Expand All @@ -26,7 +27,7 @@ patchesStrategicMerge:
- manager_pull_policy.yaml
- manager_webhook_patch.yaml
- webhookcainjection_patch.yaml
- manager_credentials_patch.yaml
- manager_credentials_config_patch.yaml

vars:
- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR
Expand Down
2 changes: 2 additions & 0 deletions config/default/kustomizeconfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
varReference:
- kind: Deployment
path: spec/template/spec/volumes/secret/secretName
- kind: Deployment
path: spec/template/spec/volumes/configMap/configMapName
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ spec:
envFrom:
- secretRef:
name: manager-api-credentials
- configMapRef:
name: manager-config
8 changes: 8 additions & 0 deletions controllers/packetcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package controllers
import (
"context"
"errors"
"os"

apierrors "k8s.io/apimachinery/pkg/api/errors"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand Down Expand Up @@ -137,6 +138,13 @@ func (r *PacketClusterReconciler) reconcileNormal(ctx context.Context, clusterSc
}
}

if os.Getenv("EIP_MANAGEMENT") == "KUBE_VIP" {
if err := r.PacketClient.EnableProjectBGP(packetCluster.Spec.ProjectID); err != nil {
log.Error(err, "error enabling bgp for project")
return ctrl.Result{}, err
}
}

clusterScope.PacketCluster.Status.Ready = true
conditions.MarkTrue(packetCluster, infrav1.NetworkInfrastructureReadyCondition)

Expand Down
55 changes: 31 additions & 24 deletions controllers/packetmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"net/http"
"os"
"strings"
"time"

Expand Down Expand Up @@ -222,7 +223,7 @@ func (r *PacketMachineReconciler) PacketClusterToPacketMachines(ctx context.Cont
}
}

func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *scope.MachineScope) (ctrl.Result, error) { //nolint:gocyclo
func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *scope.MachineScope) (ctrl.Result, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *scope.MachineScope) (ctrl.Result, error) {
func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *scope.MachineScope) (ctrl.Result, error) { //nolint:gocyclo

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to add this nolint comment back in.

log := ctrl.LoggerFrom(ctx, "machine", machineScope.Machine.Name, "cluster", machineScope.Cluster.Name)
log.Info("Reconciling PacketMachine")

Expand Down Expand Up @@ -316,21 +317,21 @@ func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *s
ExtraTags: packet.DefaultCreateTags(machineScope.Namespace(), machineScope.Machine.Name, machineScope.Cluster.Name),
}

// TODO: see if this can be removed with kube-vip in place
// when the node is a control plan we should check if the elastic ip
// for this cluster is not assigned. If it is free we can prepare the
// current node to use it.
// when the node is a control plan we need the elastic IP
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// when the node is a control plan we need the elastic IP
// when a node is a control plane node we need the elastic IP

// to template out the kube-vip deployment
if machineScope.IsControlPlane() {
controlPlaneEndpoint, _ = r.PacketClient.GetIPByClusterIdentifier(
machineScope.Cluster.Namespace,
machineScope.Cluster.Name,
machineScope.PacketCluster.Spec.ProjectID)
if len(controlPlaneEndpoint.Assignments) == 0 {
a := corev1.NodeAddress{
Type: corev1.NodeExternalIP,
Address: controlPlaneEndpoint.Address,
if os.Getenv("EIP_MANAGEMENT") == "CPEM" {
if len(controlPlaneEndpoint.Assignments) == 0 {
a := corev1.NodeAddress{
Type: corev1.NodeExternalIP,
Address: controlPlaneEndpoint.Address,
}
addrs = append(addrs, a)
}
addrs = append(addrs, a)
}
createDeviceReq.ControlPlaneEndpoint = controlPlaneEndpoint.Address
}
Expand Down Expand Up @@ -362,6 +363,13 @@ func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *s
machineScope.SetProviderID(dev.ID)
machineScope.SetInstanceStatus(infrav1.PacketResourceStatus(dev.State))

if os.Getenv("EIP_MANAGEMENT") == "KUBE_VIP" {
if err := r.PacketClient.EnsureNodeBGPEnabled(dev.ID); err != nil {
// Do not treat an error enabling bgp on machine as fatal
return ctrl.Result{RequeueAfter: time.Second * 20}, fmt.Errorf("failed to enable bpg on machine %s: %w", machineScope.Name(), err)
}
}

deviceAddr := r.PacketClient.GetDeviceAddresses(dev)
machineScope.SetAddresses(append(addrs, deviceAddr...))

Expand All @@ -376,22 +384,21 @@ func (r *PacketMachineReconciler) reconcile(ctx context.Context, machineScope *s
case infrav1.PacketResourceStatusRunning:
log.Info("Machine instance is active", "instance-id", machineScope.GetInstanceID())

// TODO: see if this can be removed with kube-vip in place
// This logic is here because an elastic ip can be assigned only an
// active node. It needs to be a control plane and the IP should not be
// assigned to anything at this point.
controlPlaneEndpoint, _ = r.PacketClient.GetIPByClusterIdentifier(
machineScope.Cluster.Namespace,
machineScope.Cluster.Name,
machineScope.PacketCluster.Spec.ProjectID)
if len(controlPlaneEndpoint.Assignments) == 0 && machineScope.IsControlPlane() {
if _, _, err := r.PacketClient.DeviceIPs.Assign(dev.ID, &packngo.AddressStruct{
Address: controlPlaneEndpoint.Address,
}); err != nil {
log.Error(err, "err assigining elastic ip to control plane. retrying...")
return ctrl.Result{RequeueAfter: time.Second * 20}, nil
if os.Getenv("EIP_MANAGEMENT") == "CPEM" {
controlPlaneEndpoint, _ = r.PacketClient.GetIPByClusterIdentifier(
machineScope.Cluster.Namespace,
machineScope.Cluster.Name,
machineScope.PacketCluster.Spec.ProjectID)
if len(controlPlaneEndpoint.Assignments) == 0 && machineScope.IsControlPlane() {
if _, _, err := r.PacketClient.DeviceIPs.Assign(dev.ID, &packngo.AddressStruct{
Address: controlPlaneEndpoint.Address,
}); err != nil {
log.Error(err, "err assigining elastic ip to control plane. retrying...")
return ctrl.Result{RequeueAfter: time.Second * 20}, nil
}
}
}

machineScope.SetReady()
conditions.MarkTrue(machineScope.PacketMachine, infrav1.DeviceReadyCondition)

Expand Down
66 changes: 66 additions & 0 deletions pkg/cloud/packet/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"net"
"net/http"
"os"
"strconv"
"strings"
"text/template"

Expand All @@ -38,6 +39,9 @@ const (
apiTokenVarName = "PACKET_API_KEY" //nolint:gosec
clientName = "CAPP-v1beta1"
ipxeOS = "custom_ipxe"
envVarLocalASN = "METAL_LOCAL_ASN"
envVarBGPPass = "METAL_BGP_PASS" //nolint:gosec
DefaultLocalASN = 65000
)

var (
Expand Down Expand Up @@ -230,6 +234,68 @@ func (p *Client) CreateIP(namespace, clusterName, projectID, facility string) (n
return ip, nil
}

// enableBGP enable bgp on the project
func (p *Client) EnableProjectBGP(projectID string) error {
// first check if it is enabled before trying to create it
bgpConfig, _, err := p.BGPConfig.Get(projectID, &packngo.GetOptions{})
// if we already have a config, just return
// we need some extra handling logic because the API always returns 200, even if
// not BGP config is in place.
// We treat it as valid config already exists only if ALL of the above is true:
// - no error
// - bgpConfig struct exists
// - bgpConfig struct has non-blank ID
// - bgpConfig struct does not have Status=="disabled"
if err == nil && bgpConfig != nil && bgpConfig.ID != "" && strings.ToLower(bgpConfig.Status) != "disabled" {
return nil
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What will we do if the BGPConfig Get request fails? Invalid project or token, timeout, API availability issues?
Let's handle err != nil here. We should log at the very least.

If BGP can not be enabled, we can return that error and this error will cascade through the reconciliation loop and we will await the next reconciliation loop to attempt to enable BGP. That sounds good. The log messages will be helpful if the resource can not resolve because of this.


// get the local ASN
localASN := os.Getenv(envVarLocalASN)
var outLocalASN int
switch {
case localASN != "":
localASNNo, err := strconv.Atoi(localASN)
if err != nil {
return fmt.Errorf("env var %s must be a number, was %s: %w", envVarLocalASN, localASN, err)
}
outLocalASN = localASNNo
default:
outLocalASN = DefaultLocalASN
}

var outBGPPass string
bgpPass := os.Getenv(envVarBGPPass)
if bgpPass != "" {
outBGPPass = bgpPass
}

// we did not have a valid one, so create it
req := packngo.CreateBGPConfigRequest{
Asn: outLocalASN,
Md5: outBGPPass,
DeploymentType: "local",
UseCase: "kubernetes-load-balancer",
}
_, err = p.BGPConfig.Create(projectID, req)
return err
}

// ensureNodeBGPEnabled check if the node has bgp enabled, and set it if it does not
func (p *Client) EnsureNodeBGPEnabled(id string) error {
// fortunately, this is idempotent, so just create
req := packngo.CreateBGPSessionRequest{
AddressFamily: "ipv4",
}
_, response, err := p.BGPSessions.Create(id, req)
// if we already had one, then we can ignore the error
// this really should be a 409, but 422 is what is returned
if response.StatusCode == 422 && strings.Contains(fmt.Sprintf("%s", err), "already has session") {
err = nil
}
return err
}

func (p *Client) GetIPByClusterIdentifier(namespace, name, projectID string) (packngo.IPAddressReservation, error) {
var err error
var reservedIP packngo.IPAddressReservation
Expand Down
71 changes: 31 additions & 40 deletions templates/cluster-template-crs-cni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,34 +30,30 @@ spec:
cloud-provider: external
provider-id: equinixmetal://{{ `{{ v1.instance_id }}` }}
preKubeadmCommands:
- sed -ri '/\sswap\s/s/^#?/#/' /etc/fstab
- swapoff -a
- mount -a
- |
sed -ri '/\sswap\s/s/^#?/#/' /etc/fstab
swapoff -a
mount -a
cat <<EOF > /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
- modprobe overlay
- modprobe br_netfilter
- |
modprobe overlay
modprobe br_netfilter
cat <<EOF > /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
- sysctl --system
- apt-get -y update
- DEBIAN_FRONTEND=noninteractive apt-get install -y apt-transport-https curl
- curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
- echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
- apt-get update -y
- TRIMMED_KUBERNETES_VERSION=$(echo {{ .kubernetesVersion }} | sed 's/\./\\./g' | sed 's/^v//')
- RESOLVED_KUBERNETES_VERSION=$(apt-cache policy kubelet | awk -v VERSION=$${TRIMMED_KUBERNETES_VERSION} '$1~ VERSION { print $1 }' | head -n1)
- apt-get install -y ca-certificates socat jq ebtables apt-transport-https cloud-utils prips containerd kubelet=$${RESOLVED_KUBERNETES_VERSION} kubeadm=$${RESOLVED_KUBERNETES_VERSION} kubectl=$${RESOLVED_KUBERNETES_VERSION}
- systemctl daemon-reload
- systemctl enable containerd
- systemctl start containerd
sysctl --system
apt-get -y update
DEBIAN_FRONTEND=noninteractive apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release linux-generic jq
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
apt-get update -y
TRIMMED_KUBERNETES_VERSION=$(echo {{ .kubernetesVersion }} | sed 's/\./\\\\./g' | sed 's/^v//')
RESOLVED_KUBERNETES_VERSION=$(apt-cache policy kubelet | awk -v VERSION=$${TRIMMED_KUBERNETES_VERSION} '$1~ VERSION { print $1 }' | head -n1)
DEBIAN_FRONTEND=noninteractive apt-get install -y containerd kubelet=$${RESOLVED_KUBERNETES_VERSION} kubeadm=$${RESOLVED_KUBERNETES_VERSION} kubectl=$${RESOLVED_KUBERNETES_VERSION}
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: Cluster
Expand Down Expand Up @@ -134,7 +130,8 @@ spec:
provider-id: equinixmetal://{{ `{{ v1.instance_id }}` }}
joinConfiguration:
nodeRegistration:
ignorePreflightErrors: []
ignorePreflightErrors:
- DirAvailable--etc-kubernetes-manifests
kubeletExtraArgs:
cloud-provider: external
provider-id: equinixmetal://{{ `{{ v1.instance_id }}` }}
Expand All @@ -146,8 +143,6 @@ spec:
address {{ .controlPlaneEndpoint }}
netmask 255.255.255.255
EOF
- systemctl restart networking
- |
if [ -f "/run/kubeadm/kubeadm.yaml" ]; then
export KUBECONFIG=/etc/kubernetes/admin.conf
export CPEM_YAML=https://github.com/equinix/cloud-provider-equinix-metal/releases/download/v3.4.0/deployment.yaml
Expand All @@ -156,35 +151,31 @@ spec:
kubectl apply -f $${CPEM_YAML} || (sleep 1 && kubectl apply -f $${CPEM_YAML}) || (sleep 1 && kubectl apply -f $${CPEM_YAML})
fi
preKubeadmCommands:
- sed -ri '/\sswap\s/s/^#?/#/' /etc/fstab
- swapoff -a
- mount -a
- |
sed -ri '/\sswap\s/s/^#?/#/' /etc/fstab
swapoff -a
mount -a
cat <<EOF > /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
- modprobe overlay
- modprobe br_netfilter
- |
modprobe overlay
modprobe br_netfilter
cat <<EOF > /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
- sysctl --system
- apt-get -y update
- DEBIAN_FRONTEND=noninteractive apt-get install -y apt-transport-https curl
- curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
- echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
- apt-get update -y
- TRIMMED_KUBERNETES_VERSION=$(echo {{ .kubernetesVersion }} | sed 's/\./\\./g' | sed 's/^v//')
- RESOLVED_KUBERNETES_VERSION=$(apt-cache policy kubelet | awk -v VERSION=$${TRIMMED_KUBERNETES_VERSION} '$1~ VERSION { print $1 }' | head -n1)
- apt-get install -y ca-certificates socat jq ebtables apt-transport-https cloud-utils prips containerd kubelet=$${RESOLVED_KUBERNETES_VERSION} kubeadm=$${RESOLVED_KUBERNETES_VERSION} kubectl=$${RESOLVED_KUBERNETES_VERSION}
- systemctl daemon-reload
- systemctl enable containerd
- systemctl start containerd
- ping -c 3 -q {{ .controlPlaneEndpoint }} && echo OK || ip addr add {{ .controlPlaneEndpoint }} dev lo
sysctl --system
apt-get -y update
DEBIAN_FRONTEND=noninteractive apt-get install -y apt-transport-https ca-certificates curl gnupg lsb-release linux-generic jq
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list
apt-get update -y
TRIMMED_KUBERNETES_VERSION=$(echo {{ .kubernetesVersion }} | sed 's/\./\\\\./g' | sed 's/^v//')
RESOLVED_KUBERNETES_VERSION=$(apt-cache madison kubelet | awk -v VERSION=$${TRIMMED_KUBERNETES_VERSION} '$3~ VERSION { print $3 }' | head -n1)
DEBIAN_FRONTEND=noninteractive apt-get install -y containerd kubelet=$${RESOLVED_KUBERNETES_VERSION} kubeadm=$${RESOLVED_KUBERNETES_VERSION} kubectl=$${RESOLVED_KUBERNETES_VERSION}
ping -c 3 -q {{ .controlPlaneEndpoint }} && echo OK || ip addr add {{ .controlPlaneEndpoint }} dev lo
machineTemplate:
infrastructureRef:
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
Expand Down
Loading