diff --git a/deploy/virtual-cluster-host-port-cm.yaml b/deploy/virtual-cluster-host-port-cm.yaml new file mode 100644 index 000000000..8c2da3cce --- /dev/null +++ b/deploy/virtual-cluster-host-port-cm.yaml @@ -0,0 +1,22 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kosmos-hostports + namespace: kosmos-system +data: + config.yaml: | + # ports allocated for virtual cluster api servers, starting from 33001 and incrementing by 1 for each virtual cluster. Be careful not to use ports that are already in use + portsPool: + - 33001 + - 33002 + - 33003 + - 33004 + - 33005 + - 33006 + - 33007 + - 33008 + - 33009 + - 33010 + # when a port is allocated from the pool, it is used for the virtual cluster api server, and it is released after the virtual cluster is deleted + clusterPorts: \ No newline at end of file diff --git a/deploy/virtual-cluster-operator.yml b/deploy/virtual-cluster-operator.yml index 526a48690..3884a668f 100644 --- a/deploy/virtual-cluster-operator.yml +++ b/deploy/virtual-cluster-operator.yml @@ -51,287 +51,15 @@ metadata: name: virtual-cluster-operator namespace: kosmos-system data: + # Generated by script hack/k8s-in-k8s/generate_env.sh env.sh: | - #!/usr/bin/env bash - - SCRIPT_VERSION=0.0.1 - # save tmp file - PATH_FILE_TMP=/apps/conf/kosmos/tmp - ################################################### - # path for kubeadm - PATH_KUBEADM=/usr/bin/kubeadm - ################################################## - # path for kubeadm config - PATH_KUBEADM_CONFIG=/etc/kubeadm - ################################################## - # path for kubernetes - PATH_KUBERNETES=/etc/kubernetes/ - PATH_KUBERNETES_PKI="$PATH_KUBERNETES/pki" - # scpKCCmd.name - KUBELET_KUBE_CONFIG_NAME=kubelet.conf - ################################################## - # path for kubelet - PATH_KUBELET_LIB=/var/lib/kubelet - # scpKubeletConfigCmd.name - KUBELET_CONFIG_NAME=config.yaml + __env.sh__ + # Copied from hack/k8s-in-k8s/kubelet_node_helper.sh kubelet_node_helper.sh: | - #!/usr/bin/env bash - -
source "env.sh" - - # args - DNS_ADDRESS=${2:-10.237.0.10} - LOG_NAME=${2:-kubelet} - JOIN_HOST=$2 - JOIN_TOKEN=$3 - JOIN_CA_HASH=$4 - - function unjoin() { - # before unjoin, you need delete node by kubectl - echo "exec(1/2): kubeadm reset...." - echo "y" | ${PATH_KUBEADM} reset - if [ $? -ne 0 ]; then - exit 1 - fi - - echo "exec(2/2): delete cni...." - if [ -d "/etc/cni/net.d" ]; then - mv /etc/cni/net.d '/etc/cni/net.d.back'`date +%Y_%m_%d_%H_%M_%S` - if [ $? -ne 0 ]; then - exit 1 - fi - fi - } - - function revert() { - if [ ! -f "$PATH_KUBEADM_CONFIG/kubeadm.cfg" ]; then - echo "exec(1/1): execure join cmd" - kubeadm join $JOIN_HOST --token $JOIN_TOKEN --discovery-token-ca-cert-hash $JOIN_CA_HASH - if [ $? -ne 0 ]; then - exit 1 - fi - exit 0 - fi - - echo "exec(1/3): update kubeadm.cfg..." - sed -e "s|token: .*$|token: $JOIN_TOKEN|g" -e "w $PATH_FILE_TMP/kubeadm.cfg.current" "$PATH_KUBEADM_CONFIG/kubeadm.cfg" - if [ $? -ne 0 ]; then - exit 1 - fi - - # add taints - echo "exec(2/3): update kubeadm.cfg tanits..." - sed -i "/kubeletExtraArgs/a \ register-with-taints: node.kosmos.io/unschedulable:NoSchedule" "$PATH_FILE_TMP/kubeadm.cfg.current" - if [ $? -ne 0 ]; then - exit 1 - fi - - echo "exec(3/3): execute join cmd...." - kubeadm join --config "$PATH_FILE_TMP/kubeadm.cfg.current" - if [ $? -ne 0 ]; then - exit 1 - fi - } - - # before join, you need upload ca.crt and kubeconfig to tmp dir!!! - function join() { - echo "exec(1/8): stop containerd...." - systemctl stop containerd - if [ $? -ne 0 ]; then - exit 1 - fi - echo "exec(2/8): copy ca.crt...." - cp "$PATH_FILE_TMP/ca.crt" "$PATH_KUBERNETES_PKI/ca.crt" - if [ $? -ne 0 ]; then - exit 1 - fi - echo "exec(3/8): copy kubeconfig...." - cp "$PATH_FILE_TMP/$KUBELET_KUBE_CONFIG_NAME" "$PATH_KUBERNETES/$KUBELET_KUBE_CONFIG_NAME" - if [ $? -ne 0 ]; then - exit 1 - fi - echo "exec(4/8): set core dns address...." 
- sed -e "s|__DNS_ADDRESS__|$DNS_ADDRESS|g" -e "w ${PATH_KUBELET_LIB}/${KUBELET_CONFIG_NAME}" "$PATH_FILE_TMP"/"$KUBELET_CONFIG_NAME" - if [ $? -ne 0 ]; then - exit 1 - fi - echo "exec(5/8): copy kubeadm-flags.env...." - cp "$PATH_FILE_TMP/kubeadm-flags.env" "$PATH_KUBELET_LIB/kubeadm-flags.env" - if [ $? -ne 0 ]; then - exit 1 - fi - echo "exec(6/8): start containerd" - systemctl start containerd - if [ $? -ne 0 ]; then - exit 1 - fi - - echo "exec(7/8): delete cni...." - if [ -d "/etc/cni/net.d" ]; then - mv /etc/cni/net.d '/etc/cni/net.d.back'`date +%Y_%m_%d_%H_%M_%S` - if [ $? -ne 0 ]; then - exit 1 - fi - fi - - echo "exec(8/8): start kubelet...." - systemctl start kubelet - if [ $? -ne 0 ]; then - exit 1 - fi - } - - function health() { - result=`systemctl is-active containerd` - if [[ $result != "active" ]]; then - echo "health(1/2): containerd is inactive" - exit 1 - else - echo "health(1/2): containerd is active" - fi - - result=`systemctl is-active kubelet` - if [[ $result != "active" ]]; then - echo "health(2/2): kubelet is inactive" - exit 1 - else - echo "health(2/2): containerd is active" - fi - } - - function log() { - systemctl status $LOG_NAME - } - - # check the environments - function check() { - echo "check(1/3): try to create $PATH_FILE_TMP" - if [ ! -d "$PATH_FILE_TMP" ]; then - mkdir -p "$PATH_FILE_TMP" - if [ $? -ne 0 ]; then - exit 1 - fi - fi - - echo "check(2/3): check dir: $PATH_KUBEADM_CONFIG" - if [ ! -d "$PATH_KUBEADM_CONFIG" ]; then - mkdir -p "$PATH_KUBEADM_CONFIG" - if [ $? 
-ne 0 ]; then - exit 1 - fi - - echo "--- - apiVersion: kubeadm.k8s.io/v1beta2 - discovery: - bootstrapToken: - apiServerEndpoint: apiserver.cluster.local:6443 - token: xxxxxxxx - unsafeSkipCAVerification: true - kind: JoinConfiguration - nodeRegistration: - criSocket: /run/containerd/containerd.sock - kubeletExtraArgs: - container-runtime: remote - container-runtime-endpoint: unix:///run/containerd/containerd.sock - taints: null" > $PATH_KUBEADM_CONFIG/kubeadm.cfg - - fi - - echo "check(3/3): copy kubeadm-flags.env to create $PATH_FILE_TMP , remove args[cloud-provider] and taints" - sed -e "s| --cloud-provider=external | |g" -e "w ${PATH_FILE_TMP}/kubeadm-flags.env" "$PATH_KUBELET_LIB/kubeadm-flags.env" - sed -i "s| --register-with-taints=node.kosmos.io/unschedulable:NoSchedule||g" "${PATH_FILE_TMP}/kubeadm-flags.env" - if [ $? -ne 0 ]; then - exit 1 - fi - - echo "environments is ok" - } - - function version() { - echo "$SCRIPT_VERSION" - } - - # See how we were called. - case "$1" in - unjoin) - unjoin - ;; - join) - join - ;; - health) - health - ;; - check) - check - ;; - log) - log - ;; - revert) - revert - ;; - version) - version - ;; - *) - echo $"usage: $0 unjoin|join|health|log|check|version|revert" - exit 1 - esac + __kubelet_node_helper__ + # Obtain through the command "kubectl get cm kubelet-config -nkube-system -oyaml", change dns address to `__DNS_ADDRESS__` config.yaml: | - apiVersion: kubelet.config.k8s.io/v1beta1 - authentication: - anonymous: - enabled: false - webhook: - cacheTTL: 0s - enabled: true - x509: - clientCAFile: /etc/kubernetes/pki/ca.crt - authorization: - mode: Webhook - webhook: - cacheAuthorizedTTL: 0s - cacheUnauthorizedTTL: 0s - cgroupDriver: cgroupfs - clusterDNS: - - __DNS_ADDRESS__ - clusterDomain: cluster.local - cpuManagerReconcilePeriod: 0s - evictionHard: - imagefs.available: 15% - memory.available: 100Mi - nodefs.available: 10% - nodefs.inodesFree: 5% - evictionPressureTransitionPeriod: 5m0s - fileCheckFrequency: 0s - 
healthzBindAddress: 127.0.0.1 - healthzPort: 10248 - httpCheckFrequency: 0s - imageMinimumGCAge: 0s - kind: KubeletConfiguration - kubeAPIBurst: 100 - kubeAPIQPS: 100 - kubeReserved: - cpu: 140m - memory: 1.80G - logging: - flushFrequency: 0 - options: - json: - infoBufferSize: "0" - verbosity: 0 - memorySwap: {} - nodeStatusReportFrequency: 0s - nodeStatusUpdateFrequency: 0s - rotateCertificates: true - runtimeRequestTimeout: 0s - shutdownGracePeriod: 0s - shutdownGracePeriodCriticalPods: 0s - staticPodPath: /etc/kubernetes/manifests - streamingConnectionIdleTimeout: 0s - syncFrequency: 0s - volumeStatsAggPeriod: 0s + __config.yaml__ --- apiVersion: apps/v1 kind: Deployment @@ -388,6 +116,12 @@ spec: # Enter the ip address of a master node - name: EXECTOR_HOST_MASTER_NODE_IP value: 192.168.0.1 + # env.sh KUBELET_CONFIG_NAME + - name: KUBELET_CONFIG_NAME + value: config.yaml + # env.sh KUBELET_KUBE_CONFIG_NAME + - name: KUBELET_KUBE_CONFIG_NAME + value: kubelet.conf # WEB_USER and WEB_PASS for generate token that can be used to access the node-agent - name: WEB_USER valueFrom: @@ -399,6 +133,8 @@ spec: secretKeyRef: name: node-agent-secret key: password + - name: EXECTOR_SHELL_NAME + value: kubelet_node_helper.sh volumeMounts: - name: credentials mountPath: /etc/virtual-cluster-operator @@ -513,25 +249,4 @@ type: kubernetes.io/basic-auth data: username: {{ .USERNAME }} password: {{ .PASSWORD }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: kosmos-hostports - namespace: kosmos-system -data: - config.yaml: | - # ports allocate for virtual cluster api server,from 33001, increment by 1 for each virtual cluster.Be careful not to use ports that are already in use - portsPool: - - 33001 - - 33002 - - 33003 - - 33004 - - 33005 - - 33006 - - 33007 - - 33008 - - 33009 - - 33010 - # when port is allocate from pool,it will be used for virtual cluster api server,and the port will be released after virtual cluster is deleted - clusterPorts: + diff --git 
a/hack/k8s-in-k8s/env.dev.sh b/hack/k8s-in-k8s/env.dev.sh deleted file mode 100644 index 96246634f..000000000 --- a/hack/k8s-in-k8s/env.dev.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_VERSION=0.0.1 -# save tmp file -PATH_FILE_TMP=/apps/conf/kosmos/tmp -################################################### -# path for kubeadm -PATH_KUBEADM=/usr/bin/kubeadm -################################################## -# path for kubeadm config -PATH_KUBEADM_CONFIG=/etc/kubeadm -################################################## -# path for kubernetes -PATH_KUBERNETES=/etc/kubernetes/ -PATH_KUBERNETES_PKI="$PATH_KUBERNETES/pki" -# scpKCCmd.name -KUBELET_KUBE_CONFIG_NAME=kubelet.conf -################################################## -# path for kubelet -PATH_KUBELET_LIB=/var/lib/kubelet -# scpKubeletConfigCmd.name -KUBELET_CONFIG_NAME=config.yaml \ No newline at end of file diff --git a/hack/k8s-in-k8s/env.sh b/hack/k8s-in-k8s/env.sh deleted file mode 100644 index 13fd54987..000000000 --- a/hack/k8s-in-k8s/env.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_VERSION=0.0.1 -# save tmp file -PATH_FILE_TMP=/apps/conf/kosmos/tmp -################################################### -# path for kubeadm -PATH_KUBEADM=/usr/bin/kubeadm -################################################## -# path for kubeadm config -PATH_KUBEADM_CONFIG=/etc/kubeadm -################################################## -# path for kubernetes -PATH_KUBERNETES=/apps/conf/kubernetes/ -PATH_KUBERNETES_PKI="$PATH_KUBERNETES/ssl" -# scpKCCmd.name -KUBELET_KUBE_CONFIG_NAME=kubelet.conf -################################################## -# path for kubelet -PATH_KUBELET_LIB=/var/lib/kubelet -# scpKubeletConfigCmd.name -KUBELET_CONFIG_NAME=config.yaml \ No newline at end of file diff --git a/hack/k8s-in-k8s/generate_env.sh b/hack/k8s-in-k8s/generate_env.sh new file mode 100644 index 000000000..c6d4bafe5 --- /dev/null +++ b/hack/k8s-in-k8s/generate_env.sh @@ -0,0 +1,141 @@ 
+#!/usr/bin/env bash + +# This script will generate a g.env.sh file, like the following: +# #!/usr/bin/env bash + +# # ##### +# # Generate by script generate_env.sh +# # ##### + +# SCRIPT_VERSION=0.0.1 +# # tmp dir of kosmos +# PATH_FILE_TMP=/apps/conf/kosmos/tmp +# ################################################## +# # path for kubeadm config +# PATH_KUBEADM_CONFIG=/etc/kubeadm +# ################################################## +# # path for kubernetes, from kubelet args --config +# PATH_KUBERNETES=/etc/kubernetes +# PATH_KUBERNETES_PKI=/etc/kubernetes/pki +# # name for kubelet kubeconfig file +# KUBELET_KUBE_CONFIG_NAME=kubelet.conf +# ################################################## +# # path for kubelet +# PATH_KUBELET_LIB=/var/lib/kubelet +# # path for kubelet +# PATH_KUBELET_CONF=/var/lib/kubelet +# # name for config file of kubelet +# KUBELET_CONFIG_NAME=config.yaml + +# function GenerateKubeadmConfig() { +# echo "--- +# apiVersion: kubeadm.k8s.io/v1beta2 +# discovery: +# bootstrapToken: +# apiServerEndpoint: apiserver.cluster.local:6443 +# token: $1 +# unsafeSkipCAVerification: true +# kind: JoinConfiguration +# nodeRegistration: +# criSocket: /run/containerd/containerd.sock +# kubeletExtraArgs: +# container-runtime: remote +# container-runtime-endpoint: unix:///run/containerd/containerd.sock +# taints: null" > $2/kubeadm.cfg.current +# } + + + + +SCRIPT_VERSION=0.0.1 +# save tmp file +PATH_FILE_TMP=/apps/conf/kosmos/tmp +# path for kubeadm config +PATH_KUBEADM_CONFIG=/etc/kubeadm +# path for kubelet lib +PATH_KUBELET_LIB=/var/lib/kubelet + + +function GetKubeletConfigFilePath() { + systemctl status kubelet | grep -o '\--config=[^ ]*' | awk -F= '{print $2}' +} + +function GetKubeletKubeConfigFilePath() { + systemctl status kubelet | grep -o '\--kubeconfig=[^ ]*' | awk -F= '{print $2}' +} + +function GetKubernetesCaPath() { + kubectl get cm kubelet-config -nkube-system -oyaml | awk '/clientCAFile:/{print $2}' +} + +function GetFileName() { + local
fullpath="$1" + local filename=$(basename "$fullpath") + echo "$filename" +} + +function GetDirectory() { + local fullpath="$1" + local directory=$(dirname "$fullpath") + echo "$directory" +} + +# kubelet config name +KUBELET_CONFIG_NAME=$(GetFileName "$(GetKubeletConfigFilePath)") +# path for kubelet +PATH_KUBELET_CONF=$(GetDirectory "$(GetKubeletConfigFilePath)") +# kubelet kubeconfig file name +KUBELET_KUBE_CONFIG_NAME=$(GetFileName "$(GetKubeletKubeConfigFilePath)") + +# ca.crt path +PATH_KUBERNETES_PKI=$(GetDirectory "$(GetKubernetesCaPath)") +# length=${#PATH_KUBERNETES_PKI} +PATH_KUBERNETES=$(GetDirectory $PATH_KUBERNETES_PKI) + +echo "#!/usr/bin/env bash + +# ##### +# Generate by script generate_env.sh +# ##### + +SCRIPT_VERSION=$SCRIPT_VERSION +# tmp dir of kosmos +PATH_FILE_TMP=$PATH_FILE_TMP +################################################## +# path for kubeadm config +PATH_KUBEADM_CONFIG=$PATH_KUBEADM_CONFIG +################################################## +# path for kubernetes, from kubelet args --config +PATH_KUBERNETES=$PATH_KUBERNETES +PATH_KUBERNETES_PKI=$PATH_KUBERNETES_PKI +# name for kubelet kubeconfig file +KUBELET_KUBE_CONFIG_NAME=$KUBELET_KUBE_CONFIG_NAME +################################################## +# path for kubelet +PATH_KUBELET_LIB=$PATH_KUBELET_LIB +# path for kubelet +PATH_KUBELET_CONF=$PATH_KUBELET_CONF +# name for config file of kubelet +KUBELET_CONFIG_NAME=$KUBELET_CONFIG_NAME + +function GenerateKubeadmConfig() { + echo \"--- +apiVersion: kubeadm.k8s.io/v1beta2 +discovery: + bootstrapToken: + apiServerEndpoint: apiserver.cluster.local:6443 + token: \$1 + unsafeSkipCAVerification: true +kind: JoinConfiguration +nodeRegistration: + criSocket: /run/containerd/containerd.sock + kubeletExtraArgs: + container-runtime: remote + container-runtime-endpoint: unix:///run/containerd/containerd.sock + taints: null\" > \$2/kubeadm.cfg.current +} + +" > g.env.sh + + +cat g.env.sh \ No newline at end of file diff --git 
a/hack/k8s-in-k8s/kubelet_node_helper.sh b/hack/k8s-in-k8s/kubelet_node_helper.sh index ae08a2868..e70b204cd 100755 --- a/hack/k8s-in-k8s/kubelet_node_helper.sh +++ b/hack/k8s-in-k8s/kubelet_node_helper.sh @@ -12,14 +12,20 @@ JOIN_CA_HASH=$4 function unjoin() { # before unjoin, you need delete node by kubectl echo "exec(1/2): kubeadm reset...." - echo "y" | ${PATH_KUBEADM} reset + echo "y" | kubeadm reset if [ $? -ne 0 ]; then exit 1 fi - echo "exec(2/2): delete cni...." + echo "exec(2/3): restart cotnainerd...." + systemctl restart containerd + if [ $? -ne 0 ]; then + exit 1 + fi + + echo "exec(3/3): delete cni...." if [ -d "/etc/cni/net.d" ]; then - mv /etc/cni/net.d '/etc/cni/net.d.back'`date +%Y_%m_%d_%H_%M_%S` + mv /etc/cni/net.d '/etc/cni/net.d.kosmos.back'`date +%Y_%m_%d_%H_%M_%S` if [ $? -ne 0 ]; then exit 1 fi @@ -27,33 +33,32 @@ function unjoin() { } function revert() { + echo "exec(1/4): update kubeadm.cfg..." if [ ! -f "$PATH_KUBEADM_CONFIG/kubeadm.cfg" ]; then - echo "exec(1/1): execure join cmd" - kubeadm join $JOIN_HOST --token $JOIN_TOKEN --discovery-token-ca-cert-hash $JOIN_CA_HASH - if [ $? -ne 0 ]; then - exit 1 - fi - exit 0 - fi - - echo "exec(1/3): update kubeadm.cfg..." - sed -e "s|token: .*$|token: $JOIN_TOKEN|g" -e "w $PATH_FILE_TMP/kubeadm.cfg.current" "$PATH_KUBEADM_CONFIG/kubeadm.cfg" - if [ $? -ne 0 ]; then - exit 1 + GenerateKubeadmConfig $JOIN_TOKEN $PATH_FILE_TMP + else + sed -e "s|token: .*$|token: $JOIN_TOKEN|g" -e "w $PATH_FILE_TMP/kubeadm.cfg.current" "$PATH_KUBEADM_CONFIG/kubeadm.cfg" fi + # add taints - echo "exec(2/3): update kubeadm.cfg tanits..." + echo "exec(2/4): update kubeadm.cfg tanits..." sed -i "/kubeletExtraArgs/a \ register-with-taints: node.kosmos.io/unschedulable:NoSchedule" "$PATH_FILE_TMP/kubeadm.cfg.current" if [ $? -ne 0 ]; then exit 1 fi - echo "exec(3/3): execute join cmd...." + echo "exec(3/4): execute join cmd...." kubeadm join --config "$PATH_FILE_TMP/kubeadm.cfg.current" if [ $? 
-ne 0 ]; then exit 1 fi + + echo "exec(4/4): restart cotnainerd...." + systemctl restart containerd + if [ $? -ne 0 ]; then + exit 1 + fi } # before join, you need upload ca.crt and kubeconfig to tmp dir!!! @@ -74,7 +79,7 @@ function join() { exit 1 fi echo "exec(4/8): set core dns address...." - sed -e "s|__DNS_ADDRESS__|$DNS_ADDRESS|g" -e "w ${PATH_KUBELET_LIB}/${KUBELET_CONFIG_NAME}" "$PATH_FILE_TMP"/"$KUBELET_CONFIG_NAME" + sed -e "s|__DNS_ADDRESS__|$DNS_ADDRESS|g" -e "w ${PATH_KUBELET_CONF}/${KUBELET_CONFIG_NAME}" "$PATH_FILE_TMP"/"$KUBELET_CONFIG_NAME" if [ $? -ne 0 ]; then exit 1 fi @@ -83,13 +88,8 @@ function join() { if [ $? -ne 0 ]; then exit 1 fi - echo "exec(6/8): start containerd" - systemctl start containerd - if [ $? -ne 0 ]; then - exit 1 - fi - echo "exec(7/8): delete cni...." + echo "exec(6/8): delete cni...." if [ -d "/etc/cni/net.d" ]; then mv /etc/cni/net.d '/etc/cni/net.d.back'`date +%Y_%m_%d_%H_%M_%S` if [ $? -ne 0 ]; then @@ -97,6 +97,12 @@ function join() { fi fi + echo "exec(7/8): start containerd" + systemctl start containerd + if [ $? -ne 0 ]; then + exit 1 + fi + echo "exec(8/8): start kubelet...." systemctl start kubelet if [ $? -ne 0 ]; then @@ -128,39 +134,16 @@ function log() { # check the environments function check() { - echo "check(1/3): try to create $PATH_FILE_TMP" + # TODO: create env file + echo "check(1/2): try to create $PATH_FILE_TMP" if [ ! -d "$PATH_FILE_TMP" ]; then mkdir -p "$PATH_FILE_TMP" if [ $? -ne 0 ]; then exit 1 fi fi - - echo "check(2/3): check dir: $PATH_KUBEADM_CONFIG" - if [ ! -d "$PATH_KUBEADM_CONFIG" ]; then - mkdir -p "$PATH_KUBEADM_CONFIG" - if [ $? 
-ne 0 ]; then - exit 1 - fi - - echo "--- -apiVersion: kubeadm.k8s.io/v1beta2 -discovery: - bootstrapToken: - apiServerEndpoint: apiserver.cluster.local:6443 - token: xxxxxxxx - unsafeSkipCAVerification: true -kind: JoinConfiguration -nodeRegistration: - criSocket: /run/containerd/containerd.sock - kubeletExtraArgs: - container-runtime: remote - container-runtime-endpoint: unix:///run/containerd/containerd.sock - taints: null" > $PATH_KUBEADM_CONFIG/kubeadm.cfg - - fi - echo "check(3/3): copy kubeadm-flags.env to create $PATH_FILE_TMP , remove args[cloud-provider] and taints" + echo "check(2/2): copy kubeadm-flags.env to create $PATH_FILE_TMP , remove args[cloud-provider] and taints" sed -e "s| --cloud-provider=external | |g" -e "w ${PATH_FILE_TMP}/kubeadm-flags.env" "$PATH_KUBELET_LIB/kubeadm-flags.env" sed -i "s| --register-with-taints=node.kosmos.io/unschedulable:NoSchedule||g" "${PATH_FILE_TMP}/kubeadm-flags.env" if [ $? -ne 0 ]; then diff --git a/pkg/kubenest/controller/global.node.controller/global_node_controller.go b/pkg/kubenest/controller/global.node.controller/global_node_controller.go index 904b2b59a..d2b01c455 100644 --- a/pkg/kubenest/controller/global.node.controller/global_node_controller.go +++ b/pkg/kubenest/controller/global.node.controller/global_node_controller.go @@ -219,6 +219,15 @@ func (r *GlobalNodeController) Reconcile(ctx context.Context, request reconcile. 
// klog.V(4).Infof("sync state successed, %s", request.NamespacedName) // } + _, err := r.RootClientSet.CoreV1().Nodes().Get(ctx, globalNode.Name, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + return reconcile.Result{}, nil + } + klog.Errorf("can not get root node: %s", globalNode.Name) + return reconcile.Result{RequeueAfter: utils.DefaultRequeueTime}, nil + } + if err := r.SyncLabel(ctx, &globalNode); err != nil { klog.Warningf("sync label %s error: %v", request.NamespacedName, err) return reconcile.Result{RequeueAfter: utils.DefaultRequeueTime}, nil diff --git a/pkg/kubenest/controller/virtualcluster.node.controller/env/env.go b/pkg/kubenest/controller/virtualcluster.node.controller/env/env.go index bd15bb7cb..52faead6d 100644 --- a/pkg/kubenest/controller/virtualcluster.node.controller/env/env.go +++ b/pkg/kubenest/controller/virtualcluster.node.controller/env/env.go @@ -17,6 +17,24 @@ func GetExectorTmpPath() string { return tmpPath } +func GetKubeletKubeConfigName() string { + kubeletKubeConfigName := os.Getenv("KUBELET_KUBE_CONFIG_NAME") + if len(kubeletKubeConfigName) == 0 { + // env.sh KUBELET_KUBE_CONFIG_NAME + kubeletKubeConfigName = "kubelet.conf" + } + return kubeletKubeConfigName +} + +func GetKubeletConfigName() string { + kubeletConfigName := os.Getenv("KUBELET_CONFIG_NAME") + if len(kubeletConfigName) == 0 { + // env.sh KUBELET_CONFIG_NAME + kubeletConfigName = "config.yaml" + } + return kubeletConfigName +} + func GetExectorWorkerDir() string { exectorWorkDir := os.Getenv("EXECTOR_WORKER_PATH") if len(exectorWorkDir) == 0 { diff --git a/pkg/kubenest/controller/virtualcluster.node.controller/node_controller.go b/pkg/kubenest/controller/virtualcluster.node.controller/node_controller.go index 33b588261..98a0d603d 100644 --- a/pkg/kubenest/controller/virtualcluster.node.controller/node_controller.go +++ b/pkg/kubenest/controller/virtualcluster.node.controller/node_controller.go @@ -286,6 +286,7 @@ func (r *NodeController) 
cleanGlobalNode(ctx context.Context, nodeInfos []v1alph VirtualCluster: virtualCluster, HostClient: r.Client, HostK8sClient: r.RootClientSet, + Opt: r.Options, // VirtualK8sClient: _, }); err != nil { return fmt.Errorf("unjoin node %s failed: %s", nodeInfo.Name, err) @@ -332,6 +333,7 @@ func (r *NodeController) unjoinNode(ctx context.Context, nodeInfos []v1alpha1.Gl HostClient: r.Client, HostK8sClient: r.RootClientSet, VirtualK8sClient: k8sClient, + Opt: r.Options, }); err != nil { return fmt.Errorf("unjoin node %s failed: %s", nodeInfo.Name, err) } diff --git a/pkg/kubenest/controller/virtualcluster.node.controller/workflow/task/task.go b/pkg/kubenest/controller/virtualcluster.node.controller/workflow/task/task.go index a6bb216a1..9db0c296a 100644 --- a/pkg/kubenest/controller/virtualcluster.node.controller/workflow/task/task.go +++ b/pkg/kubenest/controller/virtualcluster.node.controller/workflow/task/task.go @@ -85,12 +85,16 @@ func NewKubeadmResetTask() Task { } } +// nolint:dupl func NewDrainHostNodeTask() Task { return Task{ Name: "drain host node", Retry: true, Skip: func(ctx context.Context, opt TaskOpt) bool { - return opt.Opt.ForceDestroy + if opt.Opt != nil { + return opt.Opt.ForceDestroy + } + return false }, Run: func(ctx context.Context, to TaskOpt, _ interface{}) (interface{}, error) { targetNode, err := to.HostK8sClient.CoreV1().Nodes().Get(ctx, to.NodeInfo.Name, metav1.GetOptions{}) @@ -109,13 +113,17 @@ func NewDrainHostNodeTask() Task { } } +// nolint:dupl func NewDrainVirtualNodeTask() Task { return Task{ Name: "drain virtual-control-plane node", Retry: true, // ErrorIgnore: true, Skip: func(ctx context.Context, opt TaskOpt) bool { - return opt.Opt.ForceDestroy + if opt.Opt != nil { + return opt.Opt.ForceDestroy + } + return false }, Run: func(ctx context.Context, to TaskOpt, _ interface{}) (interface{}, error) { targetNode, err := to.VirtualK8sClient.CoreV1().Nodes().Get(ctx, to.NodeInfo.Name, metav1.GetOptions{}) @@ -203,7 +211,7 @@ func 
NewRemoteUpdateKubeletConfTask() Task { scpKCCmd := &exector.SCPExector{ DstFilePath: env.GetExectorTmpPath(), - DstFileName: "kubelet.conf", + DstFileName: env.GetKubeletKubeConfigName(), SrcByte: kubeconfig, } ret := exectHelper.DoExector(ctx.Done(), scpKCCmd) @@ -224,8 +232,8 @@ func NewRemoteUpdateConfigYamlTask() Task { scpKubeletConfigCmd := &exector.SCPExector{ DstFilePath: env.GetExectorTmpPath(), - DstFileName: "config.yaml", - SrcFile: env.GetExectorWorkerDir() + "config.yaml", // from configmap volumn + DstFileName: env.GetKubeletConfigName(), + SrcFile: env.GetExectorWorkerDir() + env.GetKubeletConfigName(), } ret := exectHelper.DoExector(ctx.Done(), scpKubeletConfigCmd) @@ -257,29 +265,64 @@ func NewRemoteNodeJoinTask() Task { } } -func NewWaitNodeReadyTask() Task { +func NewWaitNodeReadyTask(isHost bool) Task { return Task{ Name: "wait new node ready", Run: func(ctx context.Context, to TaskOpt, _ interface{}) (interface{}, error) { - waitCtx, cancel := context.WithTimeout(ctx, 60*time.Second) // total waiting time - defer cancel() - isReady := false - wait.UntilWithContext(waitCtx, func(ctx context.Context) { - node, err := to.VirtualK8sClient.CoreV1().Nodes().Get(waitCtx, to.NodeInfo.Name, metav1.GetOptions{}) - if err == nil { - if util.IsNodeReady(node.Status.Conditions) { - klog.V(4).Infof("node %s is ready", to.NodeInfo.Name) - isReady = true - cancel() + waitFunc := func() { + waitCtx, cancel := context.WithTimeout(ctx, 60*time.Second) // total waiting time + defer cancel() + wait.UntilWithContext(waitCtx, func(ctx context.Context) { + client := to.VirtualK8sClient + if isHost { + client = to.HostK8sClient + } + + node, err := client.CoreV1().Nodes().Get(waitCtx, to.NodeInfo.Name, metav1.GetOptions{}) + if err == nil { + if util.IsNodeReady(node.Status.Conditions) { + klog.V(4).Infof("node %s is ready", to.NodeInfo.Name) + isReady = true + cancel() + } else { + klog.V(4).Infof("node %s is not ready, status: %s", to.NodeInfo.Name, 
node.Status.Phase) + } } else { - klog.V(4).Infof("node %s is not ready, status: %s", to.NodeInfo.Name, node.Status.Phase) + klog.V(4).Infof("get node %s failed: %s", to.NodeInfo.Name, err) } - } else { - klog.V(4).Infof("get node %s failed: %s", to.NodeInfo.Name, err) - } - }, 10*time.Second) // Interval time + }, 10*time.Second) // Interval time + } + + waitFunc() + + if isReady { + return nil, nil + } + + // try to restart containerd and kubelet + klog.V(4).Infof("try to restart containerd and kubelet on node: %s", to.NodeInfo.Name) + exectHelper := exector.NewExectorHelper(to.NodeInfo.Spec.NodeIP, "") + + restartContainerdCmd := &exector.CMDExector{ + Cmd: "systemctl restart containerd", + } + ret := exectHelper.DoExector(ctx.Done(), restartContainerdCmd) + if ret.Status != exector.SUCCESS { + return nil, fmt.Errorf("cannot restart containerd: %s", ret.String()) + } + + restartKubeletCmd := &exector.CMDExector{ + Cmd: "systemctl restart kubelet", + } + ret = exectHelper.DoExector(ctx.Done(), restartKubeletCmd) + if ret.Status != exector.SUCCESS { + return nil, fmt.Errorf("cannot restart kubelet: %s", ret.String()) + } + + klog.V(4).Infof("wait for the node to be ready again. 
%s", to.NodeInfo.Name) + waitFunc() if isReady { return nil, nil @@ -446,6 +489,7 @@ func NewJoinNodeToHostCmd() Task { SubTasks: []Task{ NewGetJoinNodeToHostCmdTask(), NewExecJoinNodeToHostCmdTask(), + NewWaitNodeReadyTask(true), }, } } @@ -488,7 +532,7 @@ func NewExecJoinNodeToHostCmdTask() Task { return nil, err } joinCmd := &exector.CMDExector{ - Cmd: fmt.Sprintf("bash %s revert %s %s %s", env.GetExectorShellName(), host, token, certHash), + Cmd: fmt.Sprintf("bash %s revert %s %s %s", env.GetExectorShellName(), host, token, certHash), } exectHelper := exector.NewExectorHelper(to.NodeInfo.Spec.NodeIP, "") diff --git a/pkg/kubenest/controller/virtualcluster.node.controller/workflow/workflow.go b/pkg/kubenest/controller/virtualcluster.node.controller/workflow/workflow.go index 3c550ee0d..49a663f5e 100644 --- a/pkg/kubenest/controller/virtualcluster.node.controller/workflow/workflow.go +++ b/pkg/kubenest/controller/virtualcluster.node.controller/workflow/workflow.go @@ -46,14 +46,14 @@ func RunWithRetry(ctx context.Context, task task.Task, opt task.TaskOpt, preArgs func (w WorkflowData) RunTask(ctx context.Context, opt task.TaskOpt) error { var args interface{} for i, t := range w.Tasks { - klog.V(4).Infof("HHHHHHHHHHHH (%d/%d) work flow run t %s HHHHHHHHHHHH", i+1, len(w.Tasks), t.Name) + klog.V(4).Infof("HHHHHHHHHHHH (%d/%d) work flow run task %s HHHHHHHHHHHH", i+1, len(w.Tasks), t.Name) if t.Skip != nil && t.Skip(ctx, opt) { klog.V(4).Infof("work flow skip task %s", t.Name) continue } if len(t.SubTasks) > 0 { for j, subTask := range t.SubTasks { - klog.V(4).Infof("HHHHHHHHHHHH (%d/%d) work flow run sub t %s HHHHHHHHHHHH", j+1, len(t.SubTasks), subTask.Name) + klog.V(4).Infof("HHHHHHHHHHHH (%d/%d) work flow run sub task %s HHHHHHHHHHHH", j+1, len(t.SubTasks), subTask.Name) if t.Skip != nil && t.Skip(ctx, opt) { klog.V(4).Infof("work flow skip sub task %s", t.Name) continue @@ -86,7 +86,7 @@ func NewJoinWorkFlow() WorkflowData { 
task.NewRemoteUpdateKubeletConfTask(), task.NewRemoteUpdateConfigYamlTask(), task.NewRemoteNodeJoinTask(), - task.NewWaitNodeReadyTask(), + task.NewWaitNodeReadyTask(false), task.NewUpdateVirtualNodeLabelsTask(), task.NewUpdateNodePoolItemStatusTask(v1alpha1.NodeInUse, false), }