From 25d0fee488712d568f2ceae8f940398ca9c1d928 Mon Sep 17 00:00:00 2001 From: Anand Kumar Date: Tue, 6 Sep 2022 17:08:00 +0530 Subject: [PATCH] Run cleanup before running antrea-agent service Before running the antrea-agent, validate whether ovs configuration is in good state. If there is an error, cleanup ovs configuration and then start antrea-agent. Also fix typo and style issues. Also update documentation. Fixes #4122 Signed-off-by: Anand Kumar --- docs/external-node.md | 71 +++++----------- hack/externalnode/install-vm.ps1 | 138 +++++++++++++++++++++++-------- hack/externalnode/install-vm.sh | 104 +++++++++++++++++------ 3 files changed, 202 insertions(+), 111 deletions(-) diff --git a/docs/external-node.md b/docs/external-node.md index eb64f28e96d..22c9ed64806 100644 --- a/docs/external-node.md +++ b/docs/external-node.md @@ -25,7 +25,6 @@ - [Non-IP packet](#non-ip-packet) - [IP packet](#ip-packet) - [Limitations](#limitations) -- [Known issues](#known-issues) ## What is ExternalNode? @@ -195,21 +194,23 @@ spec: change `vm-ns` to the right Namespace. ```bash - kubectl apply -f https://raw.githubusercontent.com/antrea-io/antrea/feature/externalnode/build/yamls/externalnode/vm-agent-rbac.yml + kubectl apply -f https://raw.githubusercontent.com/antrea-io/antrea/main/build/yamls/externalnode/vm-agent-rbac.yml ``` 4. Create `antrea-agent.kubeconfig` file for `antrea-agent` to access the K8S API server. ```bash - export CLUSTER_NAME="kubernetes" - export SERVICE_ACCOUNT="vm-agent" + CLUSTER_NAME="kubernetes" + SERVICE_ACCOUNT="vm-agent" + NAMESPACE="vm-ns" + KUBECONFIG="antrea-agent.kubeconfig" APISERVER=$(kubectl config view -o jsonpath="{.clusters[?(@.name==\"$CLUSTER_NAME\")].cluster.server}") - TOKEN=$(kubectl -n vm-ns get secrets -o jsonpath="{.items[?(@.metadata.annotations['kubernetes\.io/service-account\.name']=='$SERVICE_ACCOUNT')].data.token}"|base64 --decode) - kubectl config --kubeconfig=antrea-agent.kubeconfig set-cluster $CLUSTER_NAME --server=$APISERVER --insecure-skip-tls-verify=true - kubectl config --kubeconfig=antrea-agent.kubeconfig set-credentials antrea-agent --token=$TOKEN - kubectl config --kubeconfig=antrea-agent.kubeconfig set-context antrea-agent@$CLUSTER_NAME --cluster=$CLUSTER_NAME --user=antrea-agent - kubectl config --kubeconfig=antrea-agent.kubeconfig use-context antrea-agent@$CLUSTER_NAME + TOKEN=$(kubectl -n $NAMESPACE get secrets -o jsonpath="{.items[?(@.metadata.annotations['kubernetes\.io/service-account\.name']=='$SERVICE_ACCOUNT')].data.token}"|base64 --decode) + kubectl config --kubeconfig=$KUBECONFIG set-cluster $CLUSTER_NAME --server=$APISERVER --insecure-skip-tls-verify=true + kubectl config --kubeconfig=$KUBECONFIG set-credentials antrea-agent --token=$TOKEN + kubectl config --kubeconfig=$KUBECONFIG set-context antrea-agent@$CLUSTER_NAME --cluster=$CLUSTER_NAME --user=antrea-agent + kubectl config --kubeconfig=$KUBECONFIG use-context antrea-agent@$CLUSTER_NAME # Copy antrea-agent.kubeconfig to the VM ``` @@ -219,13 +220,15 @@ spec: ```bash # Specify the antrea-controller API server endpoint. Antrea-Controller needs # to be exposed via the Node IP or a public IP that is reachable from the VM - export ANTREA_API_SERVER="https://172.18.0.1:443" - export ANTREA_CLUSTER_NAME="antrea" - TOKEN=$(kubectl -n vm-ns get secrets -o jsonpath="{.items[?(@.metadata.annotations['kubernetes\.io/service-account\.name']=='$SERVICE_ACCOUNT')].data.token}"|base64 --decode) - kubectl config --kubeconfig=antrea-agent.antrea.kubeconfig set-cluster $ANTREA_CLUSTER_NAME --server=$ANTREA_API_SERVER --insecure-skip-tls-verify=true - kubectl config --kubeconfig=antrea-agent.antrea.kubeconfig set-credentials antrea-agent --token=$TOKEN - kubectl config --kubeconfig=antrea-agent.antrea.kubeconfig set-context antrea-agent@$ANTREA_CLUSTER_NAME --cluster=$ANTREA_CLUSTER_NAME --user=antrea-agent - kubectl config --kubeconfig=antrea-agent.antrea.kubeconfig use-context antrea-agent@$ANTREA_CLUSTER_NAME + ANTREA_API_SERVER="https://172.18.0.1:443" + ANTREA_CLUSTER_NAME="antrea" + NAMESPACE="vm-ns" + KUBECONFIG="antrea-agent.antrea.kubeconfig" + TOKEN=$(kubectl -n $NAMESPACE get secrets -o jsonpath="{.items[?(@.metadata.annotations['kubernetes\.io/service-account\.name']=='$SERVICE_ACCOUNT')].data.token}"|base64 --decode) + kubectl config --kubeconfig=$KUBECONFIG set-cluster $ANTREA_CLUSTER_NAME --server=$ANTREA_API_SERVER --insecure-skip-tls-verify=true + kubectl config --kubeconfig=$KUBECONFIG set-credentials antrea-agent --token=$TOKEN + kubectl config --kubeconfig=$KUBECONFIG set-context antrea-agent@$ANTREA_CLUSTER_NAME --cluster=$ANTREA_CLUSTER_NAME --user=antrea-agent + kubectl config --kubeconfig=$KUBECONFIG use-context antrea-agent@$ANTREA_CLUSTER_NAME # Copy antrea-agent.antrea.kubeconfig to the VM ``` @@ -592,39 +595,3 @@ interfaces will be added in the future. `ExternalNode` name must be unique in the `cluster` scope even though it is itself a Namespaced resource. - -## Known issues - -`antrea-agent` will fail to re-attach the VM's network interface to the OVS -bridge if the VM is rebooted. On a Windows VM, network connectivity will be -lost after reboot. - -As a workaround for [this issue](https://github.com/antrea-io/antrea/issues/4122), -you can manually remove OVS configurations and then restart `antrea-agent` after -the VM is rebooted. - -To remove OVS configurations and restart `antrea-agent` on Linux VM, - -```shell -sudo systemctl stop antrea-agent -sudo ovs-vsctl del-br br-int -sudo systemctl start antrea-agent -``` - -To remove OVS configurations and restart `antrea-agent` on Windows VM, - -```powershell -$adapterName="Ethernet 0" -Stop-Service antrea-agent -ovs-vsctl.exe del-br br-int -Remove-VMSwitch -ComputerName $(hostname.exe) antrea-switch -Force -Rename-NetAdapter -Name "$adapterName~" -NewName "$adapterName" -Start-Service antrea-agent -``` - -Note: - -- `$adapterName` should be set to the `ExternalNode` interface. -- You may need a separate network interface to RDP into the Windows VM to run - these commands, since the network connectivity on the `ExternalNode` interface - is lost. diff --git a/hack/externalnode/install-vm.ps1 b/hack/externalnode/install-vm.ps1 index 00668a5fb44..6b4bc3e9a2d 100644 --- a/hack/externalnode/install-vm.ps1 +++ b/hack/externalnode/install-vm.ps1 @@ -17,6 +17,12 @@ .PARAMETER AntreaKubeConfigPath Specifies the path of the kubeconfig to access Antrea API Server. + .PARAMETER NodeName + Specifies the ExternalNode name to be used by the antrea-agent. + + .PARAMETER OVSBridge + Specifies the OVS bridge name. + .PARAMETER InstallDir The target installation directory. The default path is "C:\antrea-agent". #> @@ -27,28 +33,30 @@ Param( [parameter(Mandatory = $true)] [string] $KubeConfigPath, [parameter(Mandatory = $true)] [string] $AntreaKubeConfigPath, [parameter(Mandatory = $false)] [string] $NodeName = $(hostname), + [parameter(Mandatory = $false)] [string] $OVSBridge = "br-int", [parameter(Mandatory = $false)] [string] $InstallDir = "C:\antrea-agent" ) $ErrorActionPreference = "Stop" - -$WorkDir = [System.IO.Path]::GetDirectoryName($myInvocation.MyCommand.Definition) -$InstallLog = "$WorkDir\install_vm.log" +$Powershell = (Get-Command powershell).Source +$PowershellArgs = "-ExecutionPolicy Bypass -NoProfile -File" # Antrea paths -$AntreaAgentPath = [io.path]::combine($InstallDir, "antrea-agent.exe") $AntreaAgentConfDir = [io.path]::combine($InstallDir, "conf") $AntreaAgentLogDir = [io.path]::combine($InstallDir, "logs") $AntreaAgentConfPath = [io.path]::combine($AntreaAgentConfDir, "antrea-agent.conf") -$AntreaAgentLogFile = [io.path]::combine($AntreaAgentLogDir, "antrea-agent.log") +$LogFile = [io.path]::combine($AntreaAgentLogDir, "antrea-agent-service.log") +$StartAntreaAgentScript = "" # Constants +$AntreaAgent = "antrea-agent" +$OVSServices = "ovsdb-server", "ovs-vswitchd" +$OVSVswitchd = "ovs-vswitchd" $K8sKubeconfig = "antrea-agent.kubeconfig" $AntreaKubeconfig = "antrea-agent.antrea.kubeconfig" -$OVSServices = "ovsdb-server", "ovs-vswitchd" -$AntreaAgent = "antrea-agent" -$Kubeconfig = "kubeconfig" +$Bridge = "ovsBridge" $ExternalNodeNamespace = "externalNodeNamespace" +$Kubeconfig = "kubeconfig" # List of supported OS versions, verified by antrea # Versions are named like Major.Minor.Build @@ -56,7 +64,7 @@ $SupportedVersions = @("10.0.17763") function Log($Info) { $time = $(get-date -Format g) - "$time $Info " | Tee-Object $InstallLog -Append | Write-Host + "$time $Info " | Tee-Object $LogFile -Append | Write-Host } function ServiceExists($ServiceName) { @@ -67,7 +75,7 @@ function ServiceExists($ServiceName) { } function CheckSupportedVersions() { - echo "Checking supported Windows OS versions" + Log "Checking supported Windows OS versions" $OSVersion = [System.Environment]::OSVersion.Version $Version = $OSVersion.Major.ToString() + "." + $OSVersion.Minor.ToString() + "." + $OSVersion.Build.ToString() foreach ($v in $SupportedVersions) { @@ -79,16 +87,14 @@ function CheckSupportedVersions() { exit 1 } -function PrintPrerequisites() -{ - echo "Please execute these commands to enable Hyper-V" - echo "Install-WindowsFeature Hyper-V-Powershell" - echo "Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V -All -NoRestart" +function PrintPrerequisites() { + Write-Host "Please execute these commands to enable Hyper-V" + Write-Host "Install-WindowsFeature Hyper-V-Powershell" + Write-Host "Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Hyper-V -All -NoRestart" exit 1 } -function CheckPrerequisites() -{ +function CheckPrerequisites() { CheckSupportedVersions $valid = $true Log "Check Hyper-v feature is enabled" @@ -116,7 +122,6 @@ function CheckPrerequisites() } function SetupInstallDir() { - Log "Create install directories" if (-Not (Test-Path $AntreaAgentConfDir)) { New-Item $AntreaAgentConfDir -type directory -Force | Out-Null } @@ -167,6 +172,10 @@ function UpdateAgentConf() { Log "Updating $AntreaAgentConfPath with ${ExternalNodeNamespace}: ${Namespace}" [System.IO.File]::AppendAllText($AntreaAgentConfPath, " ${ExternalNodeNamespace}: ${Namespace}" + ([Environment]::NewLine)) + } elseif ($line -like "*$Bridge*") { + Log "Updating $AntreaAgentConfPath with ${Bridge}: ${OVSBridge}" + [System.IO.File]::AppendAllText($AntreaAgentConfPath, "${Bridge}: ${OVSBridge}" + + ([Environment]::NewLine)) } else { [System.IO.File]::AppendAllText($AntreaAgentConfPath, $line + ([Environment]::NewLine)) @@ -174,34 +183,95 @@ function UpdateAgentConf() { } } -function ConfigureAntreaAgentService() { - # Set environment variables - [Environment]::SetEnvironmentVariable("NODE_NAME", $NodeName, [System.EnvironmentVariableTarget]::Machine) - # Assume nssm is installed and configure service - $AntreaAgentArgs = "--config $AntreaAgentConfPath --log_file $AntreaAgentLogFile --logtostderr=false" - log "Creating service $AntreaAgent $AntreaAgentPath $AntreaAgentArgs" +function CreateAntreaAgentStartupScript() { + $Script:StartAntreaAgentScript = "$AntreaAgentConfDir\Start-AntreaAgent.ps1" + $StartAntreaAgentScriptContent = ' +Param( + [parameter(Mandatory = $true)] [string] $OVSBridge, + [parameter(Mandatory = $true)] [string] $InstallDir +) + +$AntreaSwitch = "antrea-switch" +$AntreaAgentConfDir = [io.path]::combine($InstallDir, "conf") +$AntreaAgentLogDir = [io.path]::combine($InstallDir, "logs") +$AntreaAgentConfPath = [io.path]::combine($AntreaAgentConfDir, "antrea-agent.conf") +$AntreaAgentLogFile = [io.path]::combine($AntreaAgentLogDir, "antrea-agent.log") +$AntreaAgentPath = [io.path]::combine($InstallDir, "antrea-agent.exe") +$LogFile = [io.path]::combine($AntreaAgentLogDir, "antrea-agent-service.log") + +function Log($Info) { + $time = $(get-date -Format g) + "$time $Info " | Tee-Object $LogFile -Append | Write-Host +} + +function ClearOVSConfig() { + Log "Deleting OVS bridge $OVSBridge" try { - # Configured to auto-restart upon reboot - & nssm install $AntreaAgent $AntreaAgentPath $AntreaAgentArgs - } catch { - log "Failed to create service for $AntreaAgent, rc $_" + $adapterName = (Get-VMNetworkAdapter -ComputerName $(hostname.exe) -SwitchName $AntreaSwitch -ManagementOS).Name + ovs-vsctl.exe del-br $OVSBridge + } catch { + Log "Failed to get VMSwitch $AntreaSwitch, rc $_" exit 1 } -} -function StartAntreaAgentService() -{ try { - & nssm start $AntreaAgent + Remove-VMSwitch -ComputerName $(hostname.exe) $AntreaSwitch -Force } catch { - log "Failed to start service for $AntreaAgent, rc $_" + Log "Ignore error while removing VMSwitch, rc $_" + } + + try { + Rename-NetAdapter -Name "$adapterName~" -NewName "$adapterName" + } catch { + Log "Failed to rename network adapter $adapterName~ to $adapterName, rc $_" exit 1 } } -CheckPrerequisites +function CheckOVSConfigAndCleanup() { + $bridges = ovs-vsctl list-br + foreach ($br in $bridges) { + if ($br -ne $OVSBridge) { + continue + } + $ports = ovs-vsctl list-ports $OVSBridge + foreach ($port in $ports) { + $output = ovs-vsctl --no-headings --columns=error list interface "$port" + if ($output -ne "[]") { + ClearOVSConfig + break + } + } + } +} + +function StartAntreaAgent() { + $antreaAgentArgs = "--config $AntreaAgentConfPath --log_file $AntreaAgentLogFile --logtostderr=false" + $cmd = "$AntreaAgentPath $antreaAgentArgs" + Invoke-Expression $cmd +} + +CheckOVSConfigAndCleanup +StartAntreaAgent +' + Set-Content -Path $StartAntreaAgentScript -Value $StartAntreaAgentScriptContent +} + +function ConfigureAntreaAgentService() { + $AntreaAgentArgs = "$StartAntreaAgentScript -InstallDir $InstallDir -OVSBridge $OVSBridge" + nssm install $AntreaAgent $Powershell $PowershellArgs $AntreaAgentArgs + # Add OVS as a dependent service + nssm set $AntreaAgent DependOnService $OVSVswitchd +} + +function StartAntreaAgentService() { + nssm start $AntreaAgent +} + SetupInstallDir +CheckPrerequisites CopyAntreaAgentFiles UpdateAgentConf +CreateAntreaAgentStartupScript ConfigureAntreaAgentService StartAntreaAgentService diff --git a/hack/externalnode/install-vm.sh b/hack/externalnode/install-vm.sh index 07c3964d2b8..d7d81d08766 100644 --- a/hack/externalnode/install-vm.sh +++ b/hack/externalnode/install-vm.sh @@ -20,13 +20,15 @@ function echoerr { >&2 echo "$@" } -_usage="Usage: $0 [--ns ] [--bin ] [--config ] [--kubeconfig ] [--antrea-kubeconfig ] [--nodename ] [--help|-h] - --ns Namespace to be used by the antrea-agent. +_usage="Usage: $0 [--ns ] [--bin ] [--config ] [--kubeconfig ] [--antrea-kubeconfig ] [--nodename ] [--ovs-bridge ] [--validate-ovs] [--help|-h] + --ns Namespace to be used by the antrea-agent --bin Path of the antrea-agent binary --config Path of the antrea-agent configuration file --kubeconfig Path of the kubeconfig to access K8s API Server --antrea-kubeconfig Path of the kubeconfig to access Antrea API Server --nodename ExternalNode name to be used by the antrea-agent + --ovs-bridge Specify the OVS bridge name + --validate-ovs Validate OVS configuration and performs cleanup when any error is detected. --help, -h Print this message and exit Please run the script as sudo user" @@ -40,24 +42,26 @@ function print_help { } INSTALL_PATH="/usr/sbin" -AGENT_BIN_PATH="" -CONFIG_PATH="" -KUBECONFIG="" -ANTREAKUBECONFIG="" -AGENT_NAMESPACE="" -NODE_NAME="$(hostname)" +ANTREA_AGENT="antrea-agent" AGENT_LOG_DIR="/var/log/antrea" AGENT_CONF_PATH="/etc/antrea" +OVS_BRIDGE="br-int" +OVS_VSWITCHD="ovs-vswitchd.service" + +# Optional arguments +VALIDATE_OVS_CONFIG=false +NODE_NAME="$(hostname)" + # List of supported OS versions, verified by antrea. -declare -a SUPPORTED_OS=("Ubuntu 18.04", "Ubuntu 20.04") +declare -a SUPPORTED_OS=("Ubuntu 18.04" "Ubuntu 20.04") check_supported_platform() { echo "Checking supported OS platform" dist_version="$(lsb_release -is) $(lsb_release -rs)" for ver in "${SUPPORTED_OS[@]}"; do - if [ "$ver" == "$dist_version" ]; then - return - fi + if [ "$ver" == "$dist_version" ]; then + return + fi done echoerr "Error ${SUPPORTED_OS[*]} are supported" exit 1 @@ -65,16 +69,16 @@ check_supported_platform() { copy_antrea_agent_files() { if [[ ! -f "$CONFIG_PATH" ]]; then - echoerr "Error $CONFIG_PATH file not found" - exit 1 + echoerr "Error $CONFIG_PATH file not found" + exit 1 fi mkdir -p $AGENT_CONF_PATH echo "Copying $CONFIG_PATH to $AGENT_CONF_PATH" - cp $CONFIG_PATH $AGENT_CONF_PATH + cp "$CONFIG_PATH" $AGENT_CONF_PATH if [[ ! -f "$KUBECONFIG" ]]; then - echoerr "Error $KUBECONFIG file not found" - exit 1 + echoerr "Error $KUBECONFIG file not found" + exit 1 fi echo "Copying $KUBECONFIG to $AGENT_CONF_PATH" @@ -82,8 +86,8 @@ copy_antrea_agent_files() { chmod 600 "${AGENT_CONF_PATH}/antrea-agent.kubeconfig" if [[ ! -f "$ANTREA_KUBECONFIG" ]]; then - echoerr "Error $ANTREA_KUBECONFIG file not found" - exit 1 + echoerr "Error $ANTREA_KUBECONFIG file not found" + exit 1 fi echo "Copying $ANTREA_KUBECONFIG to $AGENT_CONF_PATH" cp "$ANTREA_KUBECONFIG" "${AGENT_CONF_PATH}/antrea-agent.antrea.kubeconfig" @@ -93,24 +97,33 @@ copy_antrea_agent_files() { update_antrea_agent_conf() { echo "Updating clientConnection and antreaClientConnection" sed -i "s|kubeconfig: |kubeconfig: $AGENT_CONF_PATH/|g" $AGENT_CONF_PATH/antrea-agent.conf + if [[ -z "$AGENT_NAMESPACE" ]]; then + AGENT_NAMESPACE="default" + fi echo "Updating externalNodeNamespace to $AGENT_NAMESPACE" sed -i "s|#externalNodeNamespace: default|externalNodeNamespace: $AGENT_NAMESPACE|g" $AGENT_CONF_PATH/antrea-agent.conf + echo "Updating ovsBridge to $OVS_BRIDGE" + sed -i "s|#ovsBridge: br-int|ovsBridge: $OVS_BRIDGE|g" $AGENT_CONF_PATH/antrea-agent.conf } start_antrea_agent_service() { if [[ ! -f "$AGENT_BIN_PATH" ]]; then - echoerr "Error $AGENT_BIN_PATH file not found" - exit 1 + echoerr "Error $AGENT_BIN_PATH file not found" + exit 1 fi mkdir -p $AGENT_LOG_DIR mkdir -p $INSTALL_PATH cp "$AGENT_BIN_PATH" "$INSTALL_PATH" + echo "Copying $BASH_SOURCE to ${AGENT_CONF_PATH}/install-vm.sh" + cp "$BASH_SOURCE" "${AGENT_CONF_PATH}/install-vm.sh" + chmod +x "${AGENT_CONF_PATH}/install-vm.sh" cat >/etc/systemd/system/antrea-agent.service << EOF [Unit] Description="antrea-agent as a systemd service" After=network.target [Service] Environment="NODE_NAME=$NODE_NAME" +ExecStartPre=${AGENT_CONF_PATH}/install-vm.sh --validate-ovs --ovs-bridge $OVS_BRIDGE ExecStart=$INSTALL_PATH/antrea-agent \ --config=$AGENT_CONF_PATH/antrea-agent.conf \ --logtostderr=false \ @@ -120,10 +133,30 @@ Restart=on-failure WantedBy=multi-user.target EOF systemctl daemon-reload - systemctl enable antrea-agent - echo "Starting antrea-agent service" - systemctl start antrea-agent - systemctl status antrea-agent + systemctl enable "$ANTREA_AGENT" + echo "Starting ${ANTREA_AGENT} service" + systemctl start "$ANTREA_AGENT" + systemctl status "$ANTREA_AGENT" +} + +check_ovs_config_and_cleanup() { + bridges=$(ovs-vsctl list-br) + for br in $bridges; do + if [ "$br" != "$OVS_BRIDGE" ] ; then + continue + fi + # Check if any of the interface is in error state. + ports=$(ovs-vsctl list-ports $OVS_BRIDGE) + for port in $ports; do + output=$(ovs-vsctl --no-headings --columns=error list interface "$port") + if [ "$output" != '[]' ] ; then + echoerr "Error while listing interface $port, deleting bridge $OVS_BRIDGE" + ovs-vsctl del-br "$OVS_BRIDGE" + break + fi + done + exit 0 + done } validate_argument() { @@ -168,6 +201,14 @@ case $key in validate_argument $1, $2 shift 2 ;; + --ovs-bridge) + OVS_BRIDGE="$2" + shift 2 + ;; + --validate-ovs) + VALIDATE_OVS_CONFIG=true + shift 1 + ;; -h|--help) print_usage exit 0 @@ -179,6 +220,19 @@ case $key in esac done +# Check whether OVS configuration needs to be cleaned up. +if [ "$VALIDATE_OVS_CONFIG" = true ] ; then + check_ovs_config_and_cleanup + exit 0 +fi + +# Check for mandatory arguments. +if [ -z "$AGENT_BIN_PATH" ] || [ -z "$CONFIG_PATH" ] || [ -z "$KUBECONFIG" ] || [ -z "$ANTREA_KUBECONFIG" ] ; then + echoerr "Missing argument(s)" + print_usage + exit 1 +fi + check_supported_platform copy_antrea_agent_files update_antrea_agent_conf