diff --git a/pkg/ovncontroller/daemonset.go b/pkg/ovncontroller/daemonset.go index dfb9e0f1..215bd621 100644 --- a/pkg/ovncontroller/daemonset.go +++ b/pkg/ovncontroller/daemonset.go @@ -177,7 +177,7 @@ func CreateOVSDaemonSet( Lifecycle: &corev1.Lifecycle{ PreStop: &corev1.LifecycleHandler{ Exec: &corev1.ExecAction{ - Command: []string{"/usr/share/openvswitch/scripts/ovs-ctl", "stop", "--no-ovs-vswitchd"}, + Command: []string{"/usr/local/bin/container-scripts/stop-ovsdb-server.sh"}, }, }, }, @@ -199,12 +199,11 @@ func CreateOVSDaemonSet( }, { Name: "ovs-vswitchd", - Command: []string{"/bin/bash", "-c"}, - Args: []string{"/usr/local/bin/container-scripts/net_setup.sh && /usr/sbin/ovs-vswitchd --pidfile --mlockall"}, + Command: []string{"/usr/local/bin/container-scripts/start-vswitchd.sh"}, Lifecycle: &corev1.Lifecycle{ PreStop: &corev1.LifecycleHandler{ Exec: &corev1.ExecAction{ - Command: []string{"/usr/share/openvswitch/scripts/ovs-ctl", "stop", "--no-ovsdb-server"}, + Command: []string{"/usr/local/bin/container-scripts/stop-vswitchd.sh"}, }, }, }, diff --git a/templates/ovncontroller/bin/functions b/templates/ovncontroller/bin/functions index c1a8a79c..8b4d7388 100755 --- a/templates/ovncontroller/bin/functions +++ b/templates/ovncontroller/bin/functions @@ -23,6 +23,20 @@ EnableChassisAsGateway=${EnableChassisAsGateway:-true} PhysicalNetworks=${PhysicalNetworks:-""} OVNHostName=${OVNHostName:-""} +ovs_dir=/var/lib/openvswitch +FLOWS_RESTORE_SCRIPT=$ovs_dir/flows-script +FLOWS_RESTORE_DIR=$ovs_dir/saved-flows +SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE=$ovs_dir/is_safe_to_stop_ovsdb_server + +function cleanup_ovsdb_server_semaphore() { + rm -f $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE 2>&1 > /dev/null +} + +function cleanup_flows_backup() { + rm -f $FLOWS_RESTORE_SCRIPT 2>&1 > /dev/null + rm -rf $FLOWS_RESTORE_DIR 2>&1 > /dev/null +} + function wait_for_ovsdb_server { while true; do /usr/bin/ovs-vsctl show diff --git a/templates/ovncontroller/bin/start-ovsdb-server.sh b/templates/ovncontroller/bin/start-ovsdb-server.sh index 1a3e577c..42bf8f63 100755 --- a/templates/ovncontroller/bin/start-ovsdb-server.sh +++ b/templates/ovncontroller/bin/start-ovsdb-server.sh @@ -13,11 +13,15 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. + set -ex +source $(dirname $0)/functions -CTL_ARGS="--system-id=random --no-ovs-vswitchd" +# Remove the obsolete semaphore file in case it still exists. +cleanup_ovsdb_server_semaphore # Initialize or upgrade database if needed +CTL_ARGS="--system-id=random --no-ovs-vswitchd" /usr/share/openvswitch/scripts/ovs-ctl start $CTL_ARGS /usr/share/openvswitch/scripts/ovs-ctl stop $CTL_ARGS diff --git a/templates/ovncontroller/bin/start-vswitchd.sh b/templates/ovncontroller/bin/start-vswitchd.sh new file mode 100755 index 00000000..c9ee0a24 --- /dev/null +++ b/templates/ovncontroller/bin/start-vswitchd.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# +# Copyright 2024 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +source $(dirname $0)/functions +wait_for_ovsdb_server + +# The order - first wait for db server, then set -ex - is important. Otherwise, +# wait_for_ovsdb_server interrim check would make the script exit. +set -ex + +# Configure encap IP. +OVNEncapIP=$(ip -o addr show dev {{ .OVNEncapNIC }} scope global | awk '{print $4}' | cut -d/ -f1) +ovs-vsctl --no-wait set open . external-ids:ovn-encap-ip=${OVNEncapIP} + +# Before starting vswitchd, block it from flushing existing datapath flows. +ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait=true + +# It's safe to start vswitchd now. Do it. +# --detach to allow the execution to continue to restoring the flows. +/usr/sbin/ovs-vswitchd --pidfile --mlockall --detach + +# Restore saved flows. +if [ -f $FLOWS_RESTORE_SCRIPT ]; then + # It's unsafe to leave these files in place if they fail once. Make sure we + # remove them if the eval fails. + trap cleanup_flows_backup EXIT + eval "$(cat $FLOWS_RESTORE_SCRIPT)" + trap - EXIT +fi + +# It's also unsafe to leave these files after flow-restore-wait flag is removed +# because the backup will become stale and if a container later crashes, it may +# mistakenly try to restore from this old backup. +cleanup_flows_backup + +# Now, inform vswitchd that we are done. +ovs-vsctl remove open_vswitch . other_config flow-restore-wait + +# This is container command script. Block it from exiting, otherwise k8s will +# restart the container again. +sleep infinity diff --git a/templates/ovncontroller/bin/net_setup.sh b/templates/ovncontroller/bin/stop-ovsdb-server.sh similarity index 53% rename from templates/ovncontroller/bin/net_setup.sh rename to templates/ovncontroller/bin/stop-ovsdb-server.sh index 305200e4..2d15c507 100755 --- a/templates/ovncontroller/bin/net_setup.sh +++ b/templates/ovncontroller/bin/stop-ovsdb-server.sh @@ -1,6 +1,6 @@ #!/bin/sh # -# Copyright 2023 Red Hat Inc. +# Copyright 2024 Red Hat Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain @@ -14,11 +14,17 @@ # License for the specific language governing permissions and limitations # under the License. -OVNEncapIP=$(ip -o addr show dev {{ .OVNEncapNIC }} scope global | awk '{print $4}' | cut -d/ -f1) - +set -ex source $(dirname $0)/functions -wait_for_ovsdb_server +# The ovs_vswitchd container has to terminate before ovsdb-server because it +# needs access to db in its preStop script. The preStop script backs up flows +# for restoration during the next startup. This semaphore ensures the vswitchd +# container is not torn down before flows are saved. +while [ ! -f $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE ]; do + sleep 0.5 +done +cleanup_ovsdb_server_semaphore -set -ex -ovs-vsctl --no-wait set open . external-ids:ovn-encap-ip=${OVNEncapIP} +# Now it's safe to stop db server. Do it. +/usr/share/openvswitch/scripts/ovs-ctl stop --no-ovs-vswitchd diff --git a/templates/ovncontroller/bin/stop-vswitchd.sh b/templates/ovncontroller/bin/stop-vswitchd.sh new file mode 100755 index 00000000..d12f74c9 --- /dev/null +++ b/templates/ovncontroller/bin/stop-vswitchd.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# +# Copyright 2024 Red Hat Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +set -ex +source $(dirname $0)/functions + +# Clean up any previously created flow backups to avoid conflict with newly +# generated backup. +cleanup_flows_backup + +# Passing --real to mimic what upstream startup scripts do; maybe redundant. +bridges=$(ovs-vsctl -- --real list-br) + +# Saving flows to avoid disrupting gateway datapath. +mkdir $FLOWS_RESTORE_DIR +TMPDIR=$FLOWS_RESTORE_DIR /usr/share/openvswitch/scripts/ovs-save save-flows $bridges > $FLOWS_RESTORE_SCRIPT + +# Once save-flows logic is complete it no longer needs ovsdb-server, this file +# unlocks the db preStop script, working as a semaphore +touch $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE + +# Finally, stop vswitchd. +/usr/share/openvswitch/scripts/ovs-ctl stop --no-ovsdb-server diff --git a/tests/functional/ovncontroller_controller_test.go b/tests/functional/ovncontroller_controller_test.go index cf0d7d3d..ba321325 100644 --- a/tests/functional/ovncontroller_controller_test.go +++ b/tests/functional/ovncontroller_controller_test.go @@ -65,7 +65,7 @@ var _ = Describe("OVNController controller", func() { }, timeout, interval).Should(ContainElement("openstack.org/ovncontroller")) }) - It("should create a ConfigMap for net_setup.sh with eth0 as Interface Name", func() { + It("should create a ConfigMap for start-vswitchd.sh with eth0 as Interface Name", func() { scriptsCM := types.NamespacedName{ Namespace: OVNControllerName.Namespace, Name: fmt.Sprintf("%s-%s", OVNControllerName.Name, "scripts"), @@ -74,7 +74,7 @@ var _ = Describe("OVNController controller", func() { return *th.GetConfigMap(scriptsCM) }, timeout, interval).ShouldNot(BeNil()) - Expect(th.GetConfigMap(scriptsCM).Data["net_setup.sh"]).Should( + Expect(th.GetConfigMap(scriptsCM).Data["start-vswitchd.sh"]).Should( ContainSubstring("addr show dev eth0")) th.ExpectCondition( @@ -179,12 +179,12 @@ var _ = Describe("OVNController controller", func() { th.AssertJobDoesNotExist(configJobOVS) }) - It("should create a ConfigMap for net_setup.sh with eth0 as Interface Name", func() { + It("should create a ConfigMap for start-vswitchd.sh with eth0 as Interface Name", func() { Eventually(func() corev1.ConfigMap { return *th.GetConfigMap(scriptsCM) }, timeout, interval).ShouldNot(BeNil()) - Expect(th.GetConfigMap(scriptsCM).Data["net_setup.sh"]).Should( + Expect(th.GetConfigMap(scriptsCM).Data["start-vswitchd.sh"]).Should( ContainSubstring("addr show dev eth0")) th.ExpectCondition( @@ -445,7 +445,7 @@ var _ = Describe("OVNController controller", func() { }, timeout, interval).Should(Succeed()) }) - It("should create a ConfigMap for net_setup.sh with nic name as Network Attachment and OwnerReferences set", func() { + It("should create a ConfigMap for start-vswitchd.sh with nic name as Network Attachment and OwnerReferences set", func() { scriptsCM := types.NamespacedName{ Namespace: OVNControllerName.Namespace, @@ -461,7 +461,7 @@ var _ = Describe("OVNController controller", func() { Expect(th.GetConfigMap(scriptsCM).ObjectMeta.OwnerReferences[0].Kind).To(Equal("OVNController")) ovncontroller := GetOVNController(OVNControllerName) - Expect(th.GetConfigMap(scriptsCM).Data["net_setup.sh"]).Should( + Expect(th.GetConfigMap(scriptsCM).Data["start-vswitchd.sh"]).Should( ContainSubstring("addr show dev %s", ovncontroller.Spec.NetworkAttachment)) }) It("should create an external ConfigMap with expected key-value pairs and OwnerReferences set", func() { diff --git a/tests/kuttl/tests/ovn_restart_flow/01-assert.yaml b/tests/kuttl/tests/ovn_restart_flow/01-assert.yaml new file mode 120000 index 00000000..461654ea --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/01-assert.yaml @@ -0,0 +1 @@ +../../common/assert_sample_deployment.yaml \ No newline at end of file diff --git a/tests/kuttl/tests/ovn_restart_flow/01-deploy-ovn.yaml b/tests/kuttl/tests/ovn_restart_flow/01-deploy-ovn.yaml new file mode 120000 index 00000000..ec519de5 --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/01-deploy-ovn.yaml @@ -0,0 +1 @@ +../../common/deploy_ovn.yaml \ No newline at end of file diff --git a/tests/kuttl/tests/ovn_restart_flow/02-add-bridge-and-flows.yaml b/tests/kuttl/tests/ovn_restart_flow/02-add-bridge-and-flows.yaml new file mode 100644 index 00000000..680f212f --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/02-add-bridge-and-flows.yaml @@ -0,0 +1,9 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + node=$(oc get nodes -o name|sort|head -1| sed "s|node/||g") + controller_pod=$(oc get pod -n $NAMESPACE -l service=ovn-controller-ovs --field-selector spec.nodeName=$node -o name | head -1) + oc rsh -n $NAMESPACE --container ovs-vswitchd $controller_pod ovs-vsctl --if-exists del-br br-test-flows || exit 1 + oc rsh -n $NAMESPACE --container ovs-vswitchd $controller_pod ovs-vsctl add-br br-test-flows || exit 1 + oc rsh -n $NAMESPACE --container ovs-vswitchd $controller_pod ovs-ofctl add-flow br-test-flows "table=100, priority=200 action=drop" || exit 1 diff --git a/tests/kuttl/tests/ovn_restart_flow/03-assert.yaml b/tests/kuttl/tests/ovn_restart_flow/03-assert.yaml new file mode 120000 index 00000000..461654ea --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/03-assert.yaml @@ -0,0 +1 @@ +../../common/assert_sample_deployment.yaml \ No newline at end of file diff --git a/tests/kuttl/tests/ovn_restart_flow/03-delete-pods.yaml b/tests/kuttl/tests/ovn_restart_flow/03-delete-pods.yaml new file mode 100644 index 00000000..8c86bab7 --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/03-delete-pods.yaml @@ -0,0 +1,7 @@ +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: | + node=$(oc get nodes -o name|sort|head -1| sed "s|node/||g") + controller_pod=$(oc get pod -n $NAMESPACE -l service=ovn-controller-ovs --field-selector spec.nodeName=$node -o name | head -1) + oc delete -n $NAMESPACE $controller_pod diff --git a/tests/kuttl/tests/ovn_restart_flow/04-assert-flows-present.yaml b/tests/kuttl/tests/ovn_restart_flow/04-assert-flows-present.yaml new file mode 100644 index 00000000..fff4833e --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/04-assert-flows-present.yaml @@ -0,0 +1,17 @@ +# +# Check for: +# +# - Check that the flow added to the test bridge "br-test-flows" is present +# after the ovn-controller has been restarted. +# Expected flow: "table=100, priority=200 actions=drop" + +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +commands: + - script: | + node=$(oc get nodes -o name|sort|head -1| sed "s|node/||g") + controller_pod=$(oc get pod -n $NAMESPACE -l service=ovn-controller-ovs --field-selector spec.nodeName=$node -o name | head -1) + expected_flows="table=100, priority=200 actions=drop" + oc rsh -n $NAMESPACE --container ovs-vswitchd $controller_pod ovs-ofctl dump-flows br-test-flows --no-stats | grep -q "$expected_flows" || exit 1 + oc rsh -n $NAMESPACE --container ovs-vswitchd $controller_pod ovs-vsctl --if-exists del-br br-test-flows diff --git a/tests/kuttl/tests/ovn_restart_flow/06-cleanup-ovn.yaml b/tests/kuttl/tests/ovn_restart_flow/06-cleanup-ovn.yaml new file mode 120000 index 00000000..02360341 --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/06-cleanup-ovn.yaml @@ -0,0 +1 @@ +../../common/cleanup-ovn.yaml \ No newline at end of file diff --git a/tests/kuttl/tests/ovn_restart_flow/06-errors.yaml b/tests/kuttl/tests/ovn_restart_flow/06-errors.yaml new file mode 120000 index 00000000..7314c155 --- /dev/null +++ b/tests/kuttl/tests/ovn_restart_flow/06-errors.yaml @@ -0,0 +1 @@ +../../common/errors_cleanup_ovn.yaml \ No newline at end of file