From 2116b37f5342e1b150f226b59c299544b0d628b3 Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Sun, 28 Feb 2021 09:32:28 +0100 Subject: [PATCH] Allow rabbitmq to run in a larger cluster composed of also non-rabbitmq nodes We introduce the OCF_RESKEY_allowed_cluster_nodes parameter which can be used to specify which nodes of the cluster rabbitmq is expected to run on. When this variable is not set the resource agent assumes that all nodes of the cluster (output of crm_node -l) are eligible to run rabbitmq. The use case here is clusters that have a large number of nodes, where only a specific subset is used for rabbitmq (usually this is done with some constraints). Tested in a 9-node cluster as follows: [root@messaging-0 ~]# pcs resource config rabbitmq Resource: rabbitmq (class=ocf provider=rabbitmq type=rabbitmq-server-ha) Attributes: allowed_cluster_nodes="messaging-0 messaging-1 messaging-2" avoid_using_iptables=true Meta Attrs: container-attribute-target=host master-max=3 notify=true ordered=true Operations: demote interval=0s timeout=30 (rabbitmq-demote-interval-0s) monitor interval=5 timeout=30 (rabbitmq-monitor-interval-5) monitor interval=3 role=Master timeout=30 (rabbitmq-monitor-interval-3) notify interval=0s timeout=20 (rabbitmq-notify-interval-0s) promote interval=0s timeout=60s (rabbitmq-promote-interval-0s) start interval=0s timeout=200s (rabbitmq-start-interval-0s) stop interval=0s timeout=200s (rabbitmq-stop-interval-0s) [root@messaging-0 ~]# pcs status |grep -e rabbitmq -e messaging * Online: [ controller-0 controller-1 controller-2 database-0 database-1 database-2 messaging-0 messaging-1 messaging-2 ] ... 
* Container bundle set: rabbitmq-bundle [cluster.common.tag/rhosp16-openstack-rabbitmq:pcmklatest]: * rabbitmq-bundle-0 (ocf::rabbitmq:rabbitmq-server-ha): Master messaging-0 * rabbitmq-bundle-1 (ocf::rabbitmq:rabbitmq-server-ha): Master messaging-1 * rabbitmq-bundle-2 (ocf::rabbitmq:rabbitmq-server-ha): Master messaging-2 --- scripts/rabbitmq-server-ha.ocf | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/scripts/rabbitmq-server-ha.ocf b/scripts/rabbitmq-server-ha.ocf index 2797d6264864..689c4f074c23 100755 --- a/scripts/rabbitmq-server-ha.ocf +++ b/scripts/rabbitmq-server-ha.ocf @@ -53,6 +53,7 @@ OCF_RESKEY_rmq_feature_health_check_default=true OCF_RESKEY_rmq_feature_local_list_queues_default=true OCF_RESKEY_limit_nofile_default=65535 OCF_RESKEY_avoid_using_iptables_default=false +OCF_RESKEY_allowed_cluster_nodes_default="" : ${HA_LOGTAG="lrmd"} : ${HA_LOGFACILITY="daemon"} @@ -80,6 +81,7 @@ OCF_RESKEY_avoid_using_iptables_default=false : ${OCF_RESKEY_rmq_feature_local_list_queues=${OCF_RESKEY_rmq_feature_local_list_queues_default}} : ${OCF_RESKEY_limit_nofile=${OCF_RESKEY_limit_nofile_default}} : ${OCF_RESKEY_avoid_using_iptables=${OCF_RESKEY_avoid_using_iptables_default}} +: ${OCF_RESKEY_allowed_cluster_nodes=${OCF_RESKEY_allowed_cluster_nodes_default}} ####################################################################### @@ -368,6 +370,18 @@ noops. This is useful when we run inside containers. + + +When set to anything other than the empty string it must contain the list of +cluster node names, separated by spaces, where the rabbitmq resource is allowed to run. +This is needed when rabbitmq is running on a subset of nodes part of a larger +cluster. The default ("") is to assume that all nodes part of the cluster will +run the rabbitmq resource. 
+ +List of cluster nodes where rabbitmq is allowed to run + + + $EXTENDED_OCF_PARAMS @@ -854,10 +868,18 @@ get_running_nodes() { get_alive_pacemaker_nodes_but() { if [ -z "$1" ]; then - echo `crm_node -l -p | sed -e '/(null)/d'` + tmp_pcmk_node_list=`crm_node -l -p | sed -e '/(null)/d'` + else + tmp_pcmk_node_list=`crm_node -l -p | sed -e "s/${1}//g" | sed -e '/(null)/d'` + fi + # If OCF_RESKEY_allowed_cluster_nodes is set then we only want the intersection + # of the cluster node output and the allowed_cluster_nodes list + if [ -z "${OCF_RESKEY_allowed_cluster_nodes}" ]; then + pcmk_node_list=$tmp_pcmk_node_list else - echo `crm_node -l -p | sed -e "s/${1}//g" | sed -e '/(null)/d'` + pcmk_node_list=`for i in $tmp_pcmk_node_list ${OCF_RESKEY_allowed_cluster_nodes}; do echo $i; done | sort | uniq -d` fi + echo $pcmk_node_list } # Get current master. If a parameter is provided,