From a870cf362c284b81b7149b49b79f4d4d0e9e94ab Mon Sep 17 00:00:00 2001 From: bczoma Date: Fri, 27 Nov 2020 16:38:13 -0500 Subject: [PATCH 01/19] Initial server certificate support work --- pubsubplus/templates/solaceConfigMap.yaml | 416 +++++++++++--------- pubsubplus/templates/solaceStatefulSet.yaml | 13 +- pubsubplus/values.yaml | 13 + 3 files changed, 248 insertions(+), 194 deletions(-) diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index 7e6db6fb..90d5ed54 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -9,224 +9,248 @@ metadata: helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version }} data: init.sh: |- - # export username_admin_passwordfilepath=/mnt/disks/secrets/username_admin_password - export username_admin_password=`cat /mnt/disks/secrets/username_admin_password` - export username_admin_globalaccesslevel=admin - export service_ssh_port='2222' - export service_webtransport_port='8008' - export service_webtransport_tlsport='1443' - export service_semp_tlsport='1943' - export logging_debug_output=all + export username_admin_passwordfilepath="/mnt/disks/secrets/username_admin_password" + export username_admin_globalaccesslevel=admin + export service_ssh_port='2222' + export service_webtransport_port='8008' + export service_webtransport_tlsport='1443' + export service_semp_tlsport='1943' + export logging_debug_output=all {{- if eq .Values.solace.size "dev" }} - export system_scaling_maxconnectioncount="100" + export system_scaling_maxconnectioncount="100" {{- else if eq .Values.solace.size "prod100" }} - export system_scaling_maxconnectioncount="100" + export system_scaling_maxconnectioncount="100" {{- else if eq .Values.solace.size "prod1k" }} - export system_scaling_maxconnectioncount="1000" + export system_scaling_maxconnectioncount="1000" {{- else if eq .Values.solace.size "prod10k" }} - export system_scaling_maxconnectioncount="10000" + export system_scaling_maxconnectioncount="10000" {{- else if eq .Values.solace.size "prod100k" }} - export system_scaling_maxconnectioncount="100000" + export system_scaling_maxconnectioncount="100000" {{- else if eq .Values.solace.size "prod200k" }} - export system_scaling_maxconnectioncount="200000" + export system_scaling_maxconnectioncount="200000" +{{- end }} +{{- if and (.Values.tls) (.Values.tls.enabled) }} + cat /mnt/disks/certs/server/tls.key /mnt/disks/certs/server/tls.crt > /dev/shm/server.cert + export tls_servercertificate_filepath="/dev/shm/server.cert" {{- end }} {{- if .Values.solace.redundancy }} - # [TODO] KBARR not using correct method of finding ordinal until we bump min Kubernetes release above 1.8.1 - # https://github.com/kubernetes/kubernetes/issues/40651 - # node_ordinal=$(STATEFULSET_ORDINAL) - IFS='-' read -ra host_array <<< $(hostname) - node_ordinal=${host_array[-1]} - if [[ ! -z `echo $STATEFULSET_NAMESPACE` ]]; then - namespace=`echo $STATEFULSET_NAMESPACE` - else - namespace=default - fi - service={{ template "solace.fullname" . 
}} - # Deal with the fact we cannot accept "-" in routre names - service_name=$(echo ${service} | sed 's/-//g') - export routername=$(echo $(hostname) | sed 's/-//g') - export redundancy_enable=yes - export configsync_enable=yes - export redundancy_authentication_presharedkey_key=`cat /mnt/disks/secrets/username_admin_password | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64` # Right-pad with 0s to 50 length - export redundancy_group_node_${service_name}0_nodetype=message_routing - export redundancy_group_node_${service_name}0_connectvia=${service}-0.${service}-discovery.${namespace}.svc - export redundancy_group_node_${service_name}1_nodetype=message_routing - export redundancy_group_node_${service_name}1_connectvia=${service}-1.${service}-discovery.${namespace}.svc - export redundancy_group_node_${service_name}2_nodetype=monitoring - export redundancy_group_node_${service_name}2_connectvia=${service}-2.${service}-discovery.${namespace}.svc + # [TODO] KBARR not using correct method of finding ordinal until we bump min Kubernetes release above 1.8.1 + # https://github.com/kubernetes/kubernetes/issues/40651 + # node_ordinal=$(STATEFULSET_ORDINAL) + IFS='-' read -ra host_array <<< $(hostname) + node_ordinal=${host_array[-1]} + if [[ ! -z `echo $STATEFULSET_NAMESPACE` ]]; then + namespace=`echo $STATEFULSET_NAMESPACE` + else + namespace=default + fi + service={{ template "solace.fullname" . }} + # Deal with the fact we cannot accept "-" in routre names + service_name=$(echo ${service} | sed 's/-//g') + export routername=$(echo $(hostname) | sed 's/-//g') + export redundancy_enable=yes + export configsync_enable=yes + export redundancy_authentication_presharedkey_key=`cat /mnt/disks/secrets/username_admin_password | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64` # Right-pad with 0s to 50 length + export redundancy_group_node_${service_name}0_nodetype=message_routing + export redundancy_group_node_${service_name}0_connectvia=${service}-0.${service}-discovery.${namespace}.svc + export redundancy_group_node_${service_name}1_nodetype=message_routing + export redundancy_group_node_${service_name}1_connectvia=${service}-1.${service}-discovery.${namespace}.svc + export redundancy_group_node_${service_name}2_nodetype=monitoring + export redundancy_group_node_${service_name}2_connectvia=${service}-2.${service}-discovery.${namespace}.svc - case ${node_ordinal} in - 0) - export nodetype=message_routing - export redundancy_matelink_connectvia=${service}-1.${service}-discovery.${namespace}.svc - export redundancy_activestandbyrole=primary - ;; - 1) - export nodetype=message_routing - export redundancy_matelink_connectvia=${service}-0.${service}-discovery.${namespace}.svc - export redundancy_activestandbyrole=backup - ;; - 2) - export nodetype=monitoring - ;; - esac + case ${node_ordinal} in + 0) + export nodetype=message_routing + export redundancy_matelink_connectvia=${service}-1.${service}-discovery.${namespace}.svc + export redundancy_activestandbyrole=primary + ;; + 1) + export nodetype=message_routing + export redundancy_matelink_connectvia=${service}-0.${service}-discovery.${namespace}.svc + export redundancy_activestandbyrole=backup + ;; + 2) + export nodetype=monitoring + ;; + esac {{- end }} - setup-config-sync.sh: |- - #!/bin/bash + startup-broker.sh: |- + #!/bin/bash + APP=`basename "$0"` + node_ordinal=${host_array[-1]} + echo "`date` INFO: ${APP}-Node ordinal: ${node_ordinal}" + echo "`date` INFO: ${APP}-Waiting for management API to become available" + 
password=`cat /mnt/disks/secrets/username_admin_password` + loop_guard=60 + pause=10 + count=0 + while [ ${count} -lt ${loop_guard} ]; do + if /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP -t ; then + break + fi + run_time=$((${count} * ${pause})) + ((count++)) + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Management API not yet accessible" + sleep ${pause} + done + if [ ${count} -eq ${loop_guard} ]; then + echo "`date` ERROR: ${APP}-Solace Management API never came up" >&2 + exit 1 + fi +{{- if and (.Values.tls) (.Values.tls.enabled) }} + cert_results=`curl -k -sS -u admin:${password} https://localhost:1943/SEMP \ + -d "../../../../../dev/shm/server.cert"` + rm /dev/shm/server.cert + if ! grep -q '' <<< "$cert_results"; then + echo "`date` ERROR: ${APP}-Unable to set the server certificate, exiting" >&2 + exit 1 + fi +{{- end }} {{- if .Values.solace.redundancy }} - APP=`basename "$0"` - # [TODO] KBARR not using correct method of finding ordinal until we bump min Kubernetes release above 1.8.1 - # https://github.com/kubernetes/kubernetes/issues/40651 - # node_ordinal=$(STATEFULSET_ORDINAL) - IFS='-' read -ra host_array <<< $(hostname) - node_ordinal=${host_array[-1]} - echo "`date` INFO: ${APP}-node ordinal: ${node_ordinal}" - password=`cat /mnt/disks/secrets/username_admin_password` - loop_guard=60 - pause=10 - count=0 + IFS='-' read -ra host_array <<< $(hostname) + #exclude monitor node from config-sync check + if [ "${node_ordinal}" != "2" ]; then resync_step="" role="" - #exclude monitor node from config-sync check - if [ "${node_ordinal}" != "2" ]; then - while [ ${count} -lt ${loop_guard} ]; do - role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ - -q "" \ - -v "/rpc-reply/rpc/show/redundancy/active-standby-role[text()]"` - run_time=$((${count} * ${pause})) - case "`echo ${role_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`" in - "Primary") - role="primary" + count=0 + while [ ${count} -lt ${loop_guard} ]; do + role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + -q "" \ + -v "/rpc-reply/rpc/show/redundancy/active-standby-role[text()]"` + run_time=$((${count} * ${pause})) + case "`echo ${role_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -`" in + "Primary") + role="primary" + break + ;; + "Backup") + role="backup" + break + ;; + esac + ((count++)) + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, got ${role_results} for this node's active-standby role" + sleep ${pause} + done + if [ ${count} -eq ${loop_guard} ]; then + echo "`date` ERROR: ${APP}-Could not determine this node's active-standby role" >&2 + exit 1 + fi + # Determine local activity + count=0 + echo "`date` INFO: ${APP}-Management API is up, determined that this node's active-standby role is: ${role}" + while [ ${count} -lt ${loop_guard} ]; do + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + -q "" \ + -v "/rpc-reply/rpc/show/redundancy/virtual-routers/${role}/status/activity[text()]"` + local_activity=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + run_time=$((${count} * ${pause})) + case "${local_activity}" in + "Local Active") + echo "`date` INFO: ${APP}-Node activity status is Local Active, after ${run_time} seconds" + # We should only be here on new cluster create, if not likely a bug + # Need to issue assert master to get back 
into sync" + resync_step="assert-master" break ;; - "Backup") - role="backup" + "Mate Active") + echo "`date` INFO: ${APP}-Node activity status is Mate Active, after ${run_time} seconds" + # This is normal state if we are backup or recreated later on + # will issue a resync master to get back into sync + resync_step="resync-master" break ;; - esac - ((count++)) - echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Management API not yet accessible" - sleep ${pause} - done - if [ ${count} -eq ${loop_guard} ]; then - echo "`date` ERROR: ${APP}-Solace Management API never came up" >&2 - exit 1 - fi - # Determine local activity + esac + ((count++)) + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Local activity state is: ${local_activity}" + sleep ${pause} + done + if [ ${count} -eq ${loop_guard} ]; then + echo "`date` ERROR: ${APP}-Local activity state never become Local Active or Mate Active" >&2 + exit 1 + fi + # If we need to assert master, then we need to wait for mate to reconcile + if [ "${resync_step}" = "assert-master" ]; then count=0 - echo "`date` INFO: ${APP}-Management API is up, determined that this node's active-standby role is: ${role}" + echo "`date` INFO: ${APP}-Waiting for mate activity state to be 'Standby'" while [ ${count} -lt ${loop_guard} ]; do online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ -q "" \ - -v "/rpc-reply/rpc/show/redundancy/virtual-routers/${role}/status/activity[text()]"` - local_activity=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + -v "/rpc-reply/rpc/show/redundancy/virtual-routers/${role}/status/detail/priority-reported-by-mate/summary[text()]"` + mate_activity=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` run_time=$((${count} * ${pause})) - case "${local_activity}" in - "Local Active") - echo "`date` INFO: ${APP}-Node activity status is Local Active, after ${run_time} seconds" - # We should only be here on new cluster create, if not likely a bug - # Need to issue assert master to get back into sync" - resync_step="assert-master" - break - ;; - "Mate Active") - echo "`date` INFO: ${APP}-Node activity status is Mate Active, after ${run_time} seconds" - # This is normal state if we are backup or recreated later on - # will issue a resync master to get back into sync - resync_step="resync-master" + case "${mate_activity}" in + "Standby") + echo "`date` INFO: ${APP}-Activity state reported by mate is Standby, after ${run_time} seconds" break ;; esac ((count++)) - echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Local activity state is: ${local_activity}" + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Mate activity state is: ${mate_activity}, not yet in sync" sleep ${pause} done if [ ${count} -eq ${loop_guard} ]; then - echo "`date` ERROR: ${APP}-Local activity state never become Local Active or Mate Active" >&2 + echo "`date` ERROR: ${APP}-Mate not in sync, never reached Standby" >&2 exit 1 fi - # If we need to assert master, then we need to wait for mate to reconcile - if [ "${resync_step}" = "assert-master" ]; then - count=0 - echo "`date` INFO: ${APP}-Waiting for mate activity state to be 'Standby'" - while [ ${count} -lt ${loop_guard} ]; do - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ - -q "" \ - -v "/rpc-reply/rpc/show/redundancy/virtual-routers/${role}/status/detail/priority-reported-by-mate/summary[text()]"` - mate_activity=`echo 
${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - run_time=$((${count} * ${pause})) - case "${mate_activity}" in - "Standby") - echo "`date` INFO: ${APP}-Activity state reported by mate is Standby, after ${run_time} seconds" - break - ;; - esac - ((count++)) - echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Mate activity state is: ${mate_activity}, not yet in sync" - sleep ${pause} - done - if [ ${count} -eq ${loop_guard} ]; then - echo "`date` ERROR: ${APP}-Mate not in sync, never reached Standby" >&2 - exit 1 - fi - fi # if assert-master - # Ensure Config-sync connection state is Connected before proceeding - count=0 - echo "`date` INFO: ${APP}-Waiting for config-sync connected" - while [ ${count} -lt ${loop_guard} ]; do - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ - -q "" \ - -v "/rpc-reply/rpc/show/config-sync/status/client/connection-state"` - connection_state=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - run_time=$((${count} * ${pause})) - case "${connection_state}" in - "Connected") - echo "`date` INFO: ${APP}-Config-sync connection state is Connected, after ${run_time} seconds" - break - ;; - esac - ((count++)) - echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Config-sync connection state is: ${connection_state}, not yet in Connected" - sleep ${pause} - done - if [ ${count} -eq ${loop_guard} ]; then - echo "`date` ERROR: ${APP}-Config-sync connection state never reached Connected" >&2 - exit 1 - fi - # Now can issue {resync_step} command - echo "`date` INFO: ${APP}-Initiating ${resync_step}" - /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ - -q "<${resync_step}>" - /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ - -q "<${resync_step}>default" - # Wait for config-sync results - count=0 - echo "`date` INFO: ${APP}-Waiting for config-sync connected" - while [ ${count} -lt ${loop_guard} ]; do - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ - -q "" \ - -v "/rpc-reply/rpc/show/config-sync/status/oper-status"` - confsyncstatus_results=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` - run_time=$((${count} * ${pause})) - case "${confsyncstatus_results}" in - "Up") - echo "`date` INFO: ${APP}-Config-sync is Up, after ${run_time} seconds" - break - ;; - esac - ((count++)) - echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Config-sync is: ${confsyncstatus_results}, not yet Up" - sleep ${pause} - done - if [ ${count} -eq ${loop_guard} ]; then - echo "`date` ERROR: ${APP}-Config-sync never reached state \"Up\"" >&2 - exit 1 - fi - fi # if not monitor - echo "`date` INFO: ${APP}-Solace Event Broker bringup is complete for this node." 
+ fi # if assert-master + # Ensure Config-sync connection state is Connected before proceeding + count=0 + echo "`date` INFO: ${APP}-Waiting for config-sync connected" + while [ ${count} -lt ${loop_guard} ]; do + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + -q "" \ + -v "/rpc-reply/rpc/show/config-sync/status/client/connection-state"` + connection_state=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + run_time=$((${count} * ${pause})) + case "${connection_state}" in + "Connected") + echo "`date` INFO: ${APP}-Config-sync connection state is Connected, after ${run_time} seconds" + break + ;; + esac + ((count++)) + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Config-sync connection state is: ${connection_state}, not yet in Connected" + sleep ${pause} + done + if [ ${count} -eq ${loop_guard} ]; then + echo "`date` ERROR: ${APP}-Config-sync connection state never reached Connected" >&2 + exit 1 + fi + # Now can issue {resync_step} command + echo "`date` INFO: ${APP}-Initiating ${resync_step}" + /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + -q "<${resync_step}>" + /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + -q "<${resync_step}>default" + # Wait for config-sync results + count=0 + echo "`date` INFO: ${APP}-Waiting for config-sync connected" + while [ ${count} -lt ${loop_guard} ]; do + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + -q "" \ + -v "/rpc-reply/rpc/show/config-sync/status/oper-status"` + confsyncstatus_results=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` + run_time=$((${count} * ${pause})) + case "${confsyncstatus_results}" in + "Up") + echo "`date` INFO: ${APP}-Config-sync is Up, after ${run_time} seconds" + break + ;; + esac + ((count++)) + echo "`date` INFO: ${APP}-Waited ${run_time} seconds, Config-sync is: ${confsyncstatus_results}, not yet Up" + sleep ${pause} + done + if [ ${count} -eq ${loop_guard} ]; then + echo "`date` ERROR: ${APP}-Config-sync never reached state \"Up\"" >&2 + exit 1 + fi + fi # if not monitor + echo "`date` INFO: ${APP}-Solace Event Broker bringup is complete for this node." 
{{- end }} - exit 0 + exit 0 readiness_check.sh: |- @@ -400,13 +424,14 @@ data: query="" url="" value_search="" + test_connection_only=false script_name=$0 verbose=0 - while getopts "c:n:p:q:u:v:" opt; do + while getopts "c:n:p:q:u:v:t" opt; do case "$opt" in c) count_search=$OPTARG ;; - n) name=$OPTARG + n) username=$OPTARG ;; p) password=$OPTARG ;; @@ -415,24 +440,29 @@ data: u) url=$OPTARG ;; v) value_search=$OPTARG - ;; + ;; + t) test_connection_only=true + ;; esac done shift $((OPTIND-1)) [ "$1" = "--" ] && shift verbose=1 - #echo "`date` INFO: ${APP}-${script_name}: count_search=${count_search} ,name=${name} ,password=xxx query=${query} \ + #echo "`date` INFO: ${APP}-${script_name}: count_search=${count_search} ,username=${username} ,password=xxx query=${query} \ # ,url=${url} ,value_search=${value_search} ,Leftovers: $@" >&2 - if [[ ${url} = "" || ${name} = "" || ${password} = "" || ${query} = "" ]]; then - echo "`date` ERROR: ${APP}-${script_name}: url, name, password and query are madatory fields" >&2 + if [[ ${url} = "" || ${username} = "" || ${password} = "" ]]; then + echo "`date` ERROR: ${APP}-${script_name}: url, username, password are madatory fields" >&2 echo 'missing parameter' exit 1 fi - if [ `curl --write-out '%{http_code}' --silent --output /dev/null -u ${name}:${password} ${url} -d ""` != "200" ] ; then + if [ `curl --write-out '%{http_code}' --silent --output /dev/null -u ${username}:${password} ${url} -d ""` != "200" ] ; then echo "management host is not responding" exit 1 fi - query_response=`curl -sS -u ${name}:${password} ${url} -d "${query}"` + if [ "$test_connection_only" = true ] ; then + exit 0 # done here, connection is up + fi + query_response=`curl -sS -u ${username}:${password} ${url} -d "${query}"` # Validate first char of response is "<", otherwise no hope of being valid xml if [[ ${query_response:0:1} != "<" ]] ; then echo "no valid xml returned" diff --git a/pubsubplus/templates/solaceStatefulSet.yaml b/pubsubplus/templates/solaceStatefulSet.yaml index 0c90b3a9..250d1efb 100644 --- a/pubsubplus/templates/solaceStatefulSet.yaml +++ b/pubsubplus/templates/solaceStatefulSet.yaml @@ -120,7 +120,7 @@ spec: source /mnt/disks/solace/init.sh # not using postinstall hooks because of order dependencies # launch config check - readiness check script will be launched by readinessProbe - nohup /mnt/disks/solace/setup-config-sync.sh & + nohup /mnt/disks/solace/startup-broker.sh & /usr/sbin/boot.sh lifecycle: preStop: @@ -138,6 +138,11 @@ spec: - name: secrets mountPath: /mnt/disks/secrets readOnly: true +{{- if and (.Values.tls) (.Values.tls.enabled) }} + - name: server-certs + mountPath: /mnt/disks/certs/server + readOnly: false +{{- end }} - name: dshm mountPath: /dev/shm - name: data @@ -178,6 +183,12 @@ spec: secret: secretName: {{ template "solace.fullname" . 
}}-secrets defaultMode: 0400 +{{- if and (.Values.tls) (.Values.tls.enabled) }} + - name: server-certs + secret: + secretName: {{ required "A secret containing the server key and certificates is required when TLS in enabled" .Values.tls.serverCertificatesSecret }} + defaultMode: 0400 +{{- end }} - name: dshm emptyDir: medium: Memory diff --git a/pubsubplus/values.yaml b/pubsubplus/values.yaml index 2de9aecd..5f9f3578 100644 --- a/pubsubplus/values.yaml +++ b/pubsubplus/values.yaml @@ -52,6 +52,19 @@ serviceAccount: # name if specified will be used as service account name - must exist if create=false # name: +tls: + # Enable TLS, default is false (not enabled) + enabled: false + # + # Name of the Secret that contains the certificates - required if TLS enabled, no default + # serverCertificatesSecret: 'example-tls-secret' + # + # Certificate filename, default tls.crt + # certFilename: 'tls.crt' + # + # Certificate Key filename, default tls.key + # certKeyFilename: 'tls.key' + service: # service.type specifies how to expose the service: options include ClusterIP, NodePort, LoadBalancer (default if not specified) type: LoadBalancer From 73c9eefa808a5e6a85ccddee5fe5e9534531dd51 Mon Sep 17 00:00:00 2001 From: bczoma Date: Mon, 30 Nov 2020 16:11:21 -0500 Subject: [PATCH 02/19] Server cert set logic improvements --- pubsubplus/templates/solaceConfigMap.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index 90d5ed54..b909ef6e 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -78,6 +78,7 @@ data: startup-broker.sh: |- #!/bin/bash APP=`basename "$0"` + IFS='-' read -ra host_array <<< $(hostname) node_ordinal=${host_array[-1]} echo "`date` INFO: ${APP}-Node ordinal: ${node_ordinal}" echo "`date` INFO: ${APP}-Waiting for management API to become available" @@ -99,17 +100,18 @@ data: exit 1 fi {{- if and (.Values.tls) (.Values.tls.enabled) }} - cert_results=`curl -k -sS -u admin:${password} https://localhost:1943/SEMP \ - -d "../../../../../dev/shm/server.cert"` + cert_results=$(curl --write-out '%{http_code}' --silent --output /dev/null -k -X PATCH -u admin:${password} https://localhost:1943/SEMP/v2/config/ \ + -H "content-type: application/json" \ + -d "{\"tlsServerCertContent\":\"$(cat /dev/shm/server.cert | awk '{printf "%s\\n", $0}')\"}") rm /dev/shm/server.cert - if ! grep -q '' <<< "$cert_results"; then + if [ "${cert_results}" != "200" ]; then echo "`date` ERROR: ${APP}-Unable to set the server certificate, exiting" >&2 exit 1 fi + echo "`date` INFO: ${APP}-Server certificate has been configured" {{- end }} {{- if .Values.solace.redundancy }} - IFS='-' read -ra host_array <<< $(hostname) - #exclude monitor node from config-sync check + # for non-monitor nodes setup redundancy and config-sync if [ "${node_ordinal}" != "2" ]; then resync_step="" role="" @@ -248,8 +250,8 @@ data: exit 1 fi fi # if not monitor - echo "`date` INFO: ${APP}-Solace Event Broker bringup is complete for this node." {{- end }} + echo "`date` INFO: ${APP}-PubSub+ Event Broker bringup is complete for this node." 
exit 0 From a2739807b14a1a8da6d218e1d73bd754ded71ece Mon Sep 17 00:00:00 2001 From: bczoma Date: Fri, 18 Dec 2020 17:36:30 -0500 Subject: [PATCH 03/19] Updated certificates configuration logic, added initial documentations --- docs/PubSubPlusK8SDeployment.md | 48 +++++++++++++++++++++-- pubsubplus/templates/NOTES.txt | 7 ++++ pubsubplus/templates/solaceConfigMap.yaml | 6 +-- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/docs/PubSubPlusK8SDeployment.md b/docs/PubSubPlusK8SDeployment.md index 0ed9b81d..742863a3 100644 --- a/docs/PubSubPlusK8SDeployment.md +++ b/docs/PubSubPlusK8SDeployment.md @@ -18,9 +18,9 @@ Contents: - [Creating a new storage class](#creating-a-new-storage-class) - [Using an existing PVC (Persistent Volume Claim)](#using-an-existing-pvc-persistent-volume-claim-) - [Using a pre-created provider-specific volume](#using-a-pre-created-provider-specific-volume) - + [Exposing the PubSub+ Event Broker Services](#exposing-the-pubsub-event-broker-services) + + [Exposing the PubSub+ Event Broker Services](#exposing-the-pubsub-software-event-broker-services) - [Using pod label "active" to identify the active event broker node](#using-pod-label-active-to-identify-the-active-event-broker-node) - + [The PubSub+ Docker image](#the-pubsub-docker-image) + + [The PubSub+ Docker image](#the-pubsub-software-event-broker-docker-image) - [Using a public registry](#using-a-public-registry) - [Using private registries](#using-private-registries) - [Using ImagePullSecrets for signed images](#using-imagepullsecrets-for-signed-images) @@ -242,6 +242,45 @@ This label is set by the `readiness_check.sh` script in `pubsubplus/templates/so - the Kubernetes service account associated with the Solace pod must have sufficient rights to patch the pod's label when the active event broker is service ready - the Solace pods must be able to communicate with the Kubernetes API at `kubernetes.default.svc.cluster.local` at port $KUBERNETES_SERVICE_PORT. You can find out the address and port by [SSH into the pod](#ssh-access-to-individual-message-brokers). + +************************************************************************************************ + + +### Enabling to use TLS to access broker services + +#### Setting up TLS for use + +Default deployment does not have TLS over TCP enabled. Although the exposed `service.ports` include ports for secured TCP, only the insecure ports can be used by default. + +To enable accessing services over TLS a server key and certificate must be configured on the broker. + +It is assumed that a provider out of scope of this document will be used to create a server key and certificate for the event broker, that meet the [requirements described in the Solace Documentation](https://docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm). + +The server key and certificate must be packaged in a Kubernetes secret, for example by [creating a TLS secret](https://kubernetes.io/docs/concepts/configuration/secret/#tls-secrets). 
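+
+For testing purposes, a self-signed key and certificate can be generated and packaged into a secret as a minimal sketch - the file names `tls.key`, `tls.crt` and the secret name `test-tls` below are example values only, and self-signed certificates are not suitable for production:
+```
+# Generate a throwaway self-signed key and certificate (for test deployments only)
+openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout tls.key -out tls.crt -subj "/CN=*"
+# Package the key and certificate into a Kubernetes TLS-type secret
+kubectl create secret tls test-tls --key="tls.key" --cert="tls.crt"
+```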
+ +This secret name and related parameters shall be specified when deploying the PubSub+ Helm chart: +``` +tls: + enabled: true # set to false by default + serverCertificatesSecret: # replace by the actual name + certFilename: # optional, default if not provided: tls.crt + certKeyFilename: # optional, default if not provided: tls.key +``` + +Here is an example new deployment with TLS enabled using default `certFilename` and `certKeyFilename`: +``` +helm install my-release solacecharts/pubsubplus \ +--set tls.enabled=true,tls.serverCertificatesSecret=my-tls-secret +``` + +Note: it is not possible to update an existing deployment to enable TLS that has been created without TLS enabled, by a simply using the [modify deployment](#modifying-or-upgrading-a-deployment) procedure. In this case, for the first time, certificates need to be [manually loaded and set up](//docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm) on each broker node. After that it is possible to use `helm upgrade` with a secret specified. + +#### Rotating the server key + +In the event the server key or certificate need to be rotated a new Kubernetes secret must be created, which may require deleting and recreating the old secret if using the same name. + +Next, if using the same secret name, the broker Pods need to be restarted, one at a time waiting to reach `1/1` availability before continuing on the next one: starting with the Monitor (ordinal -2), followed by the node in backup role with `active=false` label, and finally the third node. If using a new secret name, the [modify deployment](#modifying-or-upgrading-a-deployment) procedure can be used and an automatic rolling update will follow these steps restarting the nodes one at a time. + ### The PubSub+ Software Event Broker Docker image The `image.repository` and `image.tag` parameters combined specify the PubSub+ Software Event Broker Docker image to be used for the deployment. They can either point to an image in a public or a private Docker container registry. @@ -318,7 +357,7 @@ If other settings control `fsGroup` and `runAsUser`, e.g: when using a [PodSecur #### Securing Helm v2 -Using current Helm v2, Helm's server-side component Tiller must be installed in your Kubernetes environment with rights granted to manage deployments. By default, Tiller is deployed in a permissive configuration. There are best practices to secure Helm and Tiller, and they need to be applied carefully if strict security is required; for example, in a production environment. +Using Helm v2, Helm's server-side component Tiller must be installed in your Kubernetes environment with rights granted to manage deployments. By default, Tiller is deployed in a permissive configuration. There are best practices to secure Helm and Tiller, and they need to be applied carefully if strict security is required; for example, in a production environment. [Securing your Helm Installation](//v2.helm.sh/docs/using_helm/#securing-your-helm-installation ) provides an overview of the Tiller-related security issues and recommended best practices. @@ -329,6 +368,9 @@ Particularly, the [Role-based Access Control section of the Helm documentation]( Services require [pod label "active"](#using-pod-label-active-to-identify-the-active-event-broker-node) of the serving event broker. 
* In a controlled environment it may be necessary to add a [NetworkPolicy](//kubernetes.io/docs/concepts/services-networking/network-policies/ ) to enable [required communication](#using-pod-label-active-to-identify-the-active-event-broker-node). +#### Securing TLS server key and certificate + +Using secrets for TLS server keys and certificates follows Kubernetes recommendations, however, particularly in a production environment, additional steps are required to ensure only authorized access to these secrets following Kubernetes industry best practices, including setting tight RBAC permissions and fixing possible security holes. ## Deployment Prerequisites diff --git a/pubsubplus/templates/NOTES.txt b/pubsubplus/templates/NOTES.txt index 9b6d3b86..0313237a 100644 --- a/pubsubplus/templates/NOTES.txt +++ b/pubsubplus/templates/NOTES.txt @@ -6,6 +6,13 @@ Watch progress by running: For troubleshooting, refer to ***TroubleShooting.md*** +== TLS support == +{{- if not .Values.tls.enabled }} +TLS has not been enabled for this deployment. +{{- else }} +TLS is enabled, using secret {{ .Values.tls.serverCertificatesSecret }} for server certificates configuration. +{{- end }} + == Admin credentials and access == {{- if not .Values.solace.usernameAdminPassword }} ********************************************************************* diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index b909ef6e..a2963642 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -30,7 +30,7 @@ data: export system_scaling_maxconnectioncount="200000" {{- end }} {{- if and (.Values.tls) (.Values.tls.enabled) }} - cat /mnt/disks/certs/server/tls.key /mnt/disks/certs/server/tls.crt > /dev/shm/server.cert + cat /mnt/disks/certs/server/{{.Values.tls.certFilename | default "tls.key"}} /mnt/disks/certs/server/{{.Values.tls.certKeyFilename | default "tls.crt"}} > /dev/shm/server.cert export tls_servercertificate_filepath="/dev/shm/server.cert" {{- end }} {{- if .Values.solace.redundancy }} @@ -100,10 +100,10 @@ data: exit 1 fi {{- if and (.Values.tls) (.Values.tls.enabled) }} + rm /dev/shm/server.cert # remove as soon as possible cert_results=$(curl --write-out '%{http_code}' --silent --output /dev/null -k -X PATCH -u admin:${password} https://localhost:1943/SEMP/v2/config/ \ -H "content-type: application/json" \ - -d "{\"tlsServerCertContent\":\"$(cat /dev/shm/server.cert | awk '{printf "%s\\n", $0}')\"}") - rm /dev/shm/server.cert + -d "{\"tlsServerCertContent\":\"$(cat /mnt/disks/certs/server/{{.Values.tls.certFilename | default "tls.key"}} /mnt/disks/certs/server/{{.Values.tls.certKeyFilename | default "tls.crt"}} | awk '{printf "%s\\n", $0}')\"}") if [ "${cert_results}" != "200" ]; then echo "`date` ERROR: ${APP}-Unable to set the server certificate, exiting" >&2 exit 1 From 2e93a6684cc4f929e7ce7cfb6c133cf42af2ed41 Mon Sep 17 00:00:00 2001 From: bczoma Date: Mon, 21 Dec 2020 16:11:15 -0500 Subject: [PATCH 04/19] Additional documentation updates to use of certificates --- docs/PubSubPlusK8SDeployment.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/PubSubPlusK8SDeployment.md b/docs/PubSubPlusK8SDeployment.md index 742863a3..ba444aa4 100644 --- a/docs/PubSubPlusK8SDeployment.md +++ b/docs/PubSubPlusK8SDeployment.md @@ -20,6 +20,12 @@ Contents: - [Using a pre-created provider-specific volume](#using-a-pre-created-provider-specific-volume) + [Exposing the PubSub+ Event Broker 
Services](#exposing-the-pubsub-software-event-broker-services) - [Using pod label "active" to identify the active event broker node](#using-pod-label-active-to-identify-the-active-event-broker-node) + + [Enabling use of TLS to access broker services](#enabling-use-of-tls-to-access-broker-services) + - [Setting up TLS](#setting-up-tls) + - [Rotating the server key](#rotating-the-server-key) + + [Enabling use of TLS to access broker services](#enabling-use-of-tls-to-access-broker-services) + - [Setting up TLS](#setting-up-tls) + - [Rotating the server key](#rotating-the-server-key) + [The PubSub+ Docker image](#the-pubsub-software-event-broker-docker-image) - [Using a public registry](#using-a-public-registry) - [Using private registries](#using-private-registries) @@ -242,15 +248,11 @@ This label is set by the `readiness_check.sh` script in `pubsubplus/templates/so - the Kubernetes service account associated with the Solace pod must have sufficient rights to patch the pod's label when the active event broker is service ready - the Solace pods must be able to communicate with the Kubernetes API at `kubernetes.default.svc.cluster.local` at port $KUBERNETES_SERVICE_PORT. You can find out the address and port by [SSH into the pod](#ssh-access-to-individual-message-brokers). +### Enabling use of TLS to access broker services -************************************************************************************************ +#### Setting up TLS - -### Enabling to use TLS to access broker services - -#### Setting up TLS for use - -Default deployment does not have TLS over TCP enabled. Although the exposed `service.ports` include ports for secured TCP, only the insecure ports can be used by default. +Default deployment does not have TLS over TCP enabled to access broker services. Although the exposed `service.ports` include ports for secured TCP, only the insecure ports can be used by default. To enable accessing services over TLS a server key and certificate must be configured on the broker. From 68befe16175d725437a6f0161be1e586978d69cb Mon Sep 17 00:00:00 2001 From: bczoma Date: Mon, 21 Dec 2020 16:51:56 -0500 Subject: [PATCH 05/19] Added automation --- .github/workflows/build-test.yml | 127 +++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 .github/workflows/build-test.yml diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml new file mode 100644 index 00000000..90da7e78 --- /dev/null +++ b/.github/workflows/build-test.yml @@ -0,0 +1,127 @@ +name: build + +# Controls when the action will run. 
+on: + # pull_request: + + push: + +jobs: + deploy: + name: K8s QuickStart CI test + runs-on: ubuntu-latest + timeout-minutes: 20 + + steps: + - name: Set env and tools + run: | + echo "TESTCLUSTERNAME=k8s-gha-test-$(date +%s)" >> $GITHUB_ENV + echo "TESTRUNBRANCH=${GITHUB_REF##*/}" >> $GITHUB_ENV + # + sudo gem update --system 3.0.6 + sudo gem install yaml-lint + sudo snap install kubectl --classic + kubectl version --client + curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash # setup Helm 3 + + - name: Checkout + uses: actions/checkout@v2 + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@master + with: + project_id: ${{ secrets.GCP_PROJECT_ID }} + service_account_key: ${{ secrets.GCP_SA_KEY }} + export_default_credentials: true + + - name: Linting yaml files and chart + run: | + yaml-lint -n pubsubplus/*.yaml + helm lint pubsubplus + + - name: Setup K8s env in GKE + run: | + mkdir gke_test; pushd gke_test + wget https://raw.githubusercontent.com/SolaceProducts/solace-gke-quickstart/master/scripts/create_cluster.sh + chmod +x create_cluster.sh + ./create_cluster.sh -z us-east4-a,us-east4-b,us-east4-c -c $TESTCLUSTERNAME -m e2-standard-2 + gcloud container clusters get-credentials $TESTCLUSTERNAME --zone us-east4-a --project capable-stream-180018 + popd + kubectl get statefulset,svc,pods,pvc,pv + + - name: Deploy HA broker and test + run: | + REPO=$(echo ${{ secrets.BROKER_DOCKER_IMAGE_REF }} | cut -d ":" -f 1) + TAG=$(echo ${{ secrets.BROKER_DOCKER_IMAGE_REF }} | cut -d ":" -f 2) + openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout tls.key -out tls.crt -subj "/CN=*" + kubectl create secret tls test-tls --key="tls.key" --cert="tls.crt" + + helm install my-release pubsubplus --set solace.size=dev,solace.redundancy=true,tls.enabled=true,tls.serverCertificatesSecret=test-tls,solace.usernameAdminPassword=admin,image.repository=$REPO,image.tag=$TAG + kubectl get statefulset,svc,pods,pvc,pv --show-labels + echo "Waiting for broker to become active" + sleep 40; kubectl describe nodes + until kubectl get pods --show-labels | grep pubsubplus-0 | grep -m 1 -E '1/1'; do sleep 10; done + until kubectl get pods --show-labels | grep pubsubplus-1 | grep -m 1 -E '1/1'; do sleep 10; done + until kubectl get pods --show-labels | grep pubsubplus-2 | grep -m 1 -E '1/1'; do sleep 10; done + until kubectl get pods --show-labels | grep pubsubplus- | grep -m 1 -E 'active=true'; do sleep 10; done + kubectl get statefulset,svc,pods,pvc,pv --show-labels + bash -c 'if [[ `kubectl get po --show-labels | grep -c "1/1"` -ne 3 ]]; then echo "Some pods are not ready!"; kubectl get po --show-labels; exit 1; fi' + export url="$(kubectl get statefulset,svc,pods,pvc,pv --show-labels | grep LoadBalancer | awk '{print $4}')"; echo $url + curl -O https://sftp.solace.com/download/SDKPERF_C_LINUX64 + tar -xvf SDKPERF_C_LINUX64 + pubSubTools/sdkperf_c -cip=tcp://$url:55555 -mn=10000 -mr=0 -ptl=t1 -stl=t1 | grep "Total Messages" + pubSubTools/sdkperf_c -cip=tcps://$url:55443 -mn=10000 -mr=0 -ptl=t1 -stl=t1 | grep "Total Messages" + sleep 30 + curl -k -sS -u admin:admin http://$url:1943/SEMP -d "" + curl -k -sS -u admin:admin http://$url:1943/SEMP -d "" + if [[ -z `curl -sS -u admin:admin http://$url:8080/SEMP -d "" | grep "Up"` ]] ; then echo "config-sync not up!"; exit 1; fi + helm list + helm delete $(helm list | grep deployed | awk '{print $1}') + kubectl delete pvc --all + + - name: Create chart variants + run: | + bash docs/helm-charts/create-chart-variants.sh; # 
Create chart variants + helm lint pubsubplus + helm install --generate-name pubsubplus --dry-run + helm lint pubsubplus-ha + helm install --generate-name pubsubplus-ha --dry-run + helm lint pubsubplus-dev + helm install --generate-name pubsubplus-dev --dry-run + + - name: Publish artifacts + run: | + git config --global user.name "GitHub Actions Automation" + git config --global user.email "<>" + mkdir gh-pages; # Now update gh-pages + if [ ${{ github.ref }} == 'refs/heads/master' ] && [ ${{ github.repository_owner }} == 'SolaceProducts' ] ; then + echo "Using master on SolaceProducts" + git clone --quiet --branch=gh-pages https://${{ secrets.GH_TOKEN }}@github.com/SolaceProducts/pubsubplus-kubernetes-quickstart gh-pages > /dev/null 2>&1 + mv pubsubplus-*.tgz gh-pages/helm-charts/ + helm repo index gh-pages/helm-charts/ --url https://solaceproducts.github.io/pubsubplus-kubernetes-quickstart/helm-charts + pushd gh-pages + git add -f . + git commit -m "Latest helm chart updates on successful gha-test build ${{ github.run_number }} auto-pushed to gh-pages" + git remote add origin-pages https://${{ secrets.GH_TOKEN }}@github.com/SolaceProducts/pubsubplus-kubernetes-quickstart.git > /dev/null 2>&1 + git push --quiet --set-upstream origin-pages gh-pages + popd + echo "Updated and pushed GH pages!" + elif [ ${{ github.ref }} != 'refs/heads/gh-pages' ] && [ ${{ github.repository_owner }} != 'SolaceProducts' ] ; then + echo "Using $TESTRUNBRANCH on ${{ github.repository_owner }}" + git clone --quiet --branch=gh-pages https://${{ secrets.GH_TOKEN }}@github.com/${{ github.repository }} gh-pages > /dev/null 2>&1 + mv pubsubplus-*.tgz gh-pages/helm-charts/ + helm repo index gh-pages/helm-charts/ --url https://solacedev.github.io/pubsubplus-kubernetes-quickstart/helm-charts + pushd gh-pages + git add -f . + git commit -m "Latest helm chart updates on successful gha-test build ${{ github.run_number }} auto-pushed to gh-pages" + git remote add origin-pages https://${{ secrets.GH_TOKEN }}@github.com/${{ github.repository }}.git > /dev/null 2>&1 + git push --quiet --set-upstream origin-pages gh-pages + popd + echo "Updated and pushed GH pages!" 
+ fi + + - name: Delete test resources (Cleanup) + if: ${{ always() }} + run: | + gcloud container clusters delete $TESTCLUSTERNAME --quiet --zone us-east4-a + gcloud compute disks list | grep gha-test | sed 1d $rpt | while read -r a b c; do gcloud compute disks delete $a --zone $b --quiet; done From f8346e2aa9026a3d372806915e46fbfc535c726d Mon Sep 17 00:00:00 2001 From: bczoma Date: Mon, 21 Dec 2020 17:04:26 -0500 Subject: [PATCH 06/19] fixed semp test request protocol --- .github/workflows/build-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 90da7e78..4a47e10f 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -72,8 +72,8 @@ jobs: pubSubTools/sdkperf_c -cip=tcp://$url:55555 -mn=10000 -mr=0 -ptl=t1 -stl=t1 | grep "Total Messages" pubSubTools/sdkperf_c -cip=tcps://$url:55443 -mn=10000 -mr=0 -ptl=t1 -stl=t1 | grep "Total Messages" sleep 30 - curl -k -sS -u admin:admin http://$url:1943/SEMP -d "" - curl -k -sS -u admin:admin http://$url:1943/SEMP -d "" + curl -k -sS -u admin:admin https://$url:1943/SEMP -d "" + curl -k -sS -u admin:admin https://$url:1943/SEMP -d "" if [[ -z `curl -sS -u admin:admin http://$url:8080/SEMP -d "" | grep "Up"` ]] ; then echo "config-sync not up!"; exit 1; fi helm list helm delete $(helm list | grep deployed | awk '{print $1}') From d2f3bf5f800c38472b2e8fe59299cbe829db3257 Mon Sep 17 00:00:00 2001 From: bczoma Date: Mon, 21 Dec 2020 17:19:48 -0500 Subject: [PATCH 07/19] Removed duplicate in documentation TOC --- docs/PubSubPlusK8SDeployment.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/PubSubPlusK8SDeployment.md b/docs/PubSubPlusK8SDeployment.md index ba444aa4..cab8622f 100644 --- a/docs/PubSubPlusK8SDeployment.md +++ b/docs/PubSubPlusK8SDeployment.md @@ -23,9 +23,6 @@ Contents: + [Enabling use of TLS to access broker services](#enabling-use-of-tls-to-access-broker-services) - [Setting up TLS](#setting-up-tls) - [Rotating the server key](#rotating-the-server-key) - + [Enabling use of TLS to access broker services](#enabling-use-of-tls-to-access-broker-services) - - [Setting up TLS](#setting-up-tls) - - [Rotating the server key](#rotating-the-server-key) + [The PubSub+ Docker image](#the-pubsub-software-event-broker-docker-image) - [Using a public registry](#using-a-public-registry) - [Using private registries](#using-private-registries) From 33f24b1079a21172d17cf95b35caa9ab546bc9f3 Mon Sep 17 00:00:00 2001 From: bczoma Date: Tue, 12 Jan 2021 11:00:16 -0500 Subject: [PATCH 08/19] Additional documentation --- docs/PubSubPlusK8SDeployment.md | 4 +++- pubsubplus/README.md | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/PubSubPlusK8SDeployment.md b/docs/PubSubPlusK8SDeployment.md index cab8622f..7ab0fb30 100644 --- a/docs/PubSubPlusK8SDeployment.md +++ b/docs/PubSubPlusK8SDeployment.md @@ -266,13 +266,15 @@ tls: certKeyFilename: # optional, default if not provided: tls.key ``` +Note: ensure filenames are matching the files reported from running `kubectl describe secret `. 
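+
+If the key and certificate are stored in the secret under different filenames, the defaults can be overridden at deployment time; the secret name and filenames below are hypothetical examples:
+```
+helm install my-release solacecharts/pubsubplus \
+--set tls.enabled=true,tls.serverCertificatesSecret=my-tls-secret,tls.certFilename=server.crt,tls.certKeyFilename=server.key
+```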
+ Here is an example new deployment with TLS enabled using default `certFilename` and `certKeyFilename`: ``` helm install my-release solacecharts/pubsubplus \ --set tls.enabled=true,tls.serverCertificatesSecret=my-tls-secret ``` -Note: it is not possible to update an existing deployment to enable TLS that has been created without TLS enabled, by a simply using the [modify deployment](#modifying-or-upgrading-a-deployment) procedure. In this case, for the first time, certificates need to be [manually loaded and set up](//docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm) on each broker node. After that it is possible to use `helm upgrade` with a secret specified. +Important: it is not possible to update an existing deployment to enable TLS that has been created without TLS enabled, by a simply using the [modify deployment](#modifying-or-upgrading-a-deployment) procedure. In this case, for the first time, certificates need to be [manually loaded and set up](//docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm) on each broker node. After that it is possible to use `helm upgrade` with a secret specified. #### Rotating the server key diff --git a/pubsubplus/README.md b/pubsubplus/README.md index bf7de8ca..628d8584 100644 --- a/pubsubplus/README.md +++ b/pubsubplus/README.md @@ -91,6 +91,10 @@ For more ways to override default chart values, refer to [Customizing the Helm C | `securityContext.runAsUser` | Specifies `runAsUser` in pod security context | set to default PubSub+ appuser id 1000001 | | `serviceAccount.create` | `true` will create a service account dedicated to the deployment in the namespace | `true` | | `serviceAccount.name` | Refer to https://helm.sh/docs/topics/chart_best_practices/rbac/#using-rbac-resources | Undefined | +| `tls.enabled` | Enable to use TLS to access exposed broker services | `false` (not enabled) | +| `tls.serverCertificatesSecret` | Name of the Kubernetes Secret that contains the certificates - required if TLS is enabled | Undefined | +| `tls.certFilename` | Name of the Certificate file in the `serverCertificatesSecret` | `tls.crt` | +| `tls.certKeyFilename` | Name of the Key file in the `serverCertificatesSecret` | `tls.key` | | `service.type` | How to expose the service: options include ClusterIP, NodePort, LoadBalancer | `LoadBalancer` | | `service.annotations` | service.annotations allows to add provider-specific service annotations | Undefined | | `service.ports` | Define PubSub+ service ports exposed. servicePorts are external, mapping to cluster-local pod containerPorts | initial set of frequently used ports, refer to values.yaml | From 72ff20e17e71bf0297a71a5f08993aece7aaad1b Mon Sep 17 00:00:00 2001 From: bczoma Date: Fri, 15 Jan 2021 17:18:59 -0500 Subject: [PATCH 09/19] Clarified certs packaging documentation --- docs/PubSubPlusK8SDeployment.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/PubSubPlusK8SDeployment.md b/docs/PubSubPlusK8SDeployment.md index 7ab0fb30..a44fc503 100644 --- a/docs/PubSubPlusK8SDeployment.md +++ b/docs/PubSubPlusK8SDeployment.md @@ -255,7 +255,10 @@ To enable accessing services over TLS a server key and certificate must be confi It is assumed that a provider out of scope of this document will be used to create a server key and certificate for the event broker, that meet the [requirements described in the Solace Documentation](https://docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm). 
-The server key and certificate must be packaged in a Kubernetes secret, for example by [creating a TLS secret](https://kubernetes.io/docs/concepts/configuration/secret/#tls-secrets).
+The server key and certificate must be packaged in a Kubernetes secret, for example by [creating a TLS secret](https://kubernetes.io/docs/concepts/configuration/secret/#tls-secrets). Example:
+```
+kubectl create secret tls <secret-name> --key="<key-file>" --cert="<cert-file>"
+```
 
 This secret name and related parameters shall be specified when deploying the PubSub+ Helm chart:
 ```

From d3cbd05c09da990efc95c6f41dc2fba4bc50b57d Mon Sep 17 00:00:00 2001
From: bczoma
Date: Fri, 15 Jan 2021 17:21:34 -0500
Subject: [PATCH 10/19] minor doc update

---
 docs/PubSubPlusK8SDeployment.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/PubSubPlusK8SDeployment.md b/docs/PubSubPlusK8SDeployment.md
index a44fc503..fbf42104 100644
--- a/docs/PubSubPlusK8SDeployment.md
+++ b/docs/PubSubPlusK8SDeployment.md
@@ -274,7 +274,7 @@ Note: ensure filenames are matching the files reported from running `kubectl des
 Here is an example new deployment with TLS enabled using default `certFilename` and `certKeyFilename`:
 ```
 helm install my-release solacecharts/pubsubplus \
---set tls.enabled=true,tls.serverCertificatesSecret=my-tls-secret
+--set tls.enabled=true,tls.serverCertificatesSecret=<secret-name>
 ```
 
 Important: it is not possible to update an existing deployment to enable TLS that has been created without TLS enabled, by a simply using the [modify deployment](#modifying-or-upgrading-a-deployment) procedure. In this case, for the first time, certificates need to be [manually loaded and set up](//docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm) on each broker node. After that it is possible to use `helm upgrade` with a secret specified.

From 2b0c9d6bfc48b88aef87c908aac5db22f6301396 Mon Sep 17 00:00:00 2001
From: bczoma
Date: Wed, 20 Jan 2021 10:03:14 -0500
Subject: [PATCH 11/19] Server certificate documentation updates

---
 README.md                       | 2 +-
 docs/PubSubPlusK8SDeployment.md | 4 +++-
 pubsubplus/Chart.yaml           | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 6173a6ea..cfab566b 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ Solace PubSub+ Software Event Broker can be deployed in either a three-node High
 
 We recommend using the Helm tool for convenience. An [alternative method](/docs/PubSubPlusK8SDeployment.md#alternative-deployment-with-generating-templates-for-the-kubernetes-kubectl-tool) using generated templates is also provided.
 
-In this quick start we go through the steps to set up a PubSub+ Software Event Broker using [Solace PubSub+ Helm charts](//hub.helm.sh/charts/solace).
+In this quick start we go through the steps to set up a PubSub+ Software Event Broker using [Solace PubSub+ Helm charts](//artifacthub.io/packages/search?ts_query_web=solace).
 
 There are three Helm chart variants available with default small-size configurations:
 1. `pubsubplus-dev` - recommended PubSub+ Software Event Broker for Developers (standalone) - no guaranteed performance
diff --git a/docs/PubSubPlusK8SDeployment.md b/docs/PubSubPlusK8SDeployment.md
index fbf42104..70b5d1c3 100644
--- a/docs/PubSubPlusK8SDeployment.md
+++ b/docs/PubSubPlusK8SDeployment.md
@@ -253,7 +253,7 @@ Default deployment does not have TLS over TCP enabled to access broker services.
 
 To enable accessing services over TLS a server key and certificate must be configured on the broker.
 
-It is assumed that a provider out of scope of this document will be used to create a server key and certificate for the event broker, that meet the [requirements described in the Solace Documentation](https://docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm). +It is assumed that a provider out of scope of this document will be used to create a server key and certificate for the event broker, that meet the [requirements described in the Solace Documentation](https://docs.solace.com/Configuring-and-Managing/Managing-Server-Certs.htm). If the server key is password protected it shall be transformed to an unencrypted key, e.g.: `openssl rsa -in encryedprivate.key -out unencryed.key`. The server key and certificate must be packaged in a Kubernetes secret, for example by [creating a TLS secret](https://kubernetes.io/docs/concepts/configuration/secret/#tls-secrets). Example: ``` @@ -285,6 +285,8 @@ In the event the server key or certificate need to be rotated a new Kubernetes s Next, if using the same secret name, the broker Pods need to be restarted, one at a time waiting to reach `1/1` availability before continuing on the next one: starting with the Monitor (ordinal -2), followed by the node in backup role with `active=false` label, and finally the third node. If using a new secret name, the [modify deployment](#modifying-or-upgrading-a-deployment) procedure can be used and an automatic rolling update will follow these steps restarting the nodes one at a time. +Note: a pod restart will result in provisioning the server certificate from the secret again so it will revert back from any other server certificate that may have been provisioned on the broker through other mechanism. + ### The PubSub+ Software Event Broker Docker image The `image.repository` and `image.tag` parameters combined specify the PubSub+ Software Event Broker Docker image to be used for the deployment. They can either point to an image in a public or a private Docker container registry. 
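
As an illustrative sketch of the pod-by-pod restart described in the "Rotating the server key" section above - the release name `my-release`, the ordinal of the backup-role node, and the use of `kubectl wait` are assumptions for the example; the backup-role pod should first be identified by its `active=false` label:
```
# Restart the monitor node first, then wait until it is recreated and 1/1 Ready
kubectl delete pod my-release-pubsubplus-2
sleep 10 && kubectl wait --for=condition=Ready pod/my-release-pubsubplus-2 --timeout=600s
# Next restart the node currently in backup role (label active=false), e.g. ordinal 1
kubectl delete pod my-release-pubsubplus-1
sleep 10 && kubectl wait --for=condition=Ready pod/my-release-pubsubplus-1 --timeout=600s
# Finally restart the remaining node
kubectl delete pod my-release-pubsubplus-0
sleep 10 && kubectl wait --for=condition=Ready pod/my-release-pubsubplus-0 --timeout=600s
```
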
diff --git a/pubsubplus/Chart.yaml b/pubsubplus/Chart.yaml index 550f923d..407a2467 100644 --- a/pubsubplus/Chart.yaml +++ b/pubsubplus/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 description: Deploy Solace PubSub+ Event Broker Singleton or HA redundancy group onto a Kubernetes Cluster name: pubsubplus -version: 2.3.0 +version: 2.4.0 icon: https://solaceproducts.github.io/pubsubplus-kubernetes-quickstart/images/PubSubPlus.png maintainers: - name: Solace Community Forum From 968b8a9a24df22b9591022831f10d79f085b5e76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Czoma?= Date: Wed, 20 Jan 2021 16:40:22 -0500 Subject: [PATCH 12/19] Pod state logging improvements --- pubsubplus/templates/solaceConfigMap.yaml | 99 +++++++++++++---------- 1 file changed, 56 insertions(+), 43 deletions(-) diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index a2963642..41e38244 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -87,7 +87,7 @@ data: pause=10 count=0 while [ ${count} -lt ${loop_guard} ]; do - if /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP -t ; then + if /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 -t ; then break fi run_time=$((${count} * ${pause})) @@ -117,7 +117,7 @@ data: role="" count=0 while [ ${count} -lt ${loop_guard} ]; do - role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/redundancy/active-standby-role[text()]"` run_time=$((${count} * ${pause})) @@ -143,7 +143,7 @@ data: count=0 echo "`date` INFO: ${APP}-Management API is up, determined that this node's active-standby role is: ${role}" while [ ${count} -lt ${loop_guard} ]; do - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/redundancy/virtual-routers/${role}/status/activity[text()]"` local_activity=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` @@ -177,7 +177,7 @@ data: count=0 echo "`date` INFO: ${APP}-Waiting for mate activity state to be 'Standby'" while [ ${count} -lt ${loop_guard} ]; do - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/redundancy/virtual-routers/${role}/status/detail/priority-reported-by-mate/summary[text()]"` mate_activity=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` @@ -201,7 +201,7 @@ data: count=0 echo "`date` INFO: ${APP}-Waiting for config-sync connected" while [ ${count} -lt ${loop_guard} ]; do - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/config-sync/status/client/connection-state"` connection_state=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` @@ -222,15 +222,15 @@ data: fi # Now can issue {resync_step} command echo "`date` INFO: ${APP}-Initiating ${resync_step}" - 
/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "<${resync_step}>" - /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + /mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "<${resync_step}>default" # Wait for config-sync results count=0 echo "`date` INFO: ${APP}-Waiting for config-sync connected" while [ ${count} -lt ${loop_guard} ]; do - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/config-sync/status/oper-status"` confsyncstatus_results=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` @@ -257,9 +257,15 @@ data: readiness_check.sh: |- #!/bin/bash + APP=`basename "$0"` LOG_FILE=/usr/sw/var/k8s_readiness_check.log # STDOUT/STDERR goes to k8s event logs but gets cleaned out eventually. This will also persist it. tail -n 1000 ${LOG_FILE} > ${LOG_FILE}.tmp; mv -f ${LOG_FILE}.tmp ${LOG_FILE} || : # Limit logs size - exec > >(tee -a ${LOG_FILE}) 2>&1 + exec > >(tee -a ${LOG_FILE}) 2>&1 # Setup logging + FINAL_ACTIVITY_LOGGED_TRACKING_FILE=/tmp/final_activity_state_logged + STATE_FILE=/tmp/activity_state + if [ ! -f ${STATE_FILE} ]; then # State file not found, creating + echo "unknown" > ${STATE_FILE} + fi # Function to set Kubernetes metadata labels set_label () { @@ -285,19 +291,14 @@ data: -H "Content-Type:application/json-patch+json" \ https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT/api/v1/namespaces/$STATEFULSET_NAMESPACE/pods/$HOSTNAME ; then echo "`date` ERROR: ${APP}-Unable to update pod label, check access from pod to K8s API or RBAC authorization" >&2 - exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi fi echo ${2} > ${3} # Track latest value of label fi } - # Main logic: note that there are no re-tries here, if check fails then return not ready. - APP=`basename "$0"` - state_file=/tmp/activity_state - if [ ! -f ${state_file} ]; then # State file not found, creating - echo "unknown" > ${state_file} - fi + # Main logic: note that there are no re-tries here, if check fails then return not ready. {{- if .Values.solace.redundancy }} # HA config IFS='-' read -ra host_array <<< $(hostname) @@ -310,23 +311,23 @@ data: if [ -f ${lastversion_file} ] && [[ $(cat ${lastversion_file}) != $(readlink /usr/sw/loads/currentload) ]] ; then echo "`date` INFO: ${APP}-Upgrade detected, running additional checks..." # Check redundancy - results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/redundancy/redundancy-status"` redundancystatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` if [ "${redundancystatus_results}" != "Up" ]; then echo "`date` INFO: ${APP}-Redundancy state is not yet up." 
- exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi # Additionally check config-sync status for non-monitoring nodes if [ "${node_ordinal}" != "2" ]; then - results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/config-sync/status/oper-status"` confsyncstatus_results=`echo ${results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` if [ "${confsyncstatus_results}" != "Up" ]; then echo "`date` INFO: ${APP}-Config-sync state is not yet up." - exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi fi fi @@ -334,40 +335,46 @@ data: readlink /usr/sw/loads/currentload > ${lastversion_file} # For monitor node just check for 3 online nodes in group; active label will never be set if [ "${node_ordinal}" = "2" ]; then - role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -c "/rpc-reply/rpc/show/redundancy/group-node/status[text() = \"Online\"]"` if [[ ${role_results} != *""* ]]; then - errorinfo=`echo ${results} | xmllint -xpath "string(returnInfo/errorInfo)" -` || errorinfo= + errorinfo=`echo ${results} | xmllint -xpath "string(returnInfo/errorInfo)" - 2>/dev/null` || errorinfo= echo "`date` INFO: ${APP}-Waiting for valid server status response, got ${errorinfo}" - exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi nodes_online=`echo ${role_results} | xmllint -xpath "string(returnInfo/countSearchResult)" -` if [ "$nodes_online" -eq "3" ]; then - #echo "`date` INFO: ${APP}-Monitor node is redundancy ready" + if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then + echo "`date` INFO: ${APP}-All nodes online, monitor node is redundancy ready" + touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} + fi exit 0 else echo "`date` INFO: ${APP}-Monitor node is not redundancy ready, ${nodes_online} of 3 nodes online" - exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 fi fi # End Monitor Node # For Primary or Backup nodes set both service readiness (active label) and k8s readiness (exit return value) health_result=`curl -s -o /dev/null -w "%{http_code}" http://localhost:5550/health-check/guaranteed-active` case "${health_result}" in "200") - if [[ $(cat $state_file) = "false" ]]; then echo "`date` INFO: ${APP}-HA Event Broker health check reported 200, message spool is up"; fi - set_label "active" "true" $state_file + if [ ! 
-f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then + echo "`date` INFO: ${APP}-HA Event Broker health check reported 200, message spool is up" + touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} + fi + set_label "active" "true" $STATE_FILE exit 0 ;; "503") - if [[ $(cat $state_file) = "true" ]]; then echo "`date` INFO: ${APP}-HA Event Broker health check reported 503"; fi - set_label "active" "false" $state_file + if [[ $(cat $STATE_FILE) = "true" ]]; then echo "`date` INFO: ${APP}-HA Event Broker health check reported 503"; fi + set_label "active" "false" $STATE_FILE # Further check is required to determine readiness ;; *) echo "`date` WARN: ${APP}-HA Event Broker health check reported unexpected ${health_result}" - set_label "active" "false" $state_file - exit 1 + set_label "active" "false" $STATE_FILE + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 esac # At this point analyzing readiness after health check returned 503 - checking if Event Broker is Standby case "${node_ordinal}" in @@ -378,19 +385,22 @@ data: config_role="backup" ;; esac - online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080/SEMP \ + online_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ -q "" \ -v "/rpc-reply/rpc/show/redundancy/virtual-routers/${config_role}/status/activity[text()]"` local_activity=`echo ${online_results} | xmllint -xpath "string(returnInfo/valueSearchResult)" -` case "${local_activity}" in "Mate Active") - # Redundancy is up and node is mate Active" # Pass readiness check + if [ ! -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then + echo "`date` INFO: ${APP}-Redundancy is up and node is mate Active" + touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} + fi exit 0 ;; *) echo "`date` WARN: ${APP}-Health check returned 503 and local activity state is: ${local_activity}, failing readiness check." - exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 ;; esac {{- else }} @@ -398,21 +408,24 @@ data: health_result=`curl -s -o /dev/null -w "%{http_code}" http://localhost:5550/health-check/guaranteed-active` case "${health_result}" in "200") - if [[ $(cat $state_file) = "false" ]]; then echo "`date` INFO: ${APP}-nonHA Event Broker health check reported 200, message spool is up"; fi - set_label "active" "true" $state_file + if [ ! 
-f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} ]; then + echo "`date` INFO: ${APP}-nonHA Event Broker health check reported 200, message spool is up" + touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} + fi + set_label "active" "true" $STATE_FILE exit 0 ;; "503") - if [[ $(cat $state_file) = "true" ]]; then echo "`date` INFO: ${APP}-nonHA Event Broker health check reported 503, message spool is down"; fi - set_label "active" "false" $state_file + if [[ $(cat $STATE_FILE) = "true" ]]; then echo "`date` INFO: ${APP}-nonHA Event Broker health check reported 503, message spool is down"; fi + set_label "active" "false" $STATE_FILE # Fail readiness check - exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 ;; *) echo "`date` WARN: ${APP}-nonHA Event Broker health check reported ${health_result}" - set_label "active" "false" $state_file + set_label "active" "false" $STATE_FILE # Fail readiness check - exit 1 + rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 esac {{- end }} semp_query.sh: |- @@ -457,14 +470,14 @@ data: echo 'missing parameter' exit 1 fi - if [ `curl --write-out '%{http_code}' --silent --output /dev/null -u ${username}:${password} ${url} -d ""` != "200" ] ; then + if [ "`curl --write-out '%{http_code}' --silent --output /dev/null -u ${username}:${password} ${url}`" != "200" ] ; then echo "management host is not responding" exit 1 fi if [ "$test_connection_only" = true ] ; then exit 0 # done here, connection is up fi - query_response=`curl -sS -u ${username}:${password} ${url} -d "${query}"` + query_response=`curl -sS -u ${username}:${password} ${url}/SEMP -d "${query}"` # Validate first char of response is "<", otherwise no hope of being valid xml if [[ ${query_response:0:1} != "<" ]] ; then echo "no valid xml returned" From d887d5964bcb3b4bb7dc6e4e509f42f69a4f6bdb Mon Sep 17 00:00:00 2001 From: bczoma Date: Wed, 20 Jan 2021 16:42:30 -0500 Subject: [PATCH 13/19] Updated main readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index cfab566b..ad8314f2 100644 --- a/README.md +++ b/README.md @@ -6,11 +6,11 @@ The [Solace PubSub+ Platform](https://solace.com/products/platform/)'s [software ## Overview -This document provides a quick getting started guide to install a software event broker in various configurations onto a [Kubernetes](https://kubernetes.io/docs/home/) cluster. The recommended software event broker version is 9.4 or later. +This project is a best practice template intended for development and demo purposes. The tested and recommended Solace PubSub+ Software Event Broker version is 9.8. -*Detailed* *documentation* is provided in the [Solace PubSub+ Software Event Broker on Kubernetes Documentation](docs/PubSubPlusK8SDeployment.md). +This document provides a quick getting started guide to install a software event broker in various configurations onto a [Kubernetes](https://kubernetes.io/docs/home/) cluster. -This quick start is intended mainly for development and demo purposes. Consult the [Deployment Considerations](https://github.com/SolaceProducts/pubsubplus-kubernetes-quickstart/blob/master/docs/PubSubPlusK8SDeployment.md#pubsub-event-broker-deployment-considerations) section of the Documentation when planning your deployment. +Detailed documentation is provided in the [Solace PubSub+ Software Event Broker on Kubernetes Documentation](docs/PubSubPlusK8SDeployment.md). 
Consult the [Deployment Coonsiderations](https://github.com/SolaceProducts/pubsubplus-kubernetes-quickstart/blob/master/docs/PubSubPlusK8SDeployment.md#pubsub-event-broker-deployment-considerations) section of the Documentation when planning your deployment. This document is applicable to any platform supporting Kubernetes, with specific hints on how to set up a simple MiniKube deployment on a Linux-based machine. To view examples of other Kubernetes platforms see: From b2aa0f258bc3cbb445e343a322740ffb9477e9ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Czoma?= Date: Fri, 22 Jan 2021 10:02:54 -0500 Subject: [PATCH 14/19] Updated setting and controlling label and handling updates in readiness check --- pubsubplus/templates/solaceConfigMap.yaml | 43 +++++++++++---------- pubsubplus/templates/solaceStatefulSet.yaml | 8 ++++ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index 41e38244..568e524a 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -262,21 +262,23 @@ data: tail -n 1000 ${LOG_FILE} > ${LOG_FILE}.tmp; mv -f ${LOG_FILE}.tmp ${LOG_FILE} || : # Limit logs size exec > >(tee -a ${LOG_FILE}) 2>&1 # Setup logging FINAL_ACTIVITY_LOGGED_TRACKING_FILE=/tmp/final_activity_state_logged - STATE_FILE=/tmp/activity_state - if [ ! -f ${STATE_FILE} ]; then # State file not found, creating - echo "unknown" > ${STATE_FILE} - fi + + # Function to read Kubernetes metadata labels + get_label () { + # Params: $1 label name + echo $(cat /etc/podinfo/labels | awk -F= '$1=="'${1}'"{print $2}' | xargs); + } # Function to set Kubernetes metadata labels set_label () { - # Params: $2 label set value; $3 file where internal tracking of label value stored + # Params: $1 label name, $2 label set value #Prevent overdriving Kubernetes infra, don't set activity state to same as previous state - previous_state=`cat $3` + previous_state=$(get_label "active") if [ "${2}" = "${previous_state}" ]; then #echo "`date` INFO: ${APP}-Current and Previous state match (${2}), not updating pod label" : else - echo "`date` INFO: ${APP}-Updating pod label using K8s API from `cat ${3}` to ${2}" + echo "`date` INFO: ${APP}-Updating pod label using K8s API from ${previous_state} to ${2}" echo "[{\"op\": \"add\", \"path\": \"/metadata/labels/${1}\", \"value\": \"${2}\" }]" > /tmp/patch_label.json K8S=https://kubernetes.default.svc.cluster.local:$KUBERNETES_SERVICE_PORT KUBE_TOKEN=$( ${3} # Track latest value of label fi } @@ -305,10 +306,10 @@ data: node_ordinal=${host_array[-1]} password=`cat /mnt/disks/secrets/username_admin_password` - # For upgrade purposes, additional checks are required for readiness state when the pod has been started - # This is an upgrade if the lastversion_file exists and contents differ from /usr/sw/loads/currentload - lastversion_file=/usr/sw/var/lastBrokerVersionBeforeReboot - if [ -f ${lastversion_file} ] && [[ $(cat ${lastversion_file}) != $(readlink /usr/sw/loads/currentload) ]] ; then + # For update (includes SolOS upgrade) purposes, additional checks are required for readiness state when the pod has been started + # This is an update if the lastversion_file exists and contents differ from /usr/sw/loads/currentload + lastversion_file=/usr/sw/var/lastControllerRevisionHashBeforeReboot + if [ -f ${lastversion_file} ] && [[ $(cat ${lastversion_file}) != $(get_label "controller-revision-hash") ]] ; then echo "`date` INFO: ${APP}-Upgrade 
detected, running additional checks..." # Check redundancy results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ @@ -332,7 +333,7 @@ data: fi fi # Record current version in lastversion_file - readlink /usr/sw/loads/currentload > ${lastversion_file} + echo $(get_label "controller-revision-hash") > ${lastversion_file} # For monitor node just check for 3 online nodes in group; active label will never be set if [ "${node_ordinal}" = "2" ]; then role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ @@ -363,17 +364,17 @@ data: echo "`date` INFO: ${APP}-HA Event Broker health check reported 200, message spool is up" touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} fi - set_label "active" "true" $STATE_FILE + set_label "active" "true" exit 0 ;; "503") - if [[ $(cat $STATE_FILE) = "true" ]]; then echo "`date` INFO: ${APP}-HA Event Broker health check reported 503"; fi - set_label "active" "false" $STATE_FILE + if [[ $(get_label "active") = "true" ]]; then echo "`date` INFO: ${APP}-HA Event Broker health check reported 503"; fi + set_label "active" "false" # Further check is required to determine readiness ;; *) echo "`date` WARN: ${APP}-HA Event Broker health check reported unexpected ${health_result}" - set_label "active" "false" $STATE_FILE + set_label "active" "false" rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 esac # At this point analyzing readiness after health check returned 503 - checking if Event Broker is Standby @@ -412,18 +413,18 @@ data: echo "`date` INFO: ${APP}-nonHA Event Broker health check reported 200, message spool is up" touch ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE} fi - set_label "active" "true" $STATE_FILE + set_label "active" "true" exit 0 ;; "503") - if [[ $(cat $STATE_FILE) = "true" ]]; then echo "`date` INFO: ${APP}-nonHA Event Broker health check reported 503, message spool is down"; fi - set_label "active" "false" $STATE_FILE + if [[ $(get_label "active") = "true" ]]; then echo "`date` INFO: ${APP}-nonHA Event Broker health check reported 503, message spool is down"; fi + set_label "active" "false" # Fail readiness check rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 ;; *) echo "`date` WARN: ${APP}-nonHA Event Broker health check reported ${health_result}" - set_label "active" "false" $STATE_FILE + set_label "active" "false" # Fail readiness check rm -f ${FINAL_ACTIVITY_LOGGED_TRACKING_FILE}; exit 1 esac diff --git a/pubsubplus/templates/solaceStatefulSet.yaml b/pubsubplus/templates/solaceStatefulSet.yaml index 250d1efb..33111eba 100644 --- a/pubsubplus/templates/solaceStatefulSet.yaml +++ b/pubsubplus/templates/solaceStatefulSet.yaml @@ -133,6 +133,8 @@ spec: killall solacedaemon; while [ ! -d /usr/sw/var/db.upgrade ]; do sleep 1; done; volumeMounts: + - name: podinfo + mountPath: /etc/podinfo - name: config-map mountPath: /mnt/disks/solace - name: secrets @@ -175,6 +177,12 @@ spec: protocol: {{ $item.protocol }} {{- end}} volumes: + - name: podinfo + downwardAPI: + items: + - path: "labels" + fieldRef: + fieldPath: metadata.labels - name: config-map configMap: name: {{ template "solace.fullname" . 
}} From 0ec51baea66f47dcc153faf8818b8380788c54dd Mon Sep 17 00:00:00 2001 From: bczoma Date: Mon, 25 Jan 2021 17:09:40 -0500 Subject: [PATCH 15/19] - exposed additioal ports by default - added explicit definition to setting redundancy listen port - adjusted all port names with protocol tag prefix --- .travis.yml | 110 -------------------- pubsubplus/templates/service-discovery.yaml | 12 +-- pubsubplus/templates/solaceConfigMap.yaml | 7 +- pubsubplus/values.yaml | 62 +++++------ 4 files changed, 41 insertions(+), 150 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index ca884ae5..00000000 --- a/.travis.yml +++ /dev/null @@ -1,110 +0,0 @@ -language: ruby -sudo: required -services: -- docker -before_install: -- echo "Installing test gems" -- gem install yaml-lint -- export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" -- echo "deb http://packages.cloud.google.com/apt $CLOUD_SDK_REPO main" | sudo tee - -a /etc/apt/sources.list.d/google-cloud-sdk.list -- curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - -- sudo apt-get update -- sudo apt-get install -y dpkg -- sudo apt-get install google-cloud-sdk -- curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.10.5/bin/linux/amd64/kubectl; - chmod +x ./kubectl; sudo mv ./kubectl /usr/bin/ -- sudo apt-get install jq -- echo $GCP_KEY_FILE | base64 -d > ./keyfile -- gcloud auth activate-service-account -q $(jq -r .client_email keyfile) --key-file=./keyfile - --project $(jq -r .project_id keyfile) -- rm ./keyfile -- export DESIRED_VERSION=; curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 - | bash; helm version -install: true -script: -- pwd -- yaml-lint -n pubsubplus/*.yaml -- echo "Testing GKE deployment, cloud provider Google Cloud Platform (gcp) " -- mkdir gke_test; pushd gke_test -- export TESTCLUSTERNAME="sol-gke-travistest-$(date +%s)" -- wget https://raw.githubusercontent.com/SolaceProducts/solace-gke-quickstart/master/scripts/create_cluster.sh -- chmod 755 create_cluster.sh -- "./create_cluster.sh -z us-east4-a,us-east4-b,us-east4-c -c $TESTCLUSTERNAME -m e2-standard-2" -- gcloud container clusters get-credentials $TESTCLUSTERNAME --zone us-east4-a --project - capable-stream-180018 -- popd -- kubectl get statefulset,svc,pods,pvc,pv -- #kubectl -n kube-system create serviceaccount tiller -- #kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller -- #helm init --wait --service-account=tiller --upgrade -- #helm version -- helm lint pubsubplus -- helm install my-release pubsubplus --set solace.size=dev,solace.redundancy=true,solace.usernameAdminPassword=admin,image.repository=bczoma/solace-pubsub-standard,image.tag=test -- kubectl get statefulset,svc,pods,pvc,pv --show-labels -- echo "Waiting for cluster to become active" -- travis_wait 30 sleep 1800 & -- sleep 40; kubectl describe nodes -- until kubectl get pods --show-labels | grep pubsubplus-0 | grep -m 1 -E '1/1'; do sleep - 10; done -- until kubectl get pods --show-labels | grep pubsubplus-1 | grep -m 1 -E '1/1'; do sleep - 10; done -- until kubectl get pods --show-labels | grep pubsubplus-2 | grep -m 1 -E '1/1'; do sleep - 10; done -- until kubectl get pods --show-labels | grep pubsubplus- | grep -m 1 -E 'active=true'; - do sleep 10; done -- kubectl get statefulset,svc,pods,pvc,pv --show-labels -- bash -c 'if [[ `kubectl get po --show-labels | grep -c "1/1"` -ne 3 ]]; then echo - "Some pods are not ready!"; 
kubectl get po --show-labels; exit 1; fi' -- export url="$(kubectl get statefulset,svc,pods,pvc,pv --show-labels | grep LoadBalancer - | awk '{print $4}')"; echo $url -- curl -O https://sftp.solace.com/download/SDKPERF_C_LINUX64 -- tar -xvf SDKPERF_C_LINUX64 -- pubSubTools/sdkperf_c -cip=$url -mn=100000 -mr=0 -ptl=t1 -stl=t1 | grep "Total Messages" -- sleep 30 -- bash -c 'if [[ -z `curl -sS -u admin:admin http://$url:8080/SEMP -d "" - | grep "Up"` ]] ; then echo "config-sync not up!"; exit - 1; fi' -- curl -sS -u admin:admin http://$url:8080/SEMP -d "" -- curl -sS -u admin:admin http://$url:8080/SEMP -d "" -- helm list -- helm delete $(helm list | grep deployed | awk '{print $1}') -- kubectl delete pvc --all -- bash docs/helm-charts/create-chart-variants.sh; # Create chart variants -- helm lint pubsubplus -- helm install --generate-name pubsubplus --dry-run -- helm lint pubsubplus-ha -- helm install --generate-name pubsubplus-ha --dry-run -- helm lint pubsubplus-dev -- helm install --generate-name pubsubplus-dev --dry-run -- # Publish to gh-pages and test -- > - if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then - git config --global user.email "travis@travis-ci.org"; - git config --global user.name "travis-ci"; - mkdir gh-pages; # Now update gh-pages - git clone --quiet --branch=gh-pages https://${GH_TOKEN}@github.com/SolaceProducts/pubsubplus-kubernetes-quickstart gh-pages > /dev/null 2>&1; - mv pubsubplus-*.tgz gh-pages/helm-charts/; - helm repo index gh-pages/helm-charts/ --url https://solaceproducts.github.io/pubsubplus-kubernetes-quickstart/helm-charts; - pushd gh-pages; - git add -f .; - git commit -m "Latest helm chart updates on successful travis build $TRAVIS_BUILD_NUMBER auto-pushed to gh-pages"; - git remote add origin-pages https://${GH_TOKEN}@github.com/SolaceProducts/pubsubplus-kubernetes-quickstart.git > /dev/null 2>&1; - git push --quiet --set-upstream origin-pages gh-pages; - echo "Updated and pushed GH pages!"; - popd; - # sleep 60; # Ensure pages updated - # bash docs/helm-charts/test-chart-variants-from-gh-pages.sh https://solaceproducts.github.io/pubsubplus-kubernetes-quickstart/helm-charts; - fi - -after_success: -- echo "Test Success - Branch($TRAVIS_BRANCH) Pull Request($TRAVIS_PULL_REQUEST) Tag($TRAVIS_TAG)" -- echo "YAML linted" -- echo "GKE cluster deployment tested" -- echo "Messaging tested" -- echo "Chart ha and dev variants created and smoke-tested" - -after_script: -- gcloud container clusters delete $TESTCLUSTERNAME --quiet --zone us-east4-a -- gcloud compute disks list | grep travis | sed 1d $rpt | while read -r a b c; do - gcloud compute disks delete $a --zone $b --quiet; done diff --git a/pubsubplus/templates/service-discovery.yaml b/pubsubplus/templates/service-discovery.yaml index 06b91d3c..6151a95f 100644 --- a/pubsubplus/templates/service-discovery.yaml +++ b/pubsubplus/templates/service-discovery.yaml @@ -13,18 +13,18 @@ metadata: spec: ports: - port: 8080 - name: semp + name: tcp-semp - port: 8741 - name: ha-mate-link + name: tcp-ha-mate-link - port: 8300 - name: ha-conf-sync0 + name: tcp-ha-conf-sync0 - port: 8301 - name: ha-conf-sync1 + name: tcp-ha-conf-sync1 - port: 8302 - name: ha-conf-sync2 + name: tcp-ha-conf-sync2 clusterIP: None selector: app.kubernetes.io/name: {{ template "solace.name" . 
}} app.kubernetes.io/instance: {{ .Release.Name }} publishNotReadyAddresses: true -{{- end }} \ No newline at end of file +{{- end }} diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index 41e38244..a2f3aed0 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -51,12 +51,13 @@ data: export redundancy_enable=yes export configsync_enable=yes export redundancy_authentication_presharedkey_key=`cat /mnt/disks/secrets/username_admin_password | awk '{x=$0;for(i=length;i<51;i++)x=x "0";}END{print x}' | base64` # Right-pad with 0s to 50 length + export service_redundancy_firstlistenport='8300' export redundancy_group_node_${service_name}0_nodetype=message_routing - export redundancy_group_node_${service_name}0_connectvia=${service}-0.${service}-discovery.${namespace}.svc + export redundancy_group_node_${service_name}0_connectvia=${service}-0.${service}-discovery.${namespace}.svc:${service_redundancy_firstlistenport} export redundancy_group_node_${service_name}1_nodetype=message_routing - export redundancy_group_node_${service_name}1_connectvia=${service}-1.${service}-discovery.${namespace}.svc + export redundancy_group_node_${service_name}1_connectvia=${service}-1.${service}-discovery.${namespace}.svc:${service_redundancy_firstlistenport} export redundancy_group_node_${service_name}2_nodetype=monitoring - export redundancy_group_node_${service_name}2_connectvia=${service}-2.${service}-discovery.${namespace}.svc + export redundancy_group_node_${service_name}2_connectvia=${service}-2.${service}-discovery.${namespace}.svc:${service_redundancy_firstlistenport} case ${node_ordinal} in 0) diff --git a/pubsubplus/values.yaml b/pubsubplus/values.yaml index 5f9f3578..55fc308a 100644 --- a/pubsubplus/values.yaml +++ b/pubsubplus/values.yaml @@ -79,67 +79,67 @@ service: - servicePort: 2222 containerPort: 2222 protocol: TCP - name: ssh + name: tcp-ssh - servicePort: 8080 containerPort: 8080 protocol: TCP - name: semp + name: tcp-semp - servicePort: 1943 containerPort: 1943 protocol: TCP - name: semptls + name: tcp-semptls - servicePort: 55555 containerPort: 55555 protocol: TCP - name: smf + name: tcp-smf - servicePort: 55003 containerPort: 55003 protocol: TCP - name: smfcomp + name: tcp-smfcomp - servicePort: 55443 containerPort: 55443 protocol: TCP - name: smftls + name: tcp-smftls - servicePort: 8008 containerPort: 8008 protocol: TCP - name: web + name: tcp-web - servicePort: 1443 containerPort: 1443 protocol: TCP - name: webtls + name: tcp-webtls - servicePort: 5672 containerPort: 5672 protocol: TCP - name: amqp + name: tcp-amqp - servicePort: 1883 containerPort: 1883 protocol: TCP - name: mqtt + name: tcp-mqtt - servicePort: 9000 containerPort: 9000 protocol: TCP - name: rest - # - servicePort: 5671 - # containerPort: 5671 - # protocol: TCP - # name: amqptls - # - servicePort: 8883 - # containerPort: 8883 - # protocol: TCP - # name: mqtttls - # - servicePort: 8000 - # containerPort: 8000 - # protocol: TCP - # name: mqttws - # - servicePort: 8443 - # containerPort: 8443 - # protocol: TCP - # name: mqttwss - # - servicePort: 9443 - # containerPort: 9443 - # protocol: TCP - # name: resttls + name: tcp-rest + - servicePort: 5671 + containerPort: 5671 + protocol: TCP + name: tcp-amqptls + - servicePort: 8883 + containerPort: 8883 + protocol: TCP + name: tcp-mqtttls + - servicePort: 8000 + containerPort: 8000 + protocol: TCP + name: tcp-mqttws + - servicePort: 8443 + containerPort: 8443 + protocol: TCP + name: 
tcp-mqttwss + - servicePort: 9443 + containerPort: 9443 + protocol: TCP + name: tcp-resttls storage: # storage.persistent set to false will use ephemeral storage and the rest of the storage params will be ignored From 4d15767032231b3103965508fe634b0d12e3a4c8 Mon Sep 17 00:00:00 2001 From: bczoma Date: Tue, 26 Jan 2021 10:54:05 -0500 Subject: [PATCH 16/19] Fixed server-certs mount should be read-only --- pubsubplus/templates/solaceStatefulSet.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pubsubplus/templates/solaceStatefulSet.yaml b/pubsubplus/templates/solaceStatefulSet.yaml index 33111eba..83a4fbea 100644 --- a/pubsubplus/templates/solaceStatefulSet.yaml +++ b/pubsubplus/templates/solaceStatefulSet.yaml @@ -143,7 +143,7 @@ spec: {{- if and (.Values.tls) (.Values.tls.enabled) }} - name: server-certs mountPath: /mnt/disks/certs/server - readOnly: false + readOnly: true {{- end }} - name: dshm mountPath: /dev/shm From 9fe2699c37d38b1504eef3d0a2e73311ea9ada09 Mon Sep 17 00:00:00 2001 From: bczoma Date: Tue, 26 Jan 2021 11:29:00 -0500 Subject: [PATCH 17/19] Reorganized default port names and order in values --- pubsubplus/values.yaml | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pubsubplus/values.yaml b/pubsubplus/values.yaml index 55fc308a..2d7e8e3f 100644 --- a/pubsubplus/values.yaml +++ b/pubsubplus/values.yaml @@ -87,7 +87,7 @@ service: - servicePort: 1943 containerPort: 1943 protocol: TCP - name: tcp-semptls + name: tls-semp - servicePort: 55555 containerPort: 55555 protocol: TCP @@ -99,7 +99,7 @@ service: - servicePort: 55443 containerPort: 55443 protocol: TCP - name: tcp-smftls + name: tls-smf - servicePort: 8008 containerPort: 8008 protocol: TCP @@ -107,39 +107,39 @@ service: - servicePort: 1443 containerPort: 1443 protocol: TCP - name: tcp-webtls + name: tls-web + - servicePort: 9000 + containerPort: 9000 + protocol: TCP + name: tcp-rest + - servicePort: 9443 + containerPort: 9443 + protocol: TCP + name: tls-rest - servicePort: 5672 containerPort: 5672 protocol: TCP name: tcp-amqp + - servicePort: 5671 + containerPort: 5671 + protocol: TCP + name: tls-amqp - servicePort: 1883 containerPort: 1883 protocol: TCP name: tcp-mqtt - - servicePort: 9000 - containerPort: 9000 - protocol: TCP - name: tcp-rest - - servicePort: 5671 - containerPort: 5671 - protocol: TCP - name: tcp-amqptls - servicePort: 8883 containerPort: 8883 protocol: TCP - name: tcp-mqtttls + name: tls-mqtt - servicePort: 8000 containerPort: 8000 protocol: TCP - name: tcp-mqttws + name: tcp-mqttweb - servicePort: 8443 containerPort: 8443 protocol: TCP - name: tcp-mqttwss - - servicePort: 9443 - containerPort: 9443 - protocol: TCP - name: tcp-resttls + name: tls-mqttweb storage: # storage.persistent set to false will use ephemeral storage and the rest of the storage params will be ignored From 21b8428f73762cf5582aba26a52b2d004dfe1192 Mon Sep 17 00:00:00 2001 From: bczoma Date: Tue, 26 Jan 2021 12:08:02 -0500 Subject: [PATCH 18/19] also added port for SMF routing --- pubsubplus/values.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pubsubplus/values.yaml b/pubsubplus/values.yaml index 2d7e8e3f..1a0dcc20 100644 --- a/pubsubplus/values.yaml +++ b/pubsubplus/values.yaml @@ -100,6 +100,10 @@ service: containerPort: 55443 protocol: TCP name: tls-smf + - servicePort: 55556 + containerPort: 55556 + protocol: TCP + name: tcp-smfroute - servicePort: 8008 containerPort: 8008 protocol: TCP From 1906e5a4272cf2694ac05f705064a421b4a20db5 Mon Sep 
17 00:00:00 2001 From: bczoma Date: Wed, 27 Jan 2021 09:32:54 -0500 Subject: [PATCH 19/19] Updated location of LASTVERSION_FILE where gather-diagnostics expects it --- pubsubplus/templates/solaceConfigMap.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pubsubplus/templates/solaceConfigMap.yaml b/pubsubplus/templates/solaceConfigMap.yaml index 934f9298..638972da 100644 --- a/pubsubplus/templates/solaceConfigMap.yaml +++ b/pubsubplus/templates/solaceConfigMap.yaml @@ -308,9 +308,9 @@ data: password=`cat /mnt/disks/secrets/username_admin_password` # For update (includes SolOS upgrade) purposes, additional checks are required for readiness state when the pod has been started - # This is an update if the lastversion_file exists and contents differ from /usr/sw/loads/currentload - lastversion_file=/usr/sw/var/lastControllerRevisionHashBeforeReboot - if [ -f ${lastversion_file} ] && [[ $(cat ${lastversion_file}) != $(get_label "controller-revision-hash") ]] ; then + # This is an update if the LASTVERSION_FILE with K8s controller-revision-hash exists and contents differ from current value + LASTVERSION_FILE=/var/lib/solace/var/lastConfigRevisionBeforeReboot + if [ -f ${LASTVERSION_FILE} ] && [[ $(cat ${LASTVERSION_FILE}) != $(get_label "controller-revision-hash") ]] ; then echo "`date` INFO: ${APP}-Upgrade detected, running additional checks..." # Check redundancy results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \ @@ -333,8 +333,8 @@ data: fi fi fi - # Record current version in lastversion_file - echo $(get_label "controller-revision-hash") > ${lastversion_file} + # Record current version in LASTVERSION_FILE + echo $(get_label "controller-revision-hash") > ${LASTVERSION_FILE} # For monitor node just check for 3 online nodes in group; active label will never be set if [ "${node_ordinal}" = "2" ]; then role_results=`/mnt/disks/solace/semp_query.sh -n admin -p ${password} -u http://localhost:8080 \